* [PATCH 10/40] i386: Emulate MMX mmx_andnot<mode>3 with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
@ 2019-02-14 12:31 ` H.J. Lu
2019-02-14 12:31 ` [PATCH 06/40] i386: Emulate MMX smulv4hi3_highpart " H.J. Lu
` (39 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:31 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX mmx_andnot<mode>3 with SSE. Only SSE register source operand
is allowed.
PR target/89021
* config/i386/mmx.md (mmx_andnot<mode>3): Also allow
TARGET_MMX_WITH_SSE. Add SSE support.
---
gcc/config/i386/mmx.md | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 9e7798d4b47..2a9972e79d9 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1063,14 +1063,18 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_insn "mmx_andnot<mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
(and:MMXMODEI
- (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0"))
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
- "TARGET_MMX"
- "pandn\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "mode" "DI")])
+ (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv"))
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,x,Yv")))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ pandn\t{%2, %0|%0, %2}
+ pandn\t{%2, %0|%0, %2}
+ vpandn\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxadd,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_<code><mode>3"
[(set (match_operand:MMXMODEI 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 06/40] i386: Emulate MMX smulv4hi3_highpart with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
2019-02-14 12:31 ` [PATCH 10/40] i386: Emulate MMX mmx_andnot<mode>3 " H.J. Lu
@ 2019-02-14 12:31 ` H.J. Lu
2019-02-14 12:31 ` [PATCH 08/40] i386: Emulate MMX ashr<mode>3/<shift_insn><mode>3 " H.J. Lu
` (38 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:31 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX smulv4hi3_highpart with SSE. Only SSE register source operand
is allowed.
PR target/89021
* config/i386/mmx.md (mmx_smulv4hi3_highpart): Also allow
TARGET_MMX_WITH_SSE.
(*mmx_smulv4hi3_highpart): Also allow TARGET_MMX_WITH_SSE. Add
SSE support.
---
gcc/config/i386/mmx.md | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 8ec7632912b..58054b7e0c7 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -752,23 +752,28 @@
(sign_extend:V4SI
(match_operand:V4HI 2 "nonimmediate_operand")))
(const_int 16))))]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
"ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
(define_insn "*mmx_smulv4hi3_highpart"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
(lshiftrt:V4SI
(mult:V4SI
(sign_extend:V4SI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yv"))
(sign_extend:V4SI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv")))
(const_int 16))))]
- "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
- "pmulhw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxmul")
- (set_attr "mode" "DI")])
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "@
+ pmulhw\t{%2, %0|%0, %2}
+ pmulhw\t{%2, %0|%0, %2}
+ vpmulhw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxmul,ssemul,ssemul")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_umulv4hi3_highpart"
[(set (match_operand:V4HI 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 08/40] i386: Emulate MMX ashr<mode>3/<shift_insn><mode>3 with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
2019-02-14 12:31 ` [PATCH 10/40] i386: Emulate MMX mmx_andnot<mode>3 " H.J. Lu
2019-02-14 12:31 ` [PATCH 06/40] i386: Emulate MMX smulv4hi3_highpart " H.J. Lu
@ 2019-02-14 12:31 ` H.J. Lu
2019-02-14 14:04 ` Uros Bizjak
2019-02-14 12:31 ` [PATCH 02/40] i386: Emulate MMX packsswb/packssdw/packuswb with SSE2 H.J. Lu
` (37 subsequent siblings)
40 siblings, 1 reply; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:31 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX ashr<mode>3/<shift_insn><mode>3 with SSE. Only SSE register
source operand is allowed.
PR target/89021
* config/i386/mmx.md (mmx_ashr<mode>3): Changed to define_expand.
Also allow TARGET_MMX_WITH_SSE.
(mmx_<shift_insn><mode>3): Likewise.
(ashr<mode>3): New.
(*ashr<mode>3): Likewise.
(<shift_insn><mode>3): Likewise.
(*<shift_insn><mode>3): Likewise.
---
gcc/config/i386/mmx.md | 68 ++++++++++++++++++++++++++++++++----------
1 file changed, 52 insertions(+), 16 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 23c10dffc38..4738d6b428e 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -958,33 +958,69 @@
[(set_attr "type" "mmxadd")
(set_attr "mode" "DI")])
-(define_insn "mmx_ashr<mode>3"
- [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
+(define_expand "mmx_ashr<mode>3"
+ [(set (match_operand:MMXMODE24 0 "register_operand")
(ashiftrt:MMXMODE24
- (match_operand:MMXMODE24 1 "register_operand" "0")
- (match_operand:DI 2 "nonmemory_operand" "yN")))]
- "TARGET_MMX"
- "psra<mmxvecsize>\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxshft")
+ (match_operand:MMXMODE24 1 "register_operand")
+ (match_operand:DI 2 "nonmemory_operand")))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE")
+
+(define_expand "ashr<mode>3"
+ [(set (match_operand:MMXMODE24 0 "register_operand")
+ (ashiftrt:MMXMODE24
+ (match_operand:MMXMODE24 1 "register_operand")
+ (match_operand:DI 2 "nonmemory_operand")))]
+ "TARGET_MMX_WITH_SSE")
+
+(define_insn "*ashr<mode>3"
+ [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,Yv")
+ (ashiftrt:MMXMODE24
+ (match_operand:MMXMODE24 1 "register_operand" "0,0,Yv")
+ (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ psra<mmxvecsize>\t{%2, %0|%0, %2}
+ psra<mmxvecsize>\t{%2, %0|%0, %2}
+ vpsra<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxshft,sseishft,sseishft")
(set (attr "length_immediate")
(if_then_else (match_operand 2 "const_int_operand")
(const_string "1")
(const_string "0")))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_<shift_insn><mode>3"
- [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
+(define_expand "mmx_<shift_insn><mode>3"
+ [(set (match_operand:MMXMODE248 0 "register_operand")
(any_lshift:MMXMODE248
- (match_operand:MMXMODE248 1 "register_operand" "0")
- (match_operand:DI 2 "nonmemory_operand" "yN")))]
- "TARGET_MMX"
- "p<vshift><mmxvecsize>\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxshft")
+ (match_operand:MMXMODE248 1 "register_operand")
+ (match_operand:DI 2 "nonmemory_operand")))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE")
+
+(define_expand "<shift_insn><mode>3"
+ [(set (match_operand:MMXMODE248 0 "register_operand")
+ (any_lshift:MMXMODE248
+ (match_operand:MMXMODE248 1 "register_operand")
+ (match_operand:DI 2 "nonmemory_operand")))]
+ "TARGET_MMX_WITH_SSE")
+
+(define_insn "*<shift_insn><mode>3"
+ [(set (match_operand:MMXMODE248 0 "register_operand" "=y,x,Yv")
+ (any_lshift:MMXMODE248
+ (match_operand:MMXMODE248 1 "register_operand" "0,0,Yv")
+ (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ p<vshift><mmxvecsize>\t{%2, %0|%0, %2}
+ p<vshift><mmxvecsize>\t{%2, %0|%0, %2}
+ vp<vshift><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxshft,sseishft,sseishft")
(set (attr "length_immediate")
(if_then_else (match_operand 2 "const_int_operand")
(const_string "1")
(const_string "0")))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [PATCH 08/40] i386: Emulate MMX ashr<mode>3/<shift_insn><mode>3 with SSE
2019-02-14 12:31 ` [PATCH 08/40] i386: Emulate MMX ashr<mode>3/<shift_insn><mode>3 " H.J. Lu
@ 2019-02-14 14:04 ` Uros Bizjak
0 siblings, 0 replies; 62+ messages in thread
From: Uros Bizjak @ 2019-02-14 14:04 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
On Thu, Feb 14, 2019 at 1:30 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Emulate MMX ashr<mode>3/<shift_insn><mode>3 with SSE. Only SSE register
> source operand is allowed.
>
> PR target/89021
> * config/i386/mmx.md (mmx_ashr<mode>3): Changed to define_expand.
> Also allow TARGET_MMX_WITH_SSE.
> (mmx_<shift_insn><mode>3): Likewise.
> (ashr<mode>3): New.
> (*ashr<mode>3): Likewise.
> (<shift_insn><mode>3): Likewise.
> (*<shift_insn><mode>3): Likewise.
Please add "|| TARGET_MMX_WITH_SSE" with new constraints to
mmx_*<mode>3 insn instead and don't introduce unnecessary mmx_*
expander.
Uros.
> ---
> gcc/config/i386/mmx.md | 68 ++++++++++++++++++++++++++++++++----------
> 1 file changed, 52 insertions(+), 16 deletions(-)
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 23c10dffc38..4738d6b428e 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -958,33 +958,69 @@
> [(set_attr "type" "mmxadd")
> (set_attr "mode" "DI")])
>
> -(define_insn "mmx_ashr<mode>3"
> - [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
> +(define_expand "mmx_ashr<mode>3"
> + [(set (match_operand:MMXMODE24 0 "register_operand")
> (ashiftrt:MMXMODE24
> - (match_operand:MMXMODE24 1 "register_operand" "0")
> - (match_operand:DI 2 "nonmemory_operand" "yN")))]
> - "TARGET_MMX"
> - "psra<mmxvecsize>\t{%2, %0|%0, %2}"
> - [(set_attr "type" "mmxshft")
> + (match_operand:MMXMODE24 1 "register_operand")
> + (match_operand:DI 2 "nonmemory_operand")))]
> + "TARGET_MMX || TARGET_MMX_WITH_SSE")
> +
> +(define_expand "ashr<mode>3"
> + [(set (match_operand:MMXMODE24 0 "register_operand")
> + (ashiftrt:MMXMODE24
> + (match_operand:MMXMODE24 1 "register_operand")
> + (match_operand:DI 2 "nonmemory_operand")))]
> + "TARGET_MMX_WITH_SSE")
> +
> +(define_insn "*ashr<mode>3"
> + [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,Yv")
> + (ashiftrt:MMXMODE24
> + (match_operand:MMXMODE24 1 "register_operand" "0,0,Yv")
> + (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))]
> + "TARGET_MMX || TARGET_MMX_WITH_SSE"
> + "@
> + psra<mmxvecsize>\t{%2, %0|%0, %2}
> + psra<mmxvecsize>\t{%2, %0|%0, %2}
> + vpsra<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
> + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> + (set_attr "type" "mmxshft,sseishft,sseishft")
> (set (attr "length_immediate")
> (if_then_else (match_operand 2 "const_int_operand")
> (const_string "1")
> (const_string "0")))
> - (set_attr "mode" "DI")])
> + (set_attr "mode" "DI,TI,TI")])
>
> -(define_insn "mmx_<shift_insn><mode>3"
> - [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
> +(define_expand "mmx_<shift_insn><mode>3"
> + [(set (match_operand:MMXMODE248 0 "register_operand")
> (any_lshift:MMXMODE248
> - (match_operand:MMXMODE248 1 "register_operand" "0")
> - (match_operand:DI 2 "nonmemory_operand" "yN")))]
> - "TARGET_MMX"
> - "p<vshift><mmxvecsize>\t{%2, %0|%0, %2}"
> - [(set_attr "type" "mmxshft")
> + (match_operand:MMXMODE248 1 "register_operand")
> + (match_operand:DI 2 "nonmemory_operand")))]
> + "TARGET_MMX || TARGET_MMX_WITH_SSE")
> +
> +(define_expand "<shift_insn><mode>3"
> + [(set (match_operand:MMXMODE248 0 "register_operand")
> + (any_lshift:MMXMODE248
> + (match_operand:MMXMODE248 1 "register_operand")
> + (match_operand:DI 2 "nonmemory_operand")))]
> + "TARGET_MMX_WITH_SSE")
> +
> +(define_insn "*<shift_insn><mode>3"
> + [(set (match_operand:MMXMODE248 0 "register_operand" "=y,x,Yv")
> + (any_lshift:MMXMODE248
> + (match_operand:MMXMODE248 1 "register_operand" "0,0,Yv")
> + (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))]
> + "TARGET_MMX || TARGET_MMX_WITH_SSE"
> + "@
> + p<vshift><mmxvecsize>\t{%2, %0|%0, %2}
> + p<vshift><mmxvecsize>\t{%2, %0|%0, %2}
> + vp<vshift><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
> + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> + (set_attr "type" "mmxshft,sseishft,sseishft")
> (set (attr "length_immediate")
> (if_then_else (match_operand 2 "const_int_operand")
> (const_string "1")
> (const_string "0")))
> - (set_attr "mode" "DI")])
> + (set_attr "mode" "DI,TI,TI")])
>
> ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> ;;
> --
> 2.20.1
>
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 02/40] i386: Emulate MMX packsswb/packssdw/packuswb with SSE2
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (2 preceding siblings ...)
2019-02-14 12:31 ` [PATCH 08/40] i386: Emulate MMX ashr<mode>3/<shift_insn><mode>3 " H.J. Lu
@ 2019-02-14 12:31 ` H.J. Lu
2019-02-14 12:31 ` [PATCH 20/40] i386: Emulate MMX mmx_umulv4hi3_highpart with SSE H.J. Lu
` (36 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:31 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX packsswb/packssdw/packuswb with SSE packsswb/packssdw/packuswb
plus moving bits 64:95 to bits 32:63 in SSE register. Only SSE register
source operand is allowed.
2019-02-08 H.J. Lu <hongjiu.lu@intel.com>
Uros Bizjak <ubizjak@gmail.com>
PR target/89021
* config/i386/i386-protos.h (ix86_move_vector_high_sse_to_mmx):
New prototype.
(ix86_split_mmx_pack): Likewise.
* config/i386/i386.c (ix86_move_vector_high_sse_to_mmx): New
function.
(ix86_split_mmx_pack): Likewise.
* config/i386/i386.md (mmx_isa): New.
(enabled): Also check mmx_isa.
* config/i386/mmx.md (any_s_truncate): New code iterator.
(s_trunsuffix): New code attr.
(mmx_packsswb): Removed.
(mmx_packssdw): Likewise.
(mmx_packuswb): Likewise.
(mmx_pack<s_trunsuffix>swb): New define_insn_and_split to emulate
MMX packsswb/packuswb with SSE2.
(mmx_packssdw): Likewise.
---
gcc/config/i386/i386-protos.h | 3 ++
gcc/config/i386/i386.c | 54 ++++++++++++++++++++++++++++
gcc/config/i386/i386.md | 13 +++++++
gcc/config/i386/mmx.md | 67 +++++++++++++++++++----------------
4 files changed, 107 insertions(+), 30 deletions(-)
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 27f5cc13abf..a53b48438ec 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -202,6 +202,9 @@ extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, rtx, rtx);
extern rtx ix86_split_stack_guard (void);
+extern void ix86_move_vector_high_sse_to_mmx (rtx);
+extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
#endif /* TREE_CODE */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 83d3117f46d..c6325224c9d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -20226,6 +20226,60 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
gcc_unreachable ();
}
+/* Move bits 64:95 to bits 32:63. */
+
+void
+ix86_move_vector_high_sse_to_mmx (rtx op)
+{
+ rtx mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4, GEN_INT (0), GEN_INT (2),
+ GEN_INT (0), GEN_INT (0)));
+ rtx dest = lowpart_subreg (V4SImode, op, GET_MODE (op));
+ op = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+ rtx insn = gen_rtx_SET (dest, op);
+ emit_insn (insn);
+}
+
+/* Split MMX pack with signed/unsigned saturation with SSE/SSE2. */
+
+void
+ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+
+ machine_mode dmode = GET_MODE (op0);
+ machine_mode smode = GET_MODE (op1);
+ machine_mode inner_dmode = GET_MODE_INNER (dmode);
+ machine_mode inner_smode = GET_MODE_INNER (smode);
+
+ /* Get the corresponding SSE mode for destination. */
+ int nunits = 16 / GET_MODE_SIZE (inner_dmode);
+ machine_mode sse_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+ nunits).require ();
+ machine_mode sse_half_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+ nunits / 2).require ();
+
+ /* Get the corresponding SSE mode for source. */
+ nunits = 16 / GET_MODE_SIZE (inner_smode);
+ machine_mode sse_smode = mode_for_vector (GET_MODE_INNER (smode),
+ nunits).require ();
+
+ /* Generate SSE pack with signed/unsigned saturation. */
+ rtx dest = lowpart_subreg (sse_dmode, op0, GET_MODE (op0));
+ op1 = lowpart_subreg (sse_smode, op1, GET_MODE (op1));
+ op2 = lowpart_subreg (sse_smode, op2, GET_MODE (op2));
+
+ op1 = gen_rtx_fmt_e (code, sse_half_dmode, op1);
+ op2 = gen_rtx_fmt_e (code, sse_half_dmode, op2);
+ rtx insn = gen_rtx_SET (dest, gen_rtx_VEC_CONCAT (sse_dmode,
+ op1, op2));
+ emit_insn (insn);
+
+ ix86_move_vector_high_sse_to_mmx (op0);
+}
+
/* Helper function of ix86_fixup_binary_operands to canonicalize
operand order. Returns true if the operands should be swapped. */
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 40ed93dc804..e1727676deb 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -792,6 +792,10 @@
avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
(const_string "base"))
+;; Define instruction set of MMX instructions
+(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx"
+ (const_string "base"))
+
(define_attr "enabled" ""
(cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
(eq_attr "isa" "x64_sse2")
@@ -830,6 +834,15 @@
(eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
(eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
(eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
+
+ (eq_attr "mmx_isa" "native")
+ (symbol_ref "!TARGET_MMX_WITH_SSE")
+ (eq_attr "mmx_isa" "x64")
+ (symbol_ref "TARGET_MMX_WITH_SSE")
+ (eq_attr "mmx_isa" "x64_avx")
+ (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
+ (eq_attr "mmx_isa" "x64_noavx")
+ (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
]
(const_int 1)))
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 9c3808338d3..ca9cf20f8e3 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1021,41 +1021,48 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_insn "mmx_packsswb"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
+;; Used in signed and unsigned truncations with saturation.
+(define_code_iterator any_s_truncate [ss_truncate us_truncate])
+;; Instruction suffix for truncations with saturation.
+(define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")])
+
+(define_insn_and_split "mmx_pack<s_trunsuffix>swb"
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
(vec_concat:V8QI
- (ss_truncate:V4QI
- (match_operand:V4HI 1 "register_operand" "0"))
- (ss_truncate:V4QI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
- "TARGET_MMX"
- "packsswb\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxshft")
- (set_attr "mode" "DI")])
+ (any_s_truncate:V4QI
+ (match_operand:V4HI 1 "register_operand" "0,0,Yv"))
+ (any_s_truncate:V4QI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv"))))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ pack<s_trunsuffix>swb\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_pack (operands, <any_s_truncate:CODE>); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxshft,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_packssdw"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_packssdw"
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(vec_concat:V4HI
(ss_truncate:V2HI
- (match_operand:V2SI 1 "register_operand" "0"))
+ (match_operand:V2SI 1 "register_operand" "0,0,Yv"))
(ss_truncate:V2HI
- (match_operand:V2SI 2 "nonimmediate_operand" "ym"))))]
- "TARGET_MMX"
- "packssdw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxshft")
- (set_attr "mode" "DI")])
-
-(define_insn "mmx_packuswb"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (vec_concat:V8QI
- (us_truncate:V4QI
- (match_operand:V4HI 1 "register_operand" "0"))
- (us_truncate:V4QI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
- "TARGET_MMX"
- "packuswb\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxshft")
- (set_attr "mode" "DI")])
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym,x,Yv"))))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ packssdw\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_pack (operands, SS_TRUNCATE); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxshft,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_punpckhbw"
[(set (match_operand:V8QI 0 "register_operand" "=y")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 20/40] i386: Emulate MMX mmx_umulv4hi3_highpart with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (3 preceding siblings ...)
2019-02-14 12:31 ` [PATCH 02/40] i386: Emulate MMX packsswb/packssdw/packuswb with SSE2 H.J. Lu
@ 2019-02-14 12:31 ` H.J. Lu
2019-02-14 12:31 ` [PATCH 17/40] i386: Emulate MMX mmx_pinsrw " H.J. Lu
` (35 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:31 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX mmx_umulv4hi3_highpart with SSE. Only SSE register source
operand is allowed.
PR target/89021
* config/i386/mmx.md (mmx_umulv4hi3_highpart): Also check
TARGET_MMX and TARGET_MMX_WITH_SSE.
(*mmx_umulv4hi3_highpart): Add SSE emulation.
---
gcc/config/i386/mmx.md | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 9ff0db9c2ed..1fdd09242af 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -785,24 +785,30 @@
(zero_extend:V4SI
(match_operand:V4HI 2 "nonimmediate_operand")))
(const_int 16))))]
- "TARGET_SSE || TARGET_3DNOW_A"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
"ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
(define_insn "*mmx_umulv4hi3_highpart"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
(lshiftrt:V4SI
(mult:V4SI
(zero_extend:V4SI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yv"))
(zero_extend:V4SI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv")))
(const_int 16))))]
- "(TARGET_SSE || TARGET_3DNOW_A)
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (MULT, V4HImode, operands)"
- "pmulhuw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxmul")
- (set_attr "mode" "DI")])
+ "@
+ pmulhuw\t{%2, %0|%0, %2}
+ pmulhuw\t{%2, %0|%0, %2}
+ vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxmul,ssemul,ssemul")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_pmaddwd"
[(set (match_operand:V2SI 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 17/40] i386: Emulate MMX mmx_pinsrw with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (4 preceding siblings ...)
2019-02-14 12:31 ` [PATCH 20/40] i386: Emulate MMX mmx_umulv4hi3_highpart with SSE H.J. Lu
@ 2019-02-14 12:31 ` H.J. Lu
2019-02-14 12:31 ` [PATCH 23/40] i386: Emulate MMX mmx_uavgv4hi3 " H.J. Lu
` (34 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:31 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX mmx_pinsrw with SSE. Only SSE register source operand is
allowed.
PR target/89021
* config/i386/mmx.md (mmx_pinsrw): Also check TARGET_MMX and
TARGET_MMX_WITH_SSE.
(*mmx_pinsrw): Add SSE emulation.
---
gcc/config/i386/mmx.md | 33 +++++++++++++++++++++++----------
1 file changed, 23 insertions(+), 10 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 99208d4a4de..b9f7c89cd55 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1296,32 +1296,45 @@
(match_operand:SI 2 "nonimmediate_operand"))
(match_operand:V4HI 1 "register_operand")
(match_operand:SI 3 "const_0_to_3_operand")))]
- "TARGET_SSE || TARGET_3DNOW_A"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
{
operands[2] = gen_lowpart (HImode, operands[2]);
operands[3] = GEN_INT (1 << INTVAL (operands[3]));
})
(define_insn "*mmx_pinsrw"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(vec_merge:V4HI
(vec_duplicate:V4HI
- (match_operand:HI 2 "nonimmediate_operand" "rm"))
- (match_operand:V4HI 1 "register_operand" "0")
+ (match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yv")
(match_operand:SI 3 "const_int_operand")))]
- "(TARGET_SSE || TARGET_3DNOW_A)
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
< GET_MODE_NUNITS (V4HImode))"
{
operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
- if (MEM_P (operands[2]))
- return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+ if (TARGET_MMX_WITH_SSE && TARGET_AVX)
+ {
+ if (MEM_P (operands[2]))
+ return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ else
+ return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
+ }
else
- return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+ {
+ if (MEM_P (operands[2]))
+ return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+ else
+ return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+ }
}
- [(set_attr "type" "mmxcvt")
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
(set_attr "length_immediate" "1")
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_pextrw"
[(set (match_operand:SI 0 "register_operand" "=r,r")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 23/40] i386: Emulate MMX mmx_uavgv4hi3 with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (5 preceding siblings ...)
2019-02-14 12:31 ` [PATCH 17/40] i386: Emulate MMX mmx_pinsrw " H.J. Lu
@ 2019-02-14 12:31 ` H.J. Lu
2019-02-14 12:31 ` [PATCH 35/40] i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE H.J. Lu
` (33 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:31 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX mmx_uavgv4hi3 with SSE. Only SSE register source operand is
allowed.
PR target/89021
* config/i386/mmx.md (mmx_uavgv4hi3): Also check TARGET_MMX and
TARGET_MMX_WITH_SSE.
(*mmx_uavgv4hi3): Add SSE emulation.
---
gcc/config/i386/mmx.md | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index b0009afc35d..e1432edcd3d 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1753,27 +1753,33 @@
(const_vector:V4SI [(const_int 1) (const_int 1)
(const_int 1) (const_int 1)]))
(const_int 1))))]
- "TARGET_SSE || TARGET_3DNOW_A"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
"ix86_fixup_binary_operands_no_copy (PLUS, V4HImode, operands);")
(define_insn "*mmx_uavgv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
(lshiftrt:V4SI
(plus:V4SI
(plus:V4SI
(zero_extend:V4SI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yv"))
(zero_extend:V4SI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv")))
(const_vector:V4SI [(const_int 1) (const_int 1)
(const_int 1) (const_int 1)]))
(const_int 1))))]
- "(TARGET_SSE || TARGET_3DNOW_A)
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (PLUS, V4HImode, operands)"
- "pavgw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxshft")
- (set_attr "mode" "DI")])
+ "@
+ pavgw\t{%2, %0|%0, %2}
+ pavgw\t{%2, %0|%0, %2}
+ vpavgw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxshft,sseiadd,sseiadd")
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_psadbw"
[(set (match_operand:V1DI 0 "register_operand" "=y")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 35/40] i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (6 preceding siblings ...)
2019-02-14 12:31 ` [PATCH 23/40] i386: Emulate MMX mmx_uavgv4hi3 " H.J. Lu
@ 2019-02-14 12:31 ` H.J. Lu
2019-02-14 12:31 ` [PATCH 14/40] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE H.J. Lu
` (32 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:31 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
PR target/89021
* config/i386/mmx.md (MMXMODE:mov<mode>): Also allow
TARGET_MMX_WITH_SSE.
(MMXMODE:*mov<mode>_internal): Likewise.
(MMXMODE:movmisalign<mode>): Likewise.
---
gcc/config/i386/mmx.md | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index a618a620eb1..81ee6250051 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -70,7 +70,7 @@
(define_expand "mov<mode>"
[(set (match_operand:MMXMODE 0 "nonimmediate_operand")
(match_operand:MMXMODE 1 "nonimmediate_operand"))]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_move (<MODE>mode, operands);
DONE;
@@ -81,7 +81,7 @@
"=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!y,v,v,v,m,r,v,!y,*x")
(match_operand:MMXMODE 1 "nonimm_or_0_operand"
"rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!y,r ,C,v,m,v,v,r,*x,!y"))]
- "TARGET_MMX
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -207,7 +207,7 @@
(define_expand "movmisalign<mode>"
[(set (match_operand:MMXMODE 0 "nonimmediate_operand")
(match_operand:MMXMODE 1 "nonimmediate_operand"))]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_move (<MODE>mode, operands);
DONE;
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 14/40] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (7 preceding siblings ...)
2019-02-14 12:31 ` [PATCH 35/40] i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE H.J. Lu
@ 2019-02-14 12:31 ` H.J. Lu
2019-02-14 12:31 ` [PATCH 03/40] i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX H.J. Lu
` (31 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:31 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE.
PR target/89021
* config/i386/sse.md (sse_cvtps2pi): Add SSE emulation.
(sse_cvttps2pi): Likewise.
---
gcc/config/i386/sse.md | 30 ++++++++++++++++++------------
1 file changed, 18 insertions(+), 12 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c8e0133560a..083f9ef0f44 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4574,26 +4574,32 @@
(set_attr "mode" "V4SF")])
(define_insn "sse_cvtps2pi"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
+ [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
(vec_select:V2SI
- (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm")]
UNSPEC_FIX_NOTRUNC)
(parallel [(const_int 0) (const_int 1)])))]
- "TARGET_SSE"
- "cvtps2pi\t{%1, %0|%0, %q1}"
- [(set_attr "type" "ssecvt")
- (set_attr "unit" "mmx")
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
+ "@
+ cvtps2pi\t{%1, %0|%0, %q1}
+ %vcvtps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "mmx_isa" "native,x64")
+ (set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx,*")
(set_attr "mode" "DI")])
(define_insn "sse_cvttps2pi"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
+ [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
(vec_select:V2SI
- (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+ (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm"))
(parallel [(const_int 0) (const_int 1)])))]
- "TARGET_SSE"
- "cvttps2pi\t{%1, %0|%0, %q1}"
- [(set_attr "type" "ssecvt")
- (set_attr "unit" "mmx")
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
+ "@
+ cvttps2pi\t{%1, %0|%0, %q1}
+ %vcvttps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "mmx_isa" "native,x64")
+ (set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx,*")
(set_attr "prefix_rep" "0")
(set_attr "mode" "SF")])
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 03/40] i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (8 preceding siblings ...)
2019-02-14 12:31 ` [PATCH 14/40] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE H.J. Lu
@ 2019-02-14 12:31 ` H.J. Lu
2019-02-14 12:31 ` [PATCH 16/40] i386: Emulate MMX mmx_pextrw with SSE H.J. Lu
` (30 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:31 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX. For MMX punpckhXX,
move bits 64:127 to bits 0:63 in SSE register. Only SSE register source
operand is allowed.
PR target/89021
* config/i386/i386-protos.h (ix86_split_mmx_punpck): New
prototype.
* config/i386/i386.c (ix86_split_mmx_punpck): New function.
* config/i386/mmx.md (mmx_punpckhbw): Changed to
define_insn_and_split to support SSE emulation.
(mmx_punpcklbw): Likewise.
(mmx_punpckhwd): Likewise.
(mmx_punpcklwd): Likewise.
(mmx_punpckhdq): Likewise.
(mmx_punpckldq): Likewise.
---
gcc/config/i386/i386-protos.h | 1 +
gcc/config/i386/i386.c | 77 +++++++++++++++++++
gcc/config/i386/mmx.md | 138 ++++++++++++++++++++++------------
3 files changed, 168 insertions(+), 48 deletions(-)
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index a53b48438ec..37581837a32 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -204,6 +204,7 @@ extern rtx ix86_split_stack_guard (void);
extern void ix86_move_vector_high_sse_to_mmx (rtx);
extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+extern void ix86_split_mmx_punpck (rtx[], bool);
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index c6325224c9d..dce4038685e 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -20280,6 +20280,83 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
ix86_move_vector_high_sse_to_mmx (op0);
}
+/* Split MMX punpcklXX/punpckhXX with SSE punpcklXX. */
+
+void
+ix86_split_mmx_punpck (rtx operands[], bool high_p)
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ machine_mode mode = GET_MODE (op0);
+ rtx mask;
+ /* The corresponding SSE mode. */
+ machine_mode sse_mode, double_sse_mode;
+
+ switch (mode)
+ {
+ case E_V8QImode:
+ sse_mode = V16QImode;
+ double_sse_mode = V32QImode;
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (16,
+ GEN_INT (0), GEN_INT (16),
+ GEN_INT (1), GEN_INT (17),
+ GEN_INT (2), GEN_INT (18),
+ GEN_INT (3), GEN_INT (19),
+ GEN_INT (4), GEN_INT (20),
+ GEN_INT (5), GEN_INT (21),
+ GEN_INT (6), GEN_INT (22),
+ GEN_INT (7), GEN_INT (23)));
+ break;
+
+ case E_V4HImode:
+ sse_mode = V8HImode;
+ double_sse_mode = V16HImode;
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (8,
+ GEN_INT (0), GEN_INT (8),
+ GEN_INT (1), GEN_INT (9),
+ GEN_INT (2), GEN_INT (10),
+ GEN_INT (3), GEN_INT (11)));
+ break;
+
+ case E_V2SImode:
+ sse_mode = V4SImode;
+ double_sse_mode = V8SImode;
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4,
+ GEN_INT (0), GEN_INT (4),
+ GEN_INT (1), GEN_INT (5)));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Generate SSE punpcklXX. */
+ rtx dest = lowpart_subreg (sse_mode, op0, GET_MODE (op0));
+ op1 = lowpart_subreg (sse_mode, op1, GET_MODE (op1));
+ op2 = lowpart_subreg (sse_mode, op2, GET_MODE (op2));
+
+ op1 = gen_rtx_VEC_CONCAT (double_sse_mode, op1, op2);
+ op2 = gen_rtx_VEC_SELECT (sse_mode, op1, mask);
+ rtx insn = gen_rtx_SET (dest, op2);
+ emit_insn (insn);
+
+ if (high_p)
+ {
+ /* Move bits 64:127 to bits 0:63. */
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (0), GEN_INT (0)));
+ dest = lowpart_subreg (V4SImode, dest, GET_MODE (dest));
+ op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+ insn = gen_rtx_SET (dest, op1);
+ emit_insn (insn);
+ }
+}
+
/* Helper function of ix86_fixup_binary_operands to canonicalize
operand order. Returns true if the operands should be swapped. */
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index ca9cf20f8e3..8ae24439e8d 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1064,87 +1064,129 @@
(set_attr "type" "mmxshft,sselog,sselog")
(set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpckhbw"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhbw"
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
(vec_select:V8QI
(vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V8QI 1 "register_operand" "0,0,Yv")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym,x,Yv"))
(parallel [(const_int 4) (const_int 12)
(const_int 5) (const_int 13)
(const_int 6) (const_int 14)
(const_int 7) (const_int 15)])))]
- "TARGET_MMX"
- "punpckhbw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpckhbw\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, true); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpcklbw"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpcklbw"
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
(vec_select:V8QI
(vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V8QI 1 "register_operand" "0,0,Yv")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym,x,Yv"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 2) (const_int 10)
(const_int 3) (const_int 11)])))]
- "TARGET_MMX"
- "punpcklbw\t{%2, %0|%0, %k2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpcklbw\t{%2, %0|%0, %k2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, false); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpckhwd"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhwd"
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(vec_select:V4HI
(vec_concat:V8HI
- (match_operand:V4HI 1 "register_operand" "0")
- (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yv")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv"))
(parallel [(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))]
- "TARGET_MMX"
- "punpckhwd\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpckhwd\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, true); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpcklwd"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpcklwd"
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(vec_select:V4HI
(vec_concat:V8HI
- (match_operand:V4HI 1 "register_operand" "0")
- (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yv")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv"))
(parallel [(const_int 0) (const_int 4)
(const_int 1) (const_int 5)])))]
- "TARGET_MMX"
- "punpcklwd\t{%2, %0|%0, %k2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpcklwd\t{%2, %0|%0, %k2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, false); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpckhdq"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhdq"
+ [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
(vec_select:V2SI
(vec_concat:V4SI
- (match_operand:V2SI 1 "register_operand" "0")
- (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V2SI 1 "register_operand" "0,0,Yv")
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym,x,Yv"))
(parallel [(const_int 1)
(const_int 3)])))]
- "TARGET_MMX"
- "punpckhdq\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpckhdq\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, true); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpckldq"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckldq"
+ [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
(vec_select:V2SI
(vec_concat:V4SI
- (match_operand:V2SI 1 "register_operand" "0")
- (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V2SI 1 "register_operand" "0,0,Yv")
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym,x,Yv"))
(parallel [(const_int 0)
(const_int 2)])))]
- "TARGET_MMX"
- "punpckldq\t{%2, %0|%0, %k2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpckldq\t{%2, %0|%0, %k2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, false); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_pinsrw"
[(set (match_operand:V4HI 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 16/40] i386: Emulate MMX mmx_pextrw with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (9 preceding siblings ...)
2019-02-14 12:31 ` [PATCH 03/40] i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX H.J. Lu
@ 2019-02-14 12:31 ` H.J. Lu
2019-02-14 12:31 ` [PATCH 12/40] i386: Emulate MMX vec_dupv2si " H.J. Lu
` (29 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:31 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX mmx_pextrw with SSE. Only SSE register source operand is
allowed.
PR target/89021
* config/i386/mmx.md (mmx_pextrw): Add SSE emulation.
---
gcc/config/i386/mmx.md | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 497af2d74b7..99208d4a4de 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1324,16 +1324,18 @@
(set_attr "mode" "DI")])
(define_insn "mmx_pextrw"
- [(set (match_operand:SI 0 "register_operand" "=r")
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
(zero_extend:SI
(vec_select:HI
- (match_operand:V4HI 1 "register_operand" "y")
- (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
- "TARGET_SSE || TARGET_3DNOW_A"
- "pextrw\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "mmxcvt")
+ (match_operand:V4HI 1 "register_operand" "y,Yv")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]))))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
+ "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64")
+ (set_attr "type" "mmxcvt,sselog1")
(set_attr "length_immediate" "1")
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI")])
(define_expand "mmx_pshufw"
[(match_operand:V4HI 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 12/40] i386: Emulate MMX vec_dupv2si with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (10 preceding siblings ...)
2019-02-14 12:31 ` [PATCH 16/40] i386: Emulate MMX mmx_pextrw with SSE H.J. Lu
@ 2019-02-14 12:31 ` H.J. Lu
2019-02-14 12:31 ` [PATCH 31/40] i386: Emulate MMX pshufb with SSE version H.J. Lu
` (28 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:31 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX vec_dupv2si with SSE. Add the "Yw" constraint to allow
broadcast from integer register for AVX512BW with TARGET_AVX512VL.
Only SSE register source operand is allowed.
PR target/89021
* config/i386/constraints.md (Yw): New constraint.
* config/i386/mmx.md (*vec_dupv2si): Changed to
define_insn_and_split and also allow TARGET_MMX_WITH_SSE to
support SSE emulation.
---
gcc/config/i386/constraints.md | 6 ++++++
gcc/config/i386/mmx.md | 24 +++++++++++++++++-------
2 files changed, 23 insertions(+), 7 deletions(-)
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 16075b4acf3..c546b20d9dc 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -110,6 +110,8 @@
;; v any EVEX encodable SSE register for AVX512VL target,
;; otherwise any SSE register
;; h EVEX encodable SSE register with number factor of four
+;; w any EVEX encodable SSE register for AVX512BW with TARGET_AVX512VL
+;; target.
(define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
"First SSE register (@code{%xmm0}).")
@@ -146,6 +148,10 @@
"TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
"@internal For AVX512VL, any EVEX encodable SSE register (@code{%xmm0-%xmm31}), otherwise any SSE register.")
+(define_register_constraint "Yw"
+ "TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : NO_REGS"
+ "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target.")
+
;; We use the B prefix to denote any number of internal operands:
;; f FLAGS_REG
;; g GOT memory operand.
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 132ce7af802..441a08d22b7 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1395,14 +1395,24 @@
(set_attr "length_immediate" "1")
(set_attr "mode" "DI")])
-(define_insn "*vec_dupv2si"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "*vec_dupv2si"
+ [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv,Yw")
(vec_duplicate:V2SI
- (match_operand:SI 1 "register_operand" "0")))]
- "TARGET_MMX"
- "punpckldq\t%0, %0"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ (match_operand:SI 1 "register_operand" "0,0,Yv,r")))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpckldq\t%0, %0
+ #
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(set (match_dup 0)
+ (vec_duplicate:V4SI (match_dup 1)))]
+ "operands[0] = lowpart_subreg (V4SImode, operands[0],
+ GET_MODE (operands[0]));"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx,x64_avx")
+ (set_attr "type" "mmxcvt,ssemov,ssemov,ssemov")
+ (set_attr "mode" "DI,TI,TI,TI")])
(define_insn "*mmx_concatv2si"
[(set (match_operand:V2SI 0 "register_operand" "=y,y")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 31/40] i386: Emulate MMX pshufb with SSE version
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (11 preceding siblings ...)
2019-02-14 12:31 ` [PATCH 12/40] i386: Emulate MMX vec_dupv2si " H.J. Lu
@ 2019-02-14 12:31 ` H.J. Lu
2019-02-14 14:21 ` Uros Bizjak
2019-02-14 12:31 ` [PATCH 04/40] i386: Emulate MMX plusminus/sat_plusminus with SSE H.J. Lu
` (27 subsequent siblings)
40 siblings, 1 reply; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:31 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX version of pshufb with SSE version by masking out the bit 3
of the shuffle control byte. Only SSE register source operand is allowed.
PR target/89021
* config/i386/sse.md (ssse3_pshufbv8qi3): Renamed to ...
(ssse3_pshufbv8qi3_mmx): This.
(ssse3_pshufbv8qi3): New.
(ssse3_pshufbv8qi3_sse): Likewise.
---
gcc/config/i386/sse.md | 56 ++++++++++++++++++++++++++++++++++++++++--
1 file changed, 54 insertions(+), 2 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index cc7dbe79fa7..a92505c54a1 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15722,18 +15722,70 @@
(set_attr "btver2_decode" "vector")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "ssse3_pshufbv8qi3"
+(define_expand "ssse3_pshufbv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand")
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand")
+ (match_operand:V8QI 2 "nonimmediate_operand")]
+ UNSPEC_PSHUFB))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+{
+ if (TARGET_MMX_WITH_SSE)
+ {
+ rtx op2 = force_reg (V8QImode, operands[2]);
+ emit_insn (gen_ssse3_pshufbv8qi3_sse (operands[0], operands[1],
+ op2));
+ DONE;
+ }
+})
+
+(define_insn "ssse3_pshufbv8qi3_mmx"
[(set (match_operand:V8QI 0 "register_operand" "=y")
(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym")]
UNSPEC_PSHUFB))]
- "TARGET_SSSE3"
+ "TARGET_SSSE3 && !TARGET_MMX_WITH_SSE"
"pshufb\t{%2, %0|%0, %2}";
[(set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
(set_attr "mode" "DI")])
+(define_insn_and_split "ssse3_pshufbv8qi3_sse"
+ [(set (match_operand:V8QI 0 "register_operand" "=x,Yv")
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,Yv")
+ (match_operand:V8QI 2 "register_operand" "x,Yv")]
+ UNSPEC_PSHUFB))
+ (clobber (match_scratch:V4SI 3 "=x,Yv"))]
+ "TARGET_SSSE3 && TARGET_MMX_WITH_SSE"
+ "#"
+ "reload_completed"
+ [(set (match_dup 3) (match_dup 5))
+ (set (match_dup 3)
+ (and:V4SI (match_dup 3) (match_dup 2)))
+ (set (match_dup 0)
+ (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
+{
+ /* Emulate MMX version of pshufb with SSE version by masking out the
+ bit 3 of the shuffle control byte. */
+ operands[0] = lowpart_subreg (V16QImode, operands[0],
+ GET_MODE (operands[0]));
+ operands[1] = lowpart_subreg (V16QImode, operands[1],
+ GET_MODE (operands[1]));
+ operands[2] = lowpart_subreg (V4SImode, operands[2],
+ GET_MODE (operands[2]));
+ operands[4] = lowpart_subreg (V16QImode, operands[3],
+ GET_MODE (operands[3]));
+ rtvec par = gen_rtvec (4, GEN_INT (0xf7f7f7f7),
+ GEN_INT (0xf7f7f7f7),
+ GEN_INT (0xf7f7f7f7),
+ GEN_INT (0xf7f7f7f7));
+ rtx vec_const = gen_rtx_CONST_VECTOR (V4SImode, par);
+ operands[5] = force_const_mem (V4SImode, vec_const);
+}
+ [(set_attr "mmx_isa" "x64_noavx,x64_avx")
+ (set_attr "type" "sselog1")
+ (set_attr "mode" "TI,TI")])
+
(define_insn "<ssse3_avx2>_psign<mode>3"
[(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
(unspec:VI124_AVX2
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [PATCH 31/40] i386: Emulate MMX pshufb with SSE version
2019-02-14 12:31 ` [PATCH 31/40] i386: Emulate MMX pshufb with SSE version H.J. Lu
@ 2019-02-14 14:21 ` Uros Bizjak
0 siblings, 0 replies; 62+ messages in thread
From: Uros Bizjak @ 2019-02-14 14:21 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
On Thu, Feb 14, 2019 at 1:30 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Emulate MMX version of pshufb with SSE version by masking out the bit 3
> of the shuffle control byte. Only SSE register source operand is allowed.
>
> PR target/89021
> * config/i386/sse.md (ssse3_pshufbv8qi3): Renamed to ...
> (ssse3_pshufbv8qi3_mmx): This.
> (ssse3_pshufbv8qi3): New.
> (ssse3_pshufbv8qi3_sse): Likewise.
These insns can also be merged together using
(clobber (match_scratch:V4SI 3 "=X,x,Yv"))
Uros.
> ---
> gcc/config/i386/sse.md | 56 ++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 54 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index cc7dbe79fa7..a92505c54a1 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -15722,18 +15722,70 @@
> (set_attr "btver2_decode" "vector")
> (set_attr "mode" "<sseinsnmode>")])
>
> -(define_insn "ssse3_pshufbv8qi3"
> +(define_expand "ssse3_pshufbv8qi3"
> + [(set (match_operand:V8QI 0 "register_operand")
> + (unspec:V8QI [(match_operand:V8QI 1 "register_operand")
> + (match_operand:V8QI 2 "nonimmediate_operand")]
> + UNSPEC_PSHUFB))]
> + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
> +{
> + if (TARGET_MMX_WITH_SSE)
> + {
> + rtx op2 = force_reg (V8QImode, operands[2]);
> + emit_insn (gen_ssse3_pshufbv8qi3_sse (operands[0], operands[1],
> + op2));
> + DONE;
> + }
> +})
> +
> +(define_insn "ssse3_pshufbv8qi3_mmx"
> [(set (match_operand:V8QI 0 "register_operand" "=y")
> (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
> (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
> UNSPEC_PSHUFB))]
> - "TARGET_SSSE3"
> + "TARGET_SSSE3 && !TARGET_MMX_WITH_SSE"
> "pshufb\t{%2, %0|%0, %2}";
> [(set_attr "type" "sselog1")
> (set_attr "prefix_extra" "1")
> (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
> (set_attr "mode" "DI")])
>
> +(define_insn_and_split "ssse3_pshufbv8qi3_sse"
> + [(set (match_operand:V8QI 0 "register_operand" "=x,Yv")
> + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,Yv")
> + (match_operand:V8QI 2 "register_operand" "x,Yv")]
> + UNSPEC_PSHUFB))
> + (clobber (match_scratch:V4SI 3 "=x,Yv"))]
> + "TARGET_SSSE3 && TARGET_MMX_WITH_SSE"
> + "#"
> + "reload_completed"
> + [(set (match_dup 3) (match_dup 5))
> + (set (match_dup 3)
> + (and:V4SI (match_dup 3) (match_dup 2)))
> + (set (match_dup 0)
> + (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
> +{
> + /* Emulate MMX version of pshufb with SSE version by masking out the
> + bit 3 of the shuffle control byte. */
> + operands[0] = lowpart_subreg (V16QImode, operands[0],
> + GET_MODE (operands[0]));
> + operands[1] = lowpart_subreg (V16QImode, operands[1],
> + GET_MODE (operands[1]));
> + operands[2] = lowpart_subreg (V4SImode, operands[2],
> + GET_MODE (operands[2]));
> + operands[4] = lowpart_subreg (V16QImode, operands[3],
> + GET_MODE (operands[3]));
> + rtvec par = gen_rtvec (4, GEN_INT (0xf7f7f7f7),
> + GEN_INT (0xf7f7f7f7),
> + GEN_INT (0xf7f7f7f7),
> + GEN_INT (0xf7f7f7f7));
> + rtx vec_const = gen_rtx_CONST_VECTOR (V4SImode, par);
> + operands[5] = force_const_mem (V4SImode, vec_const);
> +}
> + [(set_attr "mmx_isa" "x64_noavx,x64_avx")
> + (set_attr "type" "sselog1")
> + (set_attr "mode" "TI,TI")])
> +
> (define_insn "<ssse3_avx2>_psign<mode>3"
> [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
> (unspec:VI124_AVX2
> --
> 2.20.1
>
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 04/40] i386: Emulate MMX plusminus/sat_plusminus with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (12 preceding siblings ...)
2019-02-14 12:31 ` [PATCH 31/40] i386: Emulate MMX pshufb with SSE version H.J. Lu
@ 2019-02-14 12:31 ` H.J. Lu
2019-02-14 12:31 ` [PATCH 13/40] i386: Emulate MMX pshufw " H.J. Lu
` (26 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:31 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX plusminus/sat_plusminus with SSE. Only SSE register source
operand is allowed.
PR target/89021
* config/i386/mmx.md (MMXMODEI8): Require TARGET_SSE2 for V1DI.
(plusminus:mmx_<plusminus_insn><mode>3): Check
TARGET_MMX_WITH_SSE.
(sat_plusminus:mmx_<plusminus_insn><mode>3): Likewise.
(<plusminus_insn><mode>3): New.
(*mmx_<plusminus_insn><mode>3): Add SSE emulation.
(*mmx_<plusminus_insn><mode>3): Likewise.
---
gcc/config/i386/mmx.md | 51 ++++++++++++++++++++++++++++--------------
1 file changed, 34 insertions(+), 17 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 8ae24439e8d..b6277789091 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -45,7 +45,7 @@
;; 8 byte integral modes handled by MMX (and by extension, SSE)
(define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
-(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI])
+(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
;; All 8-byte vector modes handled by MMX
(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
@@ -665,37 +665,54 @@
(plusminus:MMXMODEI8
(match_operand:MMXMODEI8 1 "nonimmediate_operand")
(match_operand:MMXMODEI8 2 "nonimmediate_operand")))]
- "TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode)"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_expand "<plusminus_insn><mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand")
+ (plusminus:MMXMODEI
+ (match_operand:MMXMODEI 1 "nonimmediate_operand")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
+ "TARGET_MMX_WITH_SSE"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*mmx_<plusminus_insn><mode>3"
- [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
+ [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,Yv")
(plusminus:MMXMODEI8
- (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0")
- (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
- "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode))
+ (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0,0,Yv")
+ (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym,x,Yv")))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
- "p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "mode" "DI")])
+ "@
+ p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+ p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+ vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxadd,sseadd,sseadd")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_<plusminus_insn><mode>3"
[(set (match_operand:MMXMODE12 0 "register_operand")
(sat_plusminus:MMXMODE12
(match_operand:MMXMODE12 1 "nonimmediate_operand")
(match_operand:MMXMODE12 2 "nonimmediate_operand")))]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*mmx_<plusminus_insn><mode>3"
- [(set (match_operand:MMXMODE12 0 "register_operand" "=y")
+ [(set (match_operand:MMXMODE12 0 "register_operand" "=y,x,Yv")
(sat_plusminus:MMXMODE12
- (match_operand:MMXMODE12 1 "nonimmediate_operand" "<comm>0")
- (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))]
- "TARGET_MMX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
- "p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "mode" "DI")])
+ (match_operand:MMXMODE12 1 "nonimmediate_operand" "<comm>0,0,Yv")
+ (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym,x,Yv")))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "@
+ p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+ p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+ vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxadd,sseadd,sseadd")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_mulv4hi3"
[(set (match_operand:V4HI 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 13/40] i386: Emulate MMX pshufw with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (13 preceding siblings ...)
2019-02-14 12:31 ` [PATCH 04/40] i386: Emulate MMX plusminus/sat_plusminus with SSE H.J. Lu
@ 2019-02-14 12:31 ` H.J. Lu
2019-02-14 12:31 ` [PATCH 07/40] i386: Emulate MMX mmx_pmaddwd " H.J. Lu
` (25 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:31 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX pshufw with SSE. Only SSE register source operand is allowed.
PR target/89021
* config/i386/mmx.md (mmx_pshufw): Also check TARGET_MMX and
TARGET_MMX_WITH_SSE.
(mmx_pshufw_1): Add SSE emulation.
(*vec_dupv4hi): Changed to define_insn_and_split and also allow
TARGET_MMX_WITH_SSE to support SSE emulation.
---
gcc/config/i386/mmx.md | 79 ++++++++++++++++++++++++++++++++++--------
1 file changed, 64 insertions(+), 15 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 441a08d22b7..497af2d74b7 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1339,7 +1339,8 @@
[(match_operand:V4HI 0 "register_operand")
(match_operand:V4HI 1 "nonimmediate_operand")
(match_operand:SI 2 "const_int_operand")]
- "TARGET_SSE || TARGET_3DNOW_A"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
{
int mask = INTVAL (operands[2]);
emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1],
@@ -1351,14 +1352,15 @@
})
(define_insn "mmx_pshufw_1"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,Yv")
(vec_select:V4HI
- (match_operand:V4HI 1 "nonimmediate_operand" "ym")
+ (match_operand:V4HI 1 "nonimmediate_operand" "ym,Yv")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
(match_operand 5 "const_0_to_3_operand")])))]
- "TARGET_SSE || TARGET_3DNOW_A"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
{
int mask = 0;
mask |= INTVAL (operands[2]) << 0;
@@ -1367,11 +1369,20 @@
mask |= INTVAL (operands[5]) << 6;
operands[2] = GEN_INT (mask);
- return "pshufw\t{%2, %1, %0|%0, %1, %2}";
+ switch (which_alternative)
+ {
+ case 0:
+ return "pshufw\t{%2, %1, %0|%0, %1, %2}";
+ case 1:
+ return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
+ default:
+ gcc_unreachable ();
+ }
}
- [(set_attr "type" "mmxcvt")
+ [(set_attr "mmx_isa" "native,x64")
+ (set_attr "type" "mmxcvt,sselog")
(set_attr "length_immediate" "1")
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI")])
(define_insn "mmx_pswapdv2si2"
[(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -1384,16 +1395,54 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
-(define_insn "*vec_dupv4hi"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "*vec_dupv4hi"
+ [(set (match_operand:V4HI 0 "register_operand" "=y,Yv,Yw")
(vec_duplicate:V4HI
(truncate:HI
- (match_operand:SI 1 "register_operand" "0"))))]
- "TARGET_SSE || TARGET_3DNOW_A"
- "pshufw\t{$0, %0, %0|%0, %0, 0}"
- [(set_attr "type" "mmxcvt")
- (set_attr "length_immediate" "1")
- (set_attr "mode" "DI")])
+ (match_operand:SI 1 "register_operand" "0,Yv,r"))))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
+ "@
+ pshufw\t{$0, %0, %0|%0, %0, 0}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+{
+ rtx op;
+ operands[0] = lowpart_subreg (V8HImode, operands[0],
+ GET_MODE (operands[0]));
+ if (TARGET_AVX2)
+ {
+ operands[1] = lowpart_subreg (HImode, operands[1],
+ GET_MODE (operands[1]));
+ op = gen_rtx_VEC_DUPLICATE (V8HImode, operands[1]);
+ }
+ else
+ {
+ operands[1] = lowpart_subreg (V8HImode, operands[1],
+ GET_MODE (operands[1]));
+ rtx mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (8,
+ GEN_INT (0),
+ GEN_INT (0),
+ GEN_INT (0),
+ GEN_INT (0),
+ GEN_INT (4),
+ GEN_INT (5),
+ GEN_INT (6),
+ GEN_INT (7)));
+
+ op = gen_rtx_VEC_SELECT (V8HImode, operands[1], mask);
+ }
+ rtx insn = gen_rtx_SET (operands[0], op);
+ emit_insn (insn);
+ DONE;
+}
+ [(set_attr "mmx_isa" "native,x64,x64_avx")
+ (set_attr "type" "mmxcvt,sselog1,ssemov")
+ (set_attr "length_immediate" "1,1,0")
+ (set_attr "mode" "DI,TI,TI")])
(define_insn_and_split "*vec_dupv2si"
[(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv,Yw")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 07/40] i386: Emulate MMX mmx_pmaddwd with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (14 preceding siblings ...)
2019-02-14 12:31 ` [PATCH 13/40] i386: Emulate MMX pshufw " H.J. Lu
@ 2019-02-14 12:31 ` H.J. Lu
2019-02-14 12:31 ` [PATCH 05/40] i386: Emulate MMX mulv4hi3 " H.J. Lu
` (24 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:31 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX pmaddwd with SSE. Only an SSE register source operand is
allowed.
PR target/89021
* config/i386/mmx.md (mmx_pmaddwd): Also allow TARGET_MMX_WITH_SSE.
(*mmx_pmaddwd): Also allow TARGET_MMX_WITH_SSE. Add SSE support.
---
gcc/config/i386/mmx.md | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 58054b7e0c7..23c10dffc38 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -823,20 +823,20 @@
(sign_extend:V2SI
(vec_select:V2HI (match_dup 2)
(parallel [(const_int 1) (const_int 3)]))))))]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
"ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
(define_insn "*mmx_pmaddwd"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
+ [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
(plus:V2SI
(mult:V2SI
(sign_extend:V2SI
(vec_select:V2HI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0")
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yv")
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2SI
(vec_select:V2HI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv")
(parallel [(const_int 0) (const_int 2)]))))
(mult:V2SI
(sign_extend:V2SI
@@ -845,10 +845,15 @@
(sign_extend:V2SI
(vec_select:V2HI (match_dup 2)
(parallel [(const_int 1) (const_int 3)]))))))]
- "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
- "pmaddwd\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxmul")
- (set_attr "mode" "DI")])
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "@
+ pmaddwd\t{%2, %0|%0, %2}
+ pmaddwd\t{%2, %0|%0, %2}
+ vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxmul,sseiadd,sseiadd")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_pmulhrwv4hi3"
[(set (match_operand:V4HI 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 05/40] i386: Emulate MMX mulv4hi3 with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (15 preceding siblings ...)
2019-02-14 12:31 ` [PATCH 07/40] i386: Emulate MMX mmx_pmaddwd " H.J. Lu
@ 2019-02-14 12:31 ` H.J. Lu
2019-02-14 12:31 ` [PATCH 40/40] i386: Also enable SSSE3 __m64 tests in 64-bit mode H.J. Lu
` (23 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:31 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX mulv4hi3 with SSE. Only an SSE register source operand is
allowed.
PR target/89021
* config/i386/mmx.md (mulv4hi3): New.
(*mmx_mulv4hi3): Also allow TARGET_MMX_WITH_SSE. Add SSE
support.
---
gcc/config/i386/mmx.md | 26 +++++++++++++++++++-------
1 file changed, 19 insertions(+), 7 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index b6277789091..8ec7632912b 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -721,14 +721,26 @@
"TARGET_MMX"
"ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+(define_expand "mulv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand")
+ (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand")
+ (match_operand:V4HI 2 "nonimmediate_operand")))]
+ "TARGET_MMX_WITH_SSE"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
(define_insn "*mmx_mulv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
- "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
- "pmullw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxmul")
- (set_attr "mode" "DI")])
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yv")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv")))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "@
+ pmullw\t{%2, %0|%0, %2}
+ pmullw\t{%2, %0|%0, %2}
+ vpmullw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxmul,ssemul,ssemul")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_smulv4hi3_highpart"
[(set (match_operand:V4HI 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 40/40] i386: Also enable SSSE3 __m64 tests in 64-bit mode
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (16 preceding siblings ...)
2019-02-14 12:31 ` [PATCH 05/40] i386: Emulate MMX mulv4hi3 " H.J. Lu
@ 2019-02-14 12:31 ` H.J. Lu
2019-02-14 20:21 ` Uros Bizjak
2019-02-14 12:33 ` [PATCH 21/40] i386: Emulate MMX maskmovq with SSE2 maskmovdqu H.J. Lu
` (22 subsequent siblings)
40 siblings, 1 reply; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:31 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Since we now emulate MMX intrinsics with SSE in 64-bit mode, we can
enable SSSE3 __m64 tests even when AVX is enabled.
PR target/89021
* gcc.target/i386/ssse3-pabsb.c: Also enable __m64 check in
64-bit mode.
* gcc.target/i386/ssse3-pabsd.c: Likewise.
* gcc.target/i386/ssse3-pabsw.c: Likewise.
* gcc.target/i386/ssse3-palignr.c: Likewise.
* gcc.target/i386/ssse3-phaddd.c: Likewise.
* gcc.target/i386/ssse3-phaddsw.c: Likewise.
* gcc.target/i386/ssse3-phaddw.c: Likewise.
* gcc.target/i386/ssse3-phsubd.c: Likewise.
* gcc.target/i386/ssse3-phsubsw.c: Likewise.
* gcc.target/i386/ssse3-phsubw.c: Likewise.
* gcc.target/i386/ssse3-pmaddubsw.c: Likewise.
* gcc.target/i386/ssse3-pmulhrsw.c: Likewise.
* gcc.target/i386/ssse3-pshufb.c: Likewise.
* gcc.target/i386/ssse3-psignb.c: Likewise.
* gcc.target/i386/ssse3-psignd.c: Likewise.
* gcc.target/i386/ssse3-psignw.c: Likewise.
---
gcc/testsuite/gcc.target/i386/ssse3-pabsb.c | 4 ++--
gcc/testsuite/gcc.target/i386/ssse3-pabsd.c | 4 ++--
gcc/testsuite/gcc.target/i386/ssse3-pabsw.c | 4 ++--
gcc/testsuite/gcc.target/i386/ssse3-palignr.c | 6 +++---
gcc/testsuite/gcc.target/i386/ssse3-phaddd.c | 4 ++--
gcc/testsuite/gcc.target/i386/ssse3-phaddsw.c | 4 ++--
gcc/testsuite/gcc.target/i386/ssse3-phaddw.c | 4 ++--
gcc/testsuite/gcc.target/i386/ssse3-phsubd.c | 4 ++--
gcc/testsuite/gcc.target/i386/ssse3-phsubsw.c | 4 ++--
gcc/testsuite/gcc.target/i386/ssse3-phsubw.c | 4 ++--
gcc/testsuite/gcc.target/i386/ssse3-pmaddubsw.c | 4 ++--
gcc/testsuite/gcc.target/i386/ssse3-pmulhrsw.c | 4 ++--
gcc/testsuite/gcc.target/i386/ssse3-pshufb.c | 6 +++---
gcc/testsuite/gcc.target/i386/ssse3-psignb.c | 4 ++--
gcc/testsuite/gcc.target/i386/ssse3-psignd.c | 4 ++--
gcc/testsuite/gcc.target/i386/ssse3-psignw.c | 4 ++--
16 files changed, 34 insertions(+), 34 deletions(-)
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c b/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c
index 7caa1b6c3a6..eef4ccae222 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c
@@ -15,7 +15,7 @@
#include "ssse3-vals.h"
#include <tmmintrin.h>
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Test the 64-bit form */
static void
ssse3_test_pabsb (int *i1, int *r)
@@ -63,7 +63,7 @@ TEST (void)
/* Manually compute the result */
compute_correct_result(&vals[i + 0], ck);
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Run the 64-bit tests */
ssse3_test_pabsb (&vals[i + 0], &r[0]);
ssse3_test_pabsb (&vals[i + 2], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pabsd.c b/gcc/testsuite/gcc.target/i386/ssse3-pabsd.c
index 3a73cf01170..60043bad4a4 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-pabsd.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-pabsd.c
@@ -16,7 +16,7 @@
#include <tmmintrin.h>
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Test the 64-bit form */
static void
ssse3_test_pabsd (int *i1, int *r)
@@ -62,7 +62,7 @@ TEST (void)
/* Manually compute the result */
compute_correct_result(&vals[i + 0], ck);
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Run the 64-bit tests */
ssse3_test_pabsd (&vals[i + 0], &r[0]);
ssse3_test_pabsd (&vals[i + 2], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pabsw.c b/gcc/testsuite/gcc.target/i386/ssse3-pabsw.c
index 67e4721b8e6..dd0caa9783f 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-pabsw.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-pabsw.c
@@ -16,7 +16,7 @@
#include <tmmintrin.h>
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Test the 64-bit form */
static void
ssse3_test_pabsw (int *i1, int *r)
@@ -64,7 +64,7 @@ TEST (void)
/* Manually compute the result */
compute_correct_result (&vals[i + 0], ck);
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Run the 64-bit tests */
ssse3_test_pabsw (&vals[i + 0], &r[0]);
ssse3_test_pabsw (&vals[i + 2], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-palignr.c b/gcc/testsuite/gcc.target/i386/ssse3-palignr.c
index dbee9bee4aa..f266f7805b8 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-palignr.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-palignr.c
@@ -17,7 +17,7 @@
#include <tmmintrin.h>
#include <string.h>
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Test the 64-bit form */
static void
ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
@@ -214,7 +214,7 @@ compute_correct_result_128 (int *i1, int *i2, unsigned int imm, int *r)
bout[i] = buf[imm + i];
}
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
static void
compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
{
@@ -256,7 +256,7 @@ TEST (void)
for (i = 0; i < 256; i += 8)
for (imm = 0; imm < 100; imm++)
{
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Manually compute the result */
compute_correct_result_64 (&vals[i + 0], &vals[i + 4], imm, ck);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-phaddd.c b/gcc/testsuite/gcc.target/i386/ssse3-phaddd.c
index bef78168659..1bb9006230a 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-phaddd.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-phaddd.c
@@ -16,7 +16,7 @@
#include <tmmintrin.h>
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Test the 64-bit form */
static void
ssse3_test_phaddd (int *i1, int *i2, int *r)
@@ -64,7 +64,7 @@ TEST (void)
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Run the 64-bit tests */
ssse3_test_phaddd (&vals[i + 0], &vals[i + 2], &r[0]);
ssse3_test_phaddd (&vals[i + 4], &vals[i + 6], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-phaddsw.c b/gcc/testsuite/gcc.target/i386/ssse3-phaddsw.c
index ff31fe5a5fe..d93bb452347 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-phaddsw.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-phaddsw.c
@@ -16,7 +16,7 @@
#include <tmmintrin.h>
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Test the 64-bit form */
static void
ssse3_test_phaddsw (int *i1, int *i2, int *r)
@@ -78,7 +78,7 @@ TEST (void)
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Run the 64-bit tests */
ssse3_test_phaddsw (&vals[i + 0], &vals[i + 2], &r[0]);
ssse3_test_phaddsw (&vals[i + 4], &vals[i + 6], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-phaddw.c b/gcc/testsuite/gcc.target/i386/ssse3-phaddw.c
index 05c0afd4f69..5f0524e6c55 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-phaddw.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-phaddw.c
@@ -16,7 +16,7 @@
#include <tmmintrin.h>
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Test the 64-bit form */
static void
ssse3_test_phaddw (int *i1, int *i2, int *r)
@@ -67,7 +67,7 @@ TEST (void)
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Run the 64-bit tests */
ssse3_test_phaddw (&vals[i + 0], &vals[i + 2], &r[0]);
ssse3_test_phaddw (&vals[i + 4], &vals[i + 6], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-phsubd.c b/gcc/testsuite/gcc.target/i386/ssse3-phsubd.c
index 5884e5c12fe..d3bc13df15b 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-phsubd.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-phsubd.c
@@ -16,7 +16,7 @@
#include <tmmintrin.h>
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Test the 64-bit form */
static void
ssse3_test_phsubd (int *i1, int *i2, int *r)
@@ -63,7 +63,7 @@ TEST (void)
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Run the 64-bit tests */
ssse3_test_phsubd (&vals[i + 0], &vals[i + 2], &r[0]);
ssse3_test_phsubd (&vals[i + 4], &vals[i + 6], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-phsubsw.c b/gcc/testsuite/gcc.target/i386/ssse3-phsubsw.c
index 371c8d112d1..6024c04e0ce 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-phsubsw.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-phsubsw.c
@@ -16,7 +16,7 @@
#include <tmmintrin.h>
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Test the 64-bit form */
static void
ssse3_test_phsubsw (int *i1, int *i2, int *r)
@@ -81,7 +81,7 @@ TEST (void)
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Run the 64-bit tests */
ssse3_test_phsubsw (&vals[i + 0], &vals[i + 2], &r[0]);
ssse3_test_phsubsw (&vals[i + 4], &vals[i + 6], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-phsubw.c b/gcc/testsuite/gcc.target/i386/ssse3-phsubw.c
index f3dbf9c9896..ab7486628bf 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-phsubw.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-phsubw.c
@@ -15,7 +15,7 @@
#include <tmmintrin.h>
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Test the 64-bit form */
static void
ssse3_test_phsubw (int *i1, int *i2, int *r)
@@ -66,7 +66,7 @@ TEST (void)
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Run the 64-bit tests */
ssse3_test_phsubw (&vals[i + 0], &vals[i + 2], &r[0]);
ssse3_test_phsubw (&vals[i + 4], &vals[i + 6], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pmaddubsw.c b/gcc/testsuite/gcc.target/i386/ssse3-pmaddubsw.c
index 00bfc844f42..c3e97a1f4eb 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-pmaddubsw.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-pmaddubsw.c
@@ -16,7 +16,7 @@
#include <tmmintrin.h>
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Test the 64-bit form */
static void
ssse3_test_pmaddubsw (int *i1, int *i2, int *r)
@@ -81,7 +81,7 @@ TEST (void)
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Run the 64-bit tests */
ssse3_test_pmaddubsw (&vals[i + 0], &vals[i + 4], &r[0]);
ssse3_test_pmaddubsw (&vals[i + 2], &vals[i + 6], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pmulhrsw.c b/gcc/testsuite/gcc.target/i386/ssse3-pmulhrsw.c
index 24570b3bd63..be4e7eb406a 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-pmulhrsw.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-pmulhrsw.c
@@ -16,7 +16,7 @@
#include <tmmintrin.h>
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Test the 64-bit form */
static void
ssse3_test_pmulhrsw (int *i1, int *i2, int *r)
@@ -68,7 +68,7 @@ TEST (void)
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Run the 64-bit tests */
ssse3_test_pmulhrsw (&vals[i + 0], &vals[i + 4], &r[0]);
ssse3_test_pmulhrsw (&vals[i + 2], &vals[i + 6], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pshufb.c b/gcc/testsuite/gcc.target/i386/ssse3-pshufb.c
index b995456b61c..67db8dcec7e 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-pshufb.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-pshufb.c
@@ -16,7 +16,7 @@
#include <tmmintrin.h>
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Test the 64-bit form */
static void
ssse3_test_pshufb (int *i1, int *i2, int *r)
@@ -38,7 +38,7 @@ ssse3_test_pshufb128 (int *i1, int *i2, int *r)
*(__m128i *)r = _mm_shuffle_epi8 (t1, t2);
}
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Routine to manually compute the results */
static void
compute_correct_result_64 (int *i1, int *i2, int *r)
@@ -91,7 +91,7 @@ TEST (void)
for (i = 0; i < 256; i += 8)
{
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Manually compute the result */
compute_correct_result_64 (&vals[i + 0], &vals[i + 4], ck);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-psignb.c b/gcc/testsuite/gcc.target/i386/ssse3-psignb.c
index 7462929aa20..2b676f4699d 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-psignb.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-psignb.c
@@ -15,7 +15,7 @@
#include <tmmintrin.h>
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Test the 64-bit form */
static void
ssse3_test_psignb (int *i1, int *i2, int *r)
@@ -68,7 +68,7 @@ TEST (void)
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Run the 64-bit tests */
ssse3_test_psignb (&vals[i + 0], &vals[i + 4], &r[0]);
ssse3_test_psignb (&vals[i + 2], &vals[i + 6], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-psignd.c b/gcc/testsuite/gcc.target/i386/ssse3-psignd.c
index eca0489f8d3..0998f232eb8 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-psignd.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-psignd.c
@@ -15,7 +15,7 @@
#include <tmmintrin.h>
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Test the 64-bit form */
static void
ssse3_test_psignd (int *i1, int *i2, int *r)
@@ -65,7 +65,7 @@ TEST (void)
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Run the 64-bit tests */
ssse3_test_psignd (&vals[i + 0], &vals[i + 4], &r[0]);
ssse3_test_psignd (&vals[i + 2], &vals[i + 6], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-psignw.c b/gcc/testsuite/gcc.target/i386/ssse3-psignw.c
index 00a506fd894..f8dc776a8ba 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-psignw.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-psignw.c
@@ -15,7 +15,7 @@
#include <tmmintrin.h>
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Test the 64-bit form */
static void
ssse3_test_psignw (int *i1, int *i2, int *r)
@@ -68,7 +68,7 @@ TEST (void)
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
+#if !defined __AVX__ || defined __x86_64__
/* Run the 64-bit tests */
ssse3_test_psignw (&vals[i + 0], &vals[i + 4], &r[0]);
ssse3_test_psignw (&vals[i + 2], &vals[i + 6], &r[2]);
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [PATCH 40/40] i386: Also enable SSSE3 __m64 tests in 64-bit mode
2019-02-14 12:31 ` [PATCH 40/40] i386: Also enable SSSE3 __m64 tests in 64-bit mode H.J. Lu
@ 2019-02-14 20:21 ` Uros Bizjak
2019-02-14 20:43 ` Uros Bizjak
0 siblings, 1 reply; 62+ messages in thread
From: Uros Bizjak @ 2019-02-14 20:21 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
On Thu, Feb 14, 2019 at 1:30 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Since we now emulate MMX intrinsics with SSE in 64-bit mode, we can
> enable SSSE3 __m64 tests even when AVX is enabled.
>
> PR target/89021
> * gcc.target/i386/ssse3-pabsb.c: Also enable __m64 check in
> 64-bit mode.
> * gcc.target/i386/ssse3-pabsd.c: Likewise.
> * gcc.target/i386/ssse3-pabsw.c: Likewise.
> * gcc.target/i386/ssse3-palignr.c: Likewise.
> * gcc.target/i386/ssse3-phaddd.c: Likewise.
> * gcc.target/i386/ssse3-phaddsw.c: Likewise.
> * gcc.target/i386/ssse3-phaddw.c: Likewise.
> * gcc.target/i386/ssse3-phsubd.c: Likewise.
> * gcc.target/i386/ssse3-phsubsw.c: Likewise.
> * gcc.target/i386/ssse3-phsubw.c: Likewise.
> * gcc.target/i386/ssse3-pmaddubsw.c: Likewise.
> * gcc.target/i386/ssse3-pmulhrsw.c: Likewise.
> * gcc.target/i386/ssse3-pshufb.c: Likewise.
> * gcc.target/i386/ssse3-psignb.c: Likewise.
> * gcc.target/i386/ssse3-psignd.c: Likewise.
> * gcc.target/i386/ssse3-psignw.c: Likewise.
> ---
> gcc/testsuite/gcc.target/i386/ssse3-pabsb.c | 4 ++--
> gcc/testsuite/gcc.target/i386/ssse3-pabsd.c | 4 ++--
> gcc/testsuite/gcc.target/i386/ssse3-pabsw.c | 4 ++--
> gcc/testsuite/gcc.target/i386/ssse3-palignr.c | 6 +++---
> gcc/testsuite/gcc.target/i386/ssse3-phaddd.c | 4 ++--
> gcc/testsuite/gcc.target/i386/ssse3-phaddsw.c | 4 ++--
> gcc/testsuite/gcc.target/i386/ssse3-phaddw.c | 4 ++--
> gcc/testsuite/gcc.target/i386/ssse3-phsubd.c | 4 ++--
> gcc/testsuite/gcc.target/i386/ssse3-phsubsw.c | 4 ++--
> gcc/testsuite/gcc.target/i386/ssse3-phsubw.c | 4 ++--
> gcc/testsuite/gcc.target/i386/ssse3-pmaddubsw.c | 4 ++--
> gcc/testsuite/gcc.target/i386/ssse3-pmulhrsw.c | 4 ++--
> gcc/testsuite/gcc.target/i386/ssse3-pshufb.c | 6 +++---
> gcc/testsuite/gcc.target/i386/ssse3-psignb.c | 4 ++--
> gcc/testsuite/gcc.target/i386/ssse3-psignd.c | 4 ++--
> gcc/testsuite/gcc.target/i386/ssse3-psignw.c | 4 ++--
> 16 files changed, 34 insertions(+), 34 deletions(-)
>
> diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c b/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c
> index 7caa1b6c3a6..eef4ccae222 100644
> --- a/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c
> +++ b/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c
> @@ -15,7 +15,7 @@
> #include "ssse3-vals.h"
> #include <tmmintrin.h>
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
Better add || defined __x86_64__.
I also wonder why AVX has to be disabled here. MMX should be orthogonal to AVX.
Uros.
> /* Test the 64-bit form */
> static void
> ssse3_test_pabsb (int *i1, int *r)
> @@ -63,7 +63,7 @@ TEST (void)
> /* Manually compute the result */
> compute_correct_result(&vals[i + 0], ck);
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Run the 64-bit tests */
> ssse3_test_pabsb (&vals[i + 0], &r[0]);
> ssse3_test_pabsb (&vals[i + 2], &r[2]);
> diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pabsd.c b/gcc/testsuite/gcc.target/i386/ssse3-pabsd.c
> index 3a73cf01170..60043bad4a4 100644
> --- a/gcc/testsuite/gcc.target/i386/ssse3-pabsd.c
> +++ b/gcc/testsuite/gcc.target/i386/ssse3-pabsd.c
> @@ -16,7 +16,7 @@
>
> #include <tmmintrin.h>
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Test the 64-bit form */
> static void
> ssse3_test_pabsd (int *i1, int *r)
> @@ -62,7 +62,7 @@ TEST (void)
> /* Manually compute the result */
> compute_correct_result(&vals[i + 0], ck);
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Run the 64-bit tests */
> ssse3_test_pabsd (&vals[i + 0], &r[0]);
> ssse3_test_pabsd (&vals[i + 2], &r[2]);
> diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pabsw.c b/gcc/testsuite/gcc.target/i386/ssse3-pabsw.c
> index 67e4721b8e6..dd0caa9783f 100644
> --- a/gcc/testsuite/gcc.target/i386/ssse3-pabsw.c
> +++ b/gcc/testsuite/gcc.target/i386/ssse3-pabsw.c
> @@ -16,7 +16,7 @@
>
> #include <tmmintrin.h>
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Test the 64-bit form */
> static void
> ssse3_test_pabsw (int *i1, int *r)
> @@ -64,7 +64,7 @@ TEST (void)
> /* Manually compute the result */
> compute_correct_result (&vals[i + 0], ck);
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Run the 64-bit tests */
> ssse3_test_pabsw (&vals[i + 0], &r[0]);
> ssse3_test_pabsw (&vals[i + 2], &r[2]);
> diff --git a/gcc/testsuite/gcc.target/i386/ssse3-palignr.c b/gcc/testsuite/gcc.target/i386/ssse3-palignr.c
> index dbee9bee4aa..f266f7805b8 100644
> --- a/gcc/testsuite/gcc.target/i386/ssse3-palignr.c
> +++ b/gcc/testsuite/gcc.target/i386/ssse3-palignr.c
> @@ -17,7 +17,7 @@
> #include <tmmintrin.h>
> #include <string.h>
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Test the 64-bit form */
> static void
> ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
> @@ -214,7 +214,7 @@ compute_correct_result_128 (int *i1, int *i2, unsigned int imm, int *r)
> bout[i] = buf[imm + i];
> }
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> static void
> compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
> {
> @@ -256,7 +256,7 @@ TEST (void)
> for (i = 0; i < 256; i += 8)
> for (imm = 0; imm < 100; imm++)
> {
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Manually compute the result */
> compute_correct_result_64 (&vals[i + 0], &vals[i + 4], imm, ck);
>
> diff --git a/gcc/testsuite/gcc.target/i386/ssse3-phaddd.c b/gcc/testsuite/gcc.target/i386/ssse3-phaddd.c
> index bef78168659..1bb9006230a 100644
> --- a/gcc/testsuite/gcc.target/i386/ssse3-phaddd.c
> +++ b/gcc/testsuite/gcc.target/i386/ssse3-phaddd.c
> @@ -16,7 +16,7 @@
>
> #include <tmmintrin.h>
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Test the 64-bit form */
> static void
> ssse3_test_phaddd (int *i1, int *i2, int *r)
> @@ -64,7 +64,7 @@ TEST (void)
> /* Manually compute the result */
> compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Run the 64-bit tests */
> ssse3_test_phaddd (&vals[i + 0], &vals[i + 2], &r[0]);
> ssse3_test_phaddd (&vals[i + 4], &vals[i + 6], &r[2]);
> diff --git a/gcc/testsuite/gcc.target/i386/ssse3-phaddsw.c b/gcc/testsuite/gcc.target/i386/ssse3-phaddsw.c
> index ff31fe5a5fe..d93bb452347 100644
> --- a/gcc/testsuite/gcc.target/i386/ssse3-phaddsw.c
> +++ b/gcc/testsuite/gcc.target/i386/ssse3-phaddsw.c
> @@ -16,7 +16,7 @@
>
> #include <tmmintrin.h>
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Test the 64-bit form */
> static void
> ssse3_test_phaddsw (int *i1, int *i2, int *r)
> @@ -78,7 +78,7 @@ TEST (void)
> /* Manually compute the result */
> compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Run the 64-bit tests */
> ssse3_test_phaddsw (&vals[i + 0], &vals[i + 2], &r[0]);
> ssse3_test_phaddsw (&vals[i + 4], &vals[i + 6], &r[2]);
> diff --git a/gcc/testsuite/gcc.target/i386/ssse3-phaddw.c b/gcc/testsuite/gcc.target/i386/ssse3-phaddw.c
> index 05c0afd4f69..5f0524e6c55 100644
> --- a/gcc/testsuite/gcc.target/i386/ssse3-phaddw.c
> +++ b/gcc/testsuite/gcc.target/i386/ssse3-phaddw.c
> @@ -16,7 +16,7 @@
>
> #include <tmmintrin.h>
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Test the 64-bit form */
> static void
> ssse3_test_phaddw (int *i1, int *i2, int *r)
> @@ -67,7 +67,7 @@ TEST (void)
> /* Manually compute the result */
> compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Run the 64-bit tests */
> ssse3_test_phaddw (&vals[i + 0], &vals[i + 2], &r[0]);
> ssse3_test_phaddw (&vals[i + 4], &vals[i + 6], &r[2]);
> diff --git a/gcc/testsuite/gcc.target/i386/ssse3-phsubd.c b/gcc/testsuite/gcc.target/i386/ssse3-phsubd.c
> index 5884e5c12fe..d3bc13df15b 100644
> --- a/gcc/testsuite/gcc.target/i386/ssse3-phsubd.c
> +++ b/gcc/testsuite/gcc.target/i386/ssse3-phsubd.c
> @@ -16,7 +16,7 @@
>
> #include <tmmintrin.h>
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Test the 64-bit form */
> static void
> ssse3_test_phsubd (int *i1, int *i2, int *r)
> @@ -63,7 +63,7 @@ TEST (void)
> /* Manually compute the result */
> compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Run the 64-bit tests */
> ssse3_test_phsubd (&vals[i + 0], &vals[i + 2], &r[0]);
> ssse3_test_phsubd (&vals[i + 4], &vals[i + 6], &r[2]);
> diff --git a/gcc/testsuite/gcc.target/i386/ssse3-phsubsw.c b/gcc/testsuite/gcc.target/i386/ssse3-phsubsw.c
> index 371c8d112d1..6024c04e0ce 100644
> --- a/gcc/testsuite/gcc.target/i386/ssse3-phsubsw.c
> +++ b/gcc/testsuite/gcc.target/i386/ssse3-phsubsw.c
> @@ -16,7 +16,7 @@
>
> #include <tmmintrin.h>
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Test the 64-bit form */
> static void
> ssse3_test_phsubsw (int *i1, int *i2, int *r)
> @@ -81,7 +81,7 @@ TEST (void)
> /* Manually compute the result */
> compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Run the 64-bit tests */
> ssse3_test_phsubsw (&vals[i + 0], &vals[i + 2], &r[0]);
> ssse3_test_phsubsw (&vals[i + 4], &vals[i + 6], &r[2]);
> diff --git a/gcc/testsuite/gcc.target/i386/ssse3-phsubw.c b/gcc/testsuite/gcc.target/i386/ssse3-phsubw.c
> index f3dbf9c9896..ab7486628bf 100644
> --- a/gcc/testsuite/gcc.target/i386/ssse3-phsubw.c
> +++ b/gcc/testsuite/gcc.target/i386/ssse3-phsubw.c
> @@ -15,7 +15,7 @@
>
> #include <tmmintrin.h>
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Test the 64-bit form */
> static void
> ssse3_test_phsubw (int *i1, int *i2, int *r)
> @@ -66,7 +66,7 @@ TEST (void)
> /* Manually compute the result */
> compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Run the 64-bit tests */
> ssse3_test_phsubw (&vals[i + 0], &vals[i + 2], &r[0]);
> ssse3_test_phsubw (&vals[i + 4], &vals[i + 6], &r[2]);
> diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pmaddubsw.c b/gcc/testsuite/gcc.target/i386/ssse3-pmaddubsw.c
> index 00bfc844f42..c3e97a1f4eb 100644
> --- a/gcc/testsuite/gcc.target/i386/ssse3-pmaddubsw.c
> +++ b/gcc/testsuite/gcc.target/i386/ssse3-pmaddubsw.c
> @@ -16,7 +16,7 @@
>
> #include <tmmintrin.h>
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Test the 64-bit form */
> static void
> ssse3_test_pmaddubsw (int *i1, int *i2, int *r)
> @@ -81,7 +81,7 @@ TEST (void)
> /* Manually compute the result */
> compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Run the 64-bit tests */
> ssse3_test_pmaddubsw (&vals[i + 0], &vals[i + 4], &r[0]);
> ssse3_test_pmaddubsw (&vals[i + 2], &vals[i + 6], &r[2]);
> diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pmulhrsw.c b/gcc/testsuite/gcc.target/i386/ssse3-pmulhrsw.c
> index 24570b3bd63..be4e7eb406a 100644
> --- a/gcc/testsuite/gcc.target/i386/ssse3-pmulhrsw.c
> +++ b/gcc/testsuite/gcc.target/i386/ssse3-pmulhrsw.c
> @@ -16,7 +16,7 @@
>
> #include <tmmintrin.h>
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Test the 64-bit form */
> static void
> ssse3_test_pmulhrsw (int *i1, int *i2, int *r)
> @@ -68,7 +68,7 @@ TEST (void)
> /* Manually compute the result */
> compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Run the 64-bit tests */
> ssse3_test_pmulhrsw (&vals[i + 0], &vals[i + 4], &r[0]);
> ssse3_test_pmulhrsw (&vals[i + 2], &vals[i + 6], &r[2]);
> diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pshufb.c b/gcc/testsuite/gcc.target/i386/ssse3-pshufb.c
> index b995456b61c..67db8dcec7e 100644
> --- a/gcc/testsuite/gcc.target/i386/ssse3-pshufb.c
> +++ b/gcc/testsuite/gcc.target/i386/ssse3-pshufb.c
> @@ -16,7 +16,7 @@
>
> #include <tmmintrin.h>
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Test the 64-bit form */
> static void
> ssse3_test_pshufb (int *i1, int *i2, int *r)
> @@ -38,7 +38,7 @@ ssse3_test_pshufb128 (int *i1, int *i2, int *r)
> *(__m128i *)r = _mm_shuffle_epi8 (t1, t2);
> }
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Routine to manually compute the results */
> static void
> compute_correct_result_64 (int *i1, int *i2, int *r)
> @@ -91,7 +91,7 @@ TEST (void)
>
> for (i = 0; i < 256; i += 8)
> {
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Manually compute the result */
> compute_correct_result_64 (&vals[i + 0], &vals[i + 4], ck);
>
> diff --git a/gcc/testsuite/gcc.target/i386/ssse3-psignb.c b/gcc/testsuite/gcc.target/i386/ssse3-psignb.c
> index 7462929aa20..2b676f4699d 100644
> --- a/gcc/testsuite/gcc.target/i386/ssse3-psignb.c
> +++ b/gcc/testsuite/gcc.target/i386/ssse3-psignb.c
> @@ -15,7 +15,7 @@
>
> #include <tmmintrin.h>
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Test the 64-bit form */
> static void
> ssse3_test_psignb (int *i1, int *i2, int *r)
> @@ -68,7 +68,7 @@ TEST (void)
> /* Manually compute the result */
> compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Run the 64-bit tests */
> ssse3_test_psignb (&vals[i + 0], &vals[i + 4], &r[0]);
> ssse3_test_psignb (&vals[i + 2], &vals[i + 6], &r[2]);
> diff --git a/gcc/testsuite/gcc.target/i386/ssse3-psignd.c b/gcc/testsuite/gcc.target/i386/ssse3-psignd.c
> index eca0489f8d3..0998f232eb8 100644
> --- a/gcc/testsuite/gcc.target/i386/ssse3-psignd.c
> +++ b/gcc/testsuite/gcc.target/i386/ssse3-psignd.c
> @@ -15,7 +15,7 @@
>
> #include <tmmintrin.h>
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Test the 64-bit form */
> static void
> ssse3_test_psignd (int *i1, int *i2, int *r)
> @@ -65,7 +65,7 @@ TEST (void)
> /* Manually compute the result */
> compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Run the 64-bit tests */
> ssse3_test_psignd (&vals[i + 0], &vals[i + 4], &r[0]);
> ssse3_test_psignd (&vals[i + 2], &vals[i + 6], &r[2]);
> diff --git a/gcc/testsuite/gcc.target/i386/ssse3-psignw.c b/gcc/testsuite/gcc.target/i386/ssse3-psignw.c
> index 00a506fd894..f8dc776a8ba 100644
> --- a/gcc/testsuite/gcc.target/i386/ssse3-psignw.c
> +++ b/gcc/testsuite/gcc.target/i386/ssse3-psignw.c
> @@ -15,7 +15,7 @@
>
> #include <tmmintrin.h>
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Test the 64-bit form */
> static void
> ssse3_test_psignw (int *i1, int *i2, int *r)
> @@ -68,7 +68,7 @@ TEST (void)
> /* Manually compute the result */
> compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
>
> -#ifndef __AVX__
> +#if !defined __AVX__ || defined __x86_64__
> /* Run the 64-bit tests */
> ssse3_test_psignw (&vals[i + 0], &vals[i + 4], &r[0]);
> ssse3_test_psignw (&vals[i + 2], &vals[i + 6], &r[2]);
> --
> 2.20.1
>
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [PATCH 40/40] i386: Also enable SSSE3 __m64 tests in 64-bit mode
2019-02-14 20:21 ` Uros Bizjak
@ 2019-02-14 20:43 ` Uros Bizjak
2019-02-14 20:56 ` H.J. Lu
0 siblings, 1 reply; 62+ messages in thread
From: Uros Bizjak @ 2019-02-14 20:43 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 3210 bytes --]
On Thu, Feb 14, 2019 at 9:21 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Thu, Feb 14, 2019 at 1:30 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > Since we now emulate MMX intrinsics with SSE in 64-bit mode, we can
> > enable SSSE3 __m64 tests even when AVX is enabled.
> >
> > PR target/89021
> > * gcc.target/i386/ssse3-pabsb.c: Also enable __m64 check in
> > 64-bit mode.
> > * gcc.target/i386/ssse3-pabsd.c: Likewise.
> > * gcc.target/i386/ssse3-pabsw.c: Likewise.
> > * gcc.target/i386/ssse3-palignr.c: Likewise.
> > * gcc.target/i386/ssse3-phaddd.c: Likewise.
> > * gcc.target/i386/ssse3-phaddsw.c: Likewise.
> > * gcc.target/i386/ssse3-phaddw.c: Likewise.
> > * gcc.target/i386/ssse3-phsubd.c: Likewise.
> > * gcc.target/i386/ssse3-phsubsw.c: Likewise.
> > * gcc.target/i386/ssse3-phsubw.c: Likewise.
> > * gcc.target/i386/ssse3-pmaddubsw.c: Likewise.
> > * gcc.target/i386/ssse3-pmulhrsw.c: Likewise.
> > * gcc.target/i386/ssse3-pshufb.c: Likewise.
> > * gcc.target/i386/ssse3-psignb.c: Likewise.
> > * gcc.target/i386/ssse3-psignd.c: Likewise.
> > * gcc.target/i386/ssse3-psignw.c: Likewise.
> > ---
> > gcc/testsuite/gcc.target/i386/ssse3-pabsb.c | 4 ++--
> > gcc/testsuite/gcc.target/i386/ssse3-pabsd.c | 4 ++--
> > gcc/testsuite/gcc.target/i386/ssse3-pabsw.c | 4 ++--
> > gcc/testsuite/gcc.target/i386/ssse3-palignr.c | 6 +++---
> > gcc/testsuite/gcc.target/i386/ssse3-phaddd.c | 4 ++--
> > gcc/testsuite/gcc.target/i386/ssse3-phaddsw.c | 4 ++--
> > gcc/testsuite/gcc.target/i386/ssse3-phaddw.c | 4 ++--
> > gcc/testsuite/gcc.target/i386/ssse3-phsubd.c | 4 ++--
> > gcc/testsuite/gcc.target/i386/ssse3-phsubsw.c | 4 ++--
> > gcc/testsuite/gcc.target/i386/ssse3-phsubw.c | 4 ++--
> > gcc/testsuite/gcc.target/i386/ssse3-pmaddubsw.c | 4 ++--
> > gcc/testsuite/gcc.target/i386/ssse3-pmulhrsw.c | 4 ++--
> > gcc/testsuite/gcc.target/i386/ssse3-pshufb.c | 6 +++---
> > gcc/testsuite/gcc.target/i386/ssse3-psignb.c | 4 ++--
> > gcc/testsuite/gcc.target/i386/ssse3-psignd.c | 4 ++--
> > gcc/testsuite/gcc.target/i386/ssse3-psignw.c | 4 ++--
> > 16 files changed, 34 insertions(+), 34 deletions(-)
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c b/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c
> > index 7caa1b6c3a6..eef4ccae222 100644
> > --- a/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c
> > +++ b/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c
> > @@ -15,7 +15,7 @@
> > #include "ssse3-vals.h"
> > #include <tmmintrin.h>
> >
> > -#ifndef __AVX__
> > +#if !defined __AVX__ || defined __x86_64__
>
> Better add || defined __x86_64__.
>
> I also wonder why AVX has to be disabled here. MMX should be orthogonal to AVX.
Actually, current trunk passes the tests with the #ifndef __AVX__ guards removed, when run with:
gmake -k check-gcc RUNTESTFLAGS="--target_board=unix\{,-m32\}\{,-mavx\} i386.exp=ssse3-*.c"
=== gcc tests ===
Schedule of variations:
unix
unix/-mavx
unix/-m32
unix/-m32/-mavx
=== gcc Summary ===
# of expected passes 128
Uros.
[-- Attachment #2: t.diff.txt --]
[-- Type: text/plain, Size: 15425 bytes --]
Index: gcc.target/i386/ssse3-pabsb.c
===================================================================
--- gcc.target/i386/ssse3-pabsb.c (revision 268854)
+++ gcc.target/i386/ssse3-pabsb.c (working copy)
@@ -15,7 +15,6 @@
#include "ssse3-vals.h"
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_pabsb (int *i1, int *r)
@@ -24,7 +23,6 @@
*(__m64 *) r = _mm_abs_pi8 (t1);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -63,12 +61,10 @@
/* Manually compute the result */
compute_correct_result(&vals[i + 0], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_pabsb (&vals[i + 0], &r[0]);
ssse3_test_pabsb (&vals[i + 2], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_pabsb128 (&vals[i + 0], r);
Index: gcc.target/i386/ssse3-pabsd.c
===================================================================
--- gcc.target/i386/ssse3-pabsd.c (revision 268854)
+++ gcc.target/i386/ssse3-pabsd.c (working copy)
@@ -16,7 +16,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_pabsd (int *i1, int *r)
@@ -25,7 +24,6 @@
*(__m64 *) r = _mm_abs_pi32 (t1);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -62,12 +60,10 @@
/* Manually compute the result */
compute_correct_result(&vals[i + 0], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_pabsd (&vals[i + 0], &r[0]);
ssse3_test_pabsd (&vals[i + 2], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_pabsd128 (&vals[i + 0], r);
Index: gcc.target/i386/ssse3-pabsw.c
===================================================================
--- gcc.target/i386/ssse3-pabsw.c (revision 268854)
+++ gcc.target/i386/ssse3-pabsw.c (working copy)
@@ -16,7 +16,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_pabsw (int *i1, int *r)
@@ -25,7 +24,6 @@
*(__m64 *) r = _mm_abs_pi16 (t1);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -64,12 +62,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_pabsw (&vals[i + 0], &r[0]);
ssse3_test_pabsw (&vals[i + 2], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_pabsw128 (&vals[i + 0], r);
Index: gcc.target/i386/ssse3-palignr.c
===================================================================
--- gcc.target/i386/ssse3-palignr.c (revision 268854)
+++ gcc.target/i386/ssse3-palignr.c (working copy)
@@ -17,7 +17,6 @@
#include <tmmintrin.h>
#include <string.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
@@ -82,7 +81,6 @@
_mm_empty();
}
-#endif
/* Test the 128-bit form */
static void
@@ -214,7 +212,6 @@
bout[i] = buf[imm + i];
}
-#ifndef __AVX__
static void
compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
{
@@ -242,7 +239,6 @@
else
bout[i + 8] = buf[imm + i];
}
-#endif
static void
TEST (void)
@@ -256,7 +252,6 @@
for (i = 0; i < 256; i += 8)
for (imm = 0; imm < 100; imm++)
{
-#ifndef __AVX__
/* Manually compute the result */
compute_correct_result_64 (&vals[i + 0], &vals[i + 4], imm, ck);
@@ -264,7 +259,6 @@
ssse3_test_palignr (&vals[i + 0], &vals[i + 4], imm, &r[0]);
ssse3_test_palignr (&vals[i + 2], &vals[i + 6], imm, &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Recompute the results for 128-bits */
compute_correct_result_128 (&vals[i + 0], &vals[i + 4], imm, ck);
Index: gcc.target/i386/ssse3-phaddd.c
===================================================================
--- gcc.target/i386/ssse3-phaddd.c (revision 268854)
+++ gcc.target/i386/ssse3-phaddd.c (working copy)
@@ -16,7 +16,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_phaddd (int *i1, int *i2, int *r)
@@ -26,7 +25,6 @@
*(__m64 *) r = _mm_hadd_pi32 (t1, t2);
_mm_empty();
}
-#endif
/* Test the 128-bit form */
static void
@@ -64,12 +62,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_phaddd (&vals[i + 0], &vals[i + 2], &r[0]);
ssse3_test_phaddd (&vals[i + 4], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_phaddd128 (&vals[i + 0], &vals[i + 4], r);
Index: gcc.target/i386/ssse3-phaddsw.c
===================================================================
--- gcc.target/i386/ssse3-phaddsw.c (revision 268854)
+++ gcc.target/i386/ssse3-phaddsw.c (working copy)
@@ -16,7 +16,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_phaddsw (int *i1, int *i2, int *r)
@@ -26,7 +25,6 @@
*(__m64 *) r = _mm_hadds_pi16 (t1, t2);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -78,12 +76,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_phaddsw (&vals[i + 0], &vals[i + 2], &r[0]);
ssse3_test_phaddsw (&vals[i + 4], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_phaddsw128 (&vals[i + 0], &vals[i + 4], r);
Index: gcc.target/i386/ssse3-phaddw.c
===================================================================
--- gcc.target/i386/ssse3-phaddw.c (revision 268854)
+++ gcc.target/i386/ssse3-phaddw.c (working copy)
@@ -16,7 +16,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_phaddw (int *i1, int *i2, int *r)
@@ -26,7 +25,6 @@
*(__m64 *) r = _mm_hadd_pi16 (t1, t2);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -67,12 +65,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_phaddw (&vals[i + 0], &vals[i + 2], &r[0]);
ssse3_test_phaddw (&vals[i + 4], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_phaddw128 (&vals[i + 0], &vals[i + 4], r);
Index: gcc.target/i386/ssse3-phsubd.c
===================================================================
--- gcc.target/i386/ssse3-phsubd.c (revision 268854)
+++ gcc.target/i386/ssse3-phsubd.c (working copy)
@@ -16,7 +16,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_phsubd (int *i1, int *i2, int *r)
@@ -26,7 +25,6 @@
*(__m64 *) r = _mm_hsub_pi32(t1, t2);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -63,12 +61,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_phsubd (&vals[i + 0], &vals[i + 2], &r[0]);
ssse3_test_phsubd (&vals[i + 4], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_phsubd128 (&vals[i + 0], &vals[i + 4], r);
Index: gcc.target/i386/ssse3-phsubsw.c
===================================================================
--- gcc.target/i386/ssse3-phsubsw.c (revision 268854)
+++ gcc.target/i386/ssse3-phsubsw.c (working copy)
@@ -16,7 +16,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_phsubsw (int *i1, int *i2, int *r)
@@ -28,7 +27,6 @@
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -81,12 +79,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_phsubsw (&vals[i + 0], &vals[i + 2], &r[0]);
ssse3_test_phsubsw (&vals[i + 4], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_phsubsw128 (&vals[i + 0], &vals[i + 4], r);
Index: gcc.target/i386/ssse3-phsubw.c
===================================================================
--- gcc.target/i386/ssse3-phsubw.c (revision 268854)
+++ gcc.target/i386/ssse3-phsubw.c (working copy)
@@ -15,7 +15,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_phsubw (int *i1, int *i2, int *r)
@@ -25,7 +24,6 @@
*(__m64 *) r = _mm_hsub_pi16 (t1, t2);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -66,12 +64,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_phsubw (&vals[i + 0], &vals[i + 2], &r[0]);
ssse3_test_phsubw (&vals[i + 4], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_phsubw128 (&vals[i + 0], &vals[i + 4], r);
Index: gcc.target/i386/ssse3-pmaddubsw.c
===================================================================
--- gcc.target/i386/ssse3-pmaddubsw.c (revision 268854)
+++ gcc.target/i386/ssse3-pmaddubsw.c (working copy)
@@ -16,7 +16,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_pmaddubsw (int *i1, int *i2, int *r)
@@ -26,7 +25,6 @@
*(__m64 *) r = _mm_maddubs_pi16 (t1, t2);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -81,12 +79,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_pmaddubsw (&vals[i + 0], &vals[i + 4], &r[0]);
ssse3_test_pmaddubsw (&vals[i + 2], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_pmaddubsw128 (&vals[i + 0], &vals[i + 4], r);
Index: gcc.target/i386/ssse3-pmulhrsw.c
===================================================================
--- gcc.target/i386/ssse3-pmulhrsw.c (revision 268854)
+++ gcc.target/i386/ssse3-pmulhrsw.c (working copy)
@@ -16,7 +16,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_pmulhrsw (int *i1, int *i2, int *r)
@@ -26,7 +25,6 @@
*(__m64 *) r = _mm_mulhrs_pi16 (t1, t2);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -68,12 +66,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_pmulhrsw (&vals[i + 0], &vals[i + 4], &r[0]);
ssse3_test_pmulhrsw (&vals[i + 2], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_pmulhrsw128 (&vals[i + 0], &vals[i + 4], r);
Index: gcc.target/i386/ssse3-pshufb.c
===================================================================
--- gcc.target/i386/ssse3-pshufb.c (revision 268854)
+++ gcc.target/i386/ssse3-pshufb.c (working copy)
@@ -16,7 +16,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_pshufb (int *i1, int *i2, int *r)
@@ -26,7 +25,6 @@
*(__m64 *)r = _mm_shuffle_pi8 (t1, t2);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -38,7 +36,6 @@
*(__m128i *)r = _mm_shuffle_epi8 (t1, t2);
}
-#ifndef __AVX__
/* Routine to manually compute the results */
static void
compute_correct_result_64 (int *i1, int *i2, int *r)
@@ -60,7 +57,6 @@
bout[i] = b1[8 + (select & 0x7)];
}
}
-#endif
static void
compute_correct_result_128 (int *i1, int *i2, int *r)
@@ -91,7 +87,6 @@
for (i = 0; i < 256; i += 8)
{
-#ifndef __AVX__
/* Manually compute the result */
compute_correct_result_64 (&vals[i + 0], &vals[i + 4], ck);
@@ -99,7 +94,6 @@
ssse3_test_pshufb (&vals[i + 0], &vals[i + 4], &r[0]);
ssse3_test_pshufb (&vals[i + 2], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Recompute the result for 128-bits */
compute_correct_result_128 (&vals[i + 0], &vals[i + 4], ck);
Index: gcc.target/i386/ssse3-psignb.c
===================================================================
--- gcc.target/i386/ssse3-psignb.c (revision 268854)
+++ gcc.target/i386/ssse3-psignb.c (working copy)
@@ -15,7 +15,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_psignb (int *i1, int *i2, int *r)
@@ -25,7 +24,6 @@
*(__m64 *) r = _mm_sign_pi8 (t1, t2);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -68,12 +66,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_psignb (&vals[i + 0], &vals[i + 4], &r[0]);
ssse3_test_psignb (&vals[i + 2], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_psignb128 (&vals[i + 0], &vals[i + 4], r);
Index: gcc.target/i386/ssse3-psignd.c
===================================================================
--- gcc.target/i386/ssse3-psignd.c (revision 268854)
+++ gcc.target/i386/ssse3-psignd.c (working copy)
@@ -15,7 +15,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_psignd (int *i1, int *i2, int *r)
@@ -25,7 +24,6 @@
*(__m64 *) r = _mm_sign_pi32 (t1, t2);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -65,12 +63,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_psignd (&vals[i + 0], &vals[i + 4], &r[0]);
ssse3_test_psignd (&vals[i + 2], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_psignd128 (&vals[i + 0], &vals[i + 4], r);
Index: gcc.target/i386/ssse3-psignw.c
===================================================================
--- gcc.target/i386/ssse3-psignw.c (revision 268854)
+++ gcc.target/i386/ssse3-psignw.c (working copy)
@@ -15,7 +15,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_psignw (int *i1, int *i2, int *r)
@@ -25,7 +24,6 @@
*(__m64 *) r = _mm_sign_pi16 (t1, t2);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -68,12 +66,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_psignw (&vals[i + 0], &vals[i + 4], &r[0]);
ssse3_test_psignw (&vals[i + 2], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_psignw128 (&vals[i + 0], &vals[i + 4], r);
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [PATCH 40/40] i386: Also enable SSSE3 __m64 tests in 64-bit mode
2019-02-14 20:43 ` Uros Bizjak
@ 2019-02-14 20:56 ` H.J. Lu
2019-02-14 21:57 ` [PATCH, testsuite]: Re-enable 64-bit form in gcc.target/i386/ssse3-*.c on AVX targets Uros Bizjak
0 siblings, 1 reply; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 20:56 UTC (permalink / raw)
To: Uros Bizjak; +Cc: gcc-patches
On Thu, Feb 14, 2019 at 12:43 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Thu, Feb 14, 2019 at 9:21 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Thu, Feb 14, 2019 at 1:30 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > Since we now emulate MMX intrinsics with SSE in 64-bit mode, we can
> > > enable SSSE3 __m64 tests even when AVX is enabled.
> > >
> > > PR target/89021
> > > * gcc.target/i386/ssse3-pabsb.c: Also enable __m64 check in
> > > 64-bit mode.
> > > * gcc.target/i386/ssse3-pabsd.c: Likewise.
> > > * gcc.target/i386/ssse3-pabsw.c: Likewise.
> > > * gcc.target/i386/ssse3-palignr.c: Likewise.
> > > * gcc.target/i386/ssse3-phaddd.c: Likewise.
> > > * gcc.target/i386/ssse3-phaddsw.c: Likewise.
> > > * gcc.target/i386/ssse3-phaddw.c: Likewise.
> > > * gcc.target/i386/ssse3-phsubd.c: Likewise.
> > > * gcc.target/i386/ssse3-phsubsw.c: Likewise.
> > > * gcc.target/i386/ssse3-phsubw.c: Likewise.
> > > * gcc.target/i386/ssse3-pmaddubsw.c: Likewise.
> > > * gcc.target/i386/ssse3-pmulhrsw.c: Likewise.
> > > * gcc.target/i386/ssse3-pshufb.c: Likewise.
> > > * gcc.target/i386/ssse3-psignb.c: Likewise.
> > > * gcc.target/i386/ssse3-psignd.c: Likewise.
> > > * gcc.target/i386/ssse3-psignw.c: Likewise.
> > > ---
> > > gcc/testsuite/gcc.target/i386/ssse3-pabsb.c | 4 ++--
> > > gcc/testsuite/gcc.target/i386/ssse3-pabsd.c | 4 ++--
> > > gcc/testsuite/gcc.target/i386/ssse3-pabsw.c | 4 ++--
> > > gcc/testsuite/gcc.target/i386/ssse3-palignr.c | 6 +++---
> > > gcc/testsuite/gcc.target/i386/ssse3-phaddd.c | 4 ++--
> > > gcc/testsuite/gcc.target/i386/ssse3-phaddsw.c | 4 ++--
> > > gcc/testsuite/gcc.target/i386/ssse3-phaddw.c | 4 ++--
> > > gcc/testsuite/gcc.target/i386/ssse3-phsubd.c | 4 ++--
> > > gcc/testsuite/gcc.target/i386/ssse3-phsubsw.c | 4 ++--
> > > gcc/testsuite/gcc.target/i386/ssse3-phsubw.c | 4 ++--
> > > gcc/testsuite/gcc.target/i386/ssse3-pmaddubsw.c | 4 ++--
> > > gcc/testsuite/gcc.target/i386/ssse3-pmulhrsw.c | 4 ++--
> > > gcc/testsuite/gcc.target/i386/ssse3-pshufb.c | 6 +++---
> > > gcc/testsuite/gcc.target/i386/ssse3-psignb.c | 4 ++--
> > > gcc/testsuite/gcc.target/i386/ssse3-psignd.c | 4 ++--
> > > gcc/testsuite/gcc.target/i386/ssse3-psignw.c | 4 ++--
> > > 16 files changed, 34 insertions(+), 34 deletions(-)
> > >
> > > diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c b/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c
> > > index 7caa1b6c3a6..eef4ccae222 100644
> > > --- a/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c
> > > +++ b/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c
> > > @@ -15,7 +15,7 @@
> > > #include "ssse3-vals.h"
> > > #include <tmmintrin.h>
> > >
> > > -#ifndef __AVX__
> > > +#if !defined __AVX__ || defined __x86_64__
> >
> > Better add || defined __x86_64__.
> >
> > I also wonder why AVX has to be disabled here. MMX should be orthogonal to AVX.
>
> Actually, current trunk passes tests with #ifndef __AVX__ removed and:
I don't remember why AVX was disabled. It is possible that the AVX SDE at
the time didn't support MMX with AVX. Can you check in a separate patch
to remove the __AVX__ check?
Thanks.
> gmake -k check-gcc RUNTESTFLAGS="--target_board=unix\{,-m32\}\{,-mavx\} i386.exp=ssse3-*.c"
>
> === gcc tests ===
>
> Schedule of variations:
> unix
> unix/-mavx
> unix/-m32
> unix/-m32/-mavx
>
> === gcc Summary ===
>
> # of expected passes 128
>
> Uros.
--
H.J.
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH, testsuite]: Re-enable 64-bit form in gcc.target/i386/ssse3-*.c on AVX targets
2019-02-14 20:56 ` H.J. Lu
@ 2019-02-14 21:57 ` Uros Bizjak
0 siblings, 0 replies; 62+ messages in thread
From: Uros Bizjak @ 2019-02-14 21:57 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 916 bytes --]
2019-02-14 Uroš Bizjak <ubizjak@gmail.com>
* gcc.target/i386/ssse3-pabsb.c: Re-enable 64-bit form on AVX targets.
* gcc.target/i386/ssse3-pabsd.c: Ditto.
* gcc.target/i386/ssse3-pabsw.c: Ditto.
* gcc.target/i386/ssse3-palignr.c: Ditto.
* gcc.target/i386/ssse3-phaddd.c: Ditto.
* gcc.target/i386/ssse3-phaddsw.c: Ditto.
* gcc.target/i386/ssse3-phaddw.c: Ditto.
* gcc.target/i386/ssse3-phsubd.c: Ditto.
* gcc.target/i386/ssse3-phsubsw.c: Ditto.
* gcc.target/i386/ssse3-phsubw.c: Ditto.
* gcc.target/i386/ssse3-pmaddubsw.c: Ditto.
* gcc.target/i386/ssse3-pmulhrsw.c: Ditto.
* gcc.target/i386/ssse3-pshufb.c: Ditto.
* gcc.target/i386/ssse3-psignb.c: Ditto.
* gcc.target/i386/ssse3-psignd.c: Ditto.
* gcc.target/i386/ssse3-psignw.c: Ditto.
Tested on x86_64-linux-gnu {,-m32} w/ and w/o -mavx.
Committed to mainline SVN.
Uros.
[-- Attachment #2: t.diff.txt --]
[-- Type: text/plain, Size: 15425 bytes --]
Index: gcc.target/i386/ssse3-pabsb.c
===================================================================
--- gcc.target/i386/ssse3-pabsb.c (revision 268854)
+++ gcc.target/i386/ssse3-pabsb.c (working copy)
@@ -15,7 +15,6 @@
#include "ssse3-vals.h"
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_pabsb (int *i1, int *r)
@@ -24,7 +23,6 @@
*(__m64 *) r = _mm_abs_pi8 (t1);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -63,12 +61,10 @@
/* Manually compute the result */
compute_correct_result(&vals[i + 0], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_pabsb (&vals[i + 0], &r[0]);
ssse3_test_pabsb (&vals[i + 2], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_pabsb128 (&vals[i + 0], r);
Index: gcc.target/i386/ssse3-pabsd.c
===================================================================
--- gcc.target/i386/ssse3-pabsd.c (revision 268854)
+++ gcc.target/i386/ssse3-pabsd.c (working copy)
@@ -16,7 +16,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_pabsd (int *i1, int *r)
@@ -25,7 +24,6 @@
*(__m64 *) r = _mm_abs_pi32 (t1);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -62,12 +60,10 @@
/* Manually compute the result */
compute_correct_result(&vals[i + 0], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_pabsd (&vals[i + 0], &r[0]);
ssse3_test_pabsd (&vals[i + 2], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_pabsd128 (&vals[i + 0], r);
Index: gcc.target/i386/ssse3-pabsw.c
===================================================================
--- gcc.target/i386/ssse3-pabsw.c (revision 268854)
+++ gcc.target/i386/ssse3-pabsw.c (working copy)
@@ -16,7 +16,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_pabsw (int *i1, int *r)
@@ -25,7 +24,6 @@
*(__m64 *) r = _mm_abs_pi16 (t1);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -64,12 +62,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_pabsw (&vals[i + 0], &r[0]);
ssse3_test_pabsw (&vals[i + 2], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_pabsw128 (&vals[i + 0], r);
Index: gcc.target/i386/ssse3-palignr.c
===================================================================
--- gcc.target/i386/ssse3-palignr.c (revision 268854)
+++ gcc.target/i386/ssse3-palignr.c (working copy)
@@ -17,7 +17,6 @@
#include <tmmintrin.h>
#include <string.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
@@ -82,7 +81,6 @@
_mm_empty();
}
-#endif
/* Test the 128-bit form */
static void
@@ -214,7 +212,6 @@
bout[i] = buf[imm + i];
}
-#ifndef __AVX__
static void
compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
{
@@ -242,7 +239,6 @@
else
bout[i + 8] = buf[imm + i];
}
-#endif
static void
TEST (void)
@@ -256,7 +252,6 @@
for (i = 0; i < 256; i += 8)
for (imm = 0; imm < 100; imm++)
{
-#ifndef __AVX__
/* Manually compute the result */
compute_correct_result_64 (&vals[i + 0], &vals[i + 4], imm, ck);
@@ -264,7 +259,6 @@
ssse3_test_palignr (&vals[i + 0], &vals[i + 4], imm, &r[0]);
ssse3_test_palignr (&vals[i + 2], &vals[i + 6], imm, &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Recompute the results for 128-bits */
compute_correct_result_128 (&vals[i + 0], &vals[i + 4], imm, ck);
Index: gcc.target/i386/ssse3-phaddd.c
===================================================================
--- gcc.target/i386/ssse3-phaddd.c (revision 268854)
+++ gcc.target/i386/ssse3-phaddd.c (working copy)
@@ -16,7 +16,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_phaddd (int *i1, int *i2, int *r)
@@ -26,7 +25,6 @@
*(__m64 *) r = _mm_hadd_pi32 (t1, t2);
_mm_empty();
}
-#endif
/* Test the 128-bit form */
static void
@@ -64,12 +62,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_phaddd (&vals[i + 0], &vals[i + 2], &r[0]);
ssse3_test_phaddd (&vals[i + 4], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_phaddd128 (&vals[i + 0], &vals[i + 4], r);
Index: gcc.target/i386/ssse3-phaddsw.c
===================================================================
--- gcc.target/i386/ssse3-phaddsw.c (revision 268854)
+++ gcc.target/i386/ssse3-phaddsw.c (working copy)
@@ -16,7 +16,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_phaddsw (int *i1, int *i2, int *r)
@@ -26,7 +25,6 @@
*(__m64 *) r = _mm_hadds_pi16 (t1, t2);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -78,12 +76,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_phaddsw (&vals[i + 0], &vals[i + 2], &r[0]);
ssse3_test_phaddsw (&vals[i + 4], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_phaddsw128 (&vals[i + 0], &vals[i + 4], r);
Index: gcc.target/i386/ssse3-phaddw.c
===================================================================
--- gcc.target/i386/ssse3-phaddw.c (revision 268854)
+++ gcc.target/i386/ssse3-phaddw.c (working copy)
@@ -16,7 +16,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_phaddw (int *i1, int *i2, int *r)
@@ -26,7 +25,6 @@
*(__m64 *) r = _mm_hadd_pi16 (t1, t2);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -67,12 +65,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_phaddw (&vals[i + 0], &vals[i + 2], &r[0]);
ssse3_test_phaddw (&vals[i + 4], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_phaddw128 (&vals[i + 0], &vals[i + 4], r);
Index: gcc.target/i386/ssse3-phsubd.c
===================================================================
--- gcc.target/i386/ssse3-phsubd.c (revision 268854)
+++ gcc.target/i386/ssse3-phsubd.c (working copy)
@@ -16,7 +16,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_phsubd (int *i1, int *i2, int *r)
@@ -26,7 +25,6 @@
*(__m64 *) r = _mm_hsub_pi32(t1, t2);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -63,12 +61,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_phsubd (&vals[i + 0], &vals[i + 2], &r[0]);
ssse3_test_phsubd (&vals[i + 4], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_phsubd128 (&vals[i + 0], &vals[i + 4], r);
Index: gcc.target/i386/ssse3-phsubsw.c
===================================================================
--- gcc.target/i386/ssse3-phsubsw.c (revision 268854)
+++ gcc.target/i386/ssse3-phsubsw.c (working copy)
@@ -16,7 +16,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_phsubsw (int *i1, int *i2, int *r)
@@ -28,7 +27,6 @@
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -81,12 +79,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_phsubsw (&vals[i + 0], &vals[i + 2], &r[0]);
ssse3_test_phsubsw (&vals[i + 4], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_phsubsw128 (&vals[i + 0], &vals[i + 4], r);
Index: gcc.target/i386/ssse3-phsubw.c
===================================================================
--- gcc.target/i386/ssse3-phsubw.c (revision 268854)
+++ gcc.target/i386/ssse3-phsubw.c (working copy)
@@ -15,7 +15,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_phsubw (int *i1, int *i2, int *r)
@@ -25,7 +24,6 @@
*(__m64 *) r = _mm_hsub_pi16 (t1, t2);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -66,12 +64,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_phsubw (&vals[i + 0], &vals[i + 2], &r[0]);
ssse3_test_phsubw (&vals[i + 4], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_phsubw128 (&vals[i + 0], &vals[i + 4], r);
Index: gcc.target/i386/ssse3-pmaddubsw.c
===================================================================
--- gcc.target/i386/ssse3-pmaddubsw.c (revision 268854)
+++ gcc.target/i386/ssse3-pmaddubsw.c (working copy)
@@ -16,7 +16,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_pmaddubsw (int *i1, int *i2, int *r)
@@ -26,7 +25,6 @@
*(__m64 *) r = _mm_maddubs_pi16 (t1, t2);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -81,12 +79,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_pmaddubsw (&vals[i + 0], &vals[i + 4], &r[0]);
ssse3_test_pmaddubsw (&vals[i + 2], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_pmaddubsw128 (&vals[i + 0], &vals[i + 4], r);
Index: gcc.target/i386/ssse3-pmulhrsw.c
===================================================================
--- gcc.target/i386/ssse3-pmulhrsw.c (revision 268854)
+++ gcc.target/i386/ssse3-pmulhrsw.c (working copy)
@@ -16,7 +16,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_pmulhrsw (int *i1, int *i2, int *r)
@@ -26,7 +25,6 @@
*(__m64 *) r = _mm_mulhrs_pi16 (t1, t2);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -68,12 +66,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_pmulhrsw (&vals[i + 0], &vals[i + 4], &r[0]);
ssse3_test_pmulhrsw (&vals[i + 2], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_pmulhrsw128 (&vals[i + 0], &vals[i + 4], r);
Index: gcc.target/i386/ssse3-pshufb.c
===================================================================
--- gcc.target/i386/ssse3-pshufb.c (revision 268854)
+++ gcc.target/i386/ssse3-pshufb.c (working copy)
@@ -16,7 +16,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_pshufb (int *i1, int *i2, int *r)
@@ -26,7 +25,6 @@
*(__m64 *)r = _mm_shuffle_pi8 (t1, t2);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -38,7 +36,6 @@
*(__m128i *)r = _mm_shuffle_epi8 (t1, t2);
}
-#ifndef __AVX__
/* Routine to manually compute the results */
static void
compute_correct_result_64 (int *i1, int *i2, int *r)
@@ -60,7 +57,6 @@
bout[i] = b1[8 + (select & 0x7)];
}
}
-#endif
static void
compute_correct_result_128 (int *i1, int *i2, int *r)
@@ -91,7 +87,6 @@
for (i = 0; i < 256; i += 8)
{
-#ifndef __AVX__
/* Manually compute the result */
compute_correct_result_64 (&vals[i + 0], &vals[i + 4], ck);
@@ -99,7 +94,6 @@
ssse3_test_pshufb (&vals[i + 0], &vals[i + 4], &r[0]);
ssse3_test_pshufb (&vals[i + 2], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Recompute the result for 128-bits */
compute_correct_result_128 (&vals[i + 0], &vals[i + 4], ck);
Index: gcc.target/i386/ssse3-psignb.c
===================================================================
--- gcc.target/i386/ssse3-psignb.c (revision 268854)
+++ gcc.target/i386/ssse3-psignb.c (working copy)
@@ -15,7 +15,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_psignb (int *i1, int *i2, int *r)
@@ -25,7 +24,6 @@
*(__m64 *) r = _mm_sign_pi8 (t1, t2);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -68,12 +66,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_psignb (&vals[i + 0], &vals[i + 4], &r[0]);
ssse3_test_psignb (&vals[i + 2], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_psignb128 (&vals[i + 0], &vals[i + 4], r);
Index: gcc.target/i386/ssse3-psignd.c
===================================================================
--- gcc.target/i386/ssse3-psignd.c (revision 268854)
+++ gcc.target/i386/ssse3-psignd.c (working copy)
@@ -15,7 +15,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_psignd (int *i1, int *i2, int *r)
@@ -25,7 +24,6 @@
*(__m64 *) r = _mm_sign_pi32 (t1, t2);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -65,12 +63,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_psignd (&vals[i + 0], &vals[i + 4], &r[0]);
ssse3_test_psignd (&vals[i + 2], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_psignd128 (&vals[i + 0], &vals[i + 4], r);
Index: gcc.target/i386/ssse3-psignw.c
===================================================================
--- gcc.target/i386/ssse3-psignw.c (revision 268854)
+++ gcc.target/i386/ssse3-psignw.c (working copy)
@@ -15,7 +15,6 @@
#include <tmmintrin.h>
-#ifndef __AVX__
/* Test the 64-bit form */
static void
ssse3_test_psignw (int *i1, int *i2, int *r)
@@ -25,7 +24,6 @@
*(__m64 *) r = _mm_sign_pi16 (t1, t2);
_mm_empty ();
}
-#endif
/* Test the 128-bit form */
static void
@@ -68,12 +66,10 @@
/* Manually compute the result */
compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
-#ifndef __AVX__
/* Run the 64-bit tests */
ssse3_test_psignw (&vals[i + 0], &vals[i + 4], &r[0]);
ssse3_test_psignw (&vals[i + 2], &vals[i + 6], &r[2]);
fail += chk_128 (ck, r);
-#endif
/* Run the 128-bit tests */
ssse3_test_psignw128 (&vals[i + 0], &vals[i + 4], r);
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 21/40] i386: Emulate MMX maskmovq with SSE2 maskmovdqu
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (17 preceding siblings ...)
2019-02-14 12:31 ` [PATCH 40/40] i386: Also enable SSSE3 __m64 tests in 64-bit mode H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 12:33 ` [PATCH 18/40] i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE H.J. Lu
` (21 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX maskmovq with SSE2 maskmovdqu for TARGET_MMX_WITH_SSE by
zero-extending source and mask operands to 128 bits. Handle unmapped
bits 64:127 at memory address by adjusting source and mask operands
together with memory address.
PR target/89021
* config/i386/i386-c.c (ix86_target_macros_internal): Define
__MMX_WITH_SSE__ for TARGET_MMX_WITH_SSE.
* config/i386/xmmintrin.h: Emulate MMX maskmovq with SSE2
maskmovdqu for __MMX_WITH_SSE__.
---
gcc/config/i386/i386-c.c | 2 ++
gcc/config/i386/xmmintrin.h | 61 +++++++++++++++++++++++++++++++++++++
2 files changed, 63 insertions(+)
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 5e7e46fcebe..213e1b56c6b 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -548,6 +548,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__CLDEMOTE__");
if (isa_flag2 & OPTION_MASK_ISA_PTWRITE)
def_or_undef (parse_in, "__PTWRITE__");
+ if (TARGET_MMX_WITH_SSE)
+ def_or_undef (parse_in, "__MMX_WITH_SSE__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 58284378514..a915f6c87d7 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -1165,7 +1165,68 @@ _m_pshufw (__m64 __A, int const __N)
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
{
+#ifdef __MMX_WITH_SSE__
+ /* Emulate MMX maskmovq with SSE2 maskmovdqu and handle unmapped bits
+ 64:127 at address __P. */
+ typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+ typedef char __v16qi __attribute__ ((__vector_size__ (16)));
+ /* Zero-extend __A and __N to 128 bits. */
+ __v2di __A128 = __extension__ (__v2di) { ((__v1di) __A)[0], 0 };
+ __v2di __N128 = __extension__ (__v2di) { ((__v1di) __N)[0], 0 };
+
+ /* Check the alignment of __P. */
+ __SIZE_TYPE__ offset = ((__SIZE_TYPE__) __P) & 0xf;
+ if (offset)
+ {
+ /* If the misalignment of __P > 8, subtract __P by 8 bytes.
+ Otherwise, subtract __P by the misalignment. */
+ if (offset > 8)
+ offset = 8;
+ __P = (char *) (((__SIZE_TYPE__) __P) - offset);
+
+ /* Shift __A128 and __N128 to the left by the adjustment. */
+ switch (offset)
+ {
+ case 1:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 8);
+ break;
+ case 2:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 2 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 2 * 8);
+ break;
+ case 3:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 3 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 3 * 8);
+ break;
+ case 4:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 4 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 4 * 8);
+ break;
+ case 5:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 5 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 5 * 8);
+ break;
+ case 6:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 6 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 6 * 8);
+ break;
+ case 7:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 7 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 7 * 8);
+ break;
+ case 8:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 8 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 8 * 8);
+ break;
+ default:
+ break;
+ }
+ }
+ __builtin_ia32_maskmovdqu ((__v16qi)__A128, (__v16qi)__N128, __P);
+#else
__builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
+#endif
}
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 18/40] i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (18 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 21/40] i386: Emulate MMX maskmovq with SSE2 maskmovdqu H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 12:33 ` [PATCH 09/40] i386: Emulate MMX <any_logic><mode>3 " H.J. Lu
` (20 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE. Only SSE register source
operand is allowed.
PR target/89021
* config/i386/mmx.md (mmx_<code>v4hi3): Also check TARGET_MMX
and TARGET_MMX_WITH_SSE.
(mmx_<code>v8qi3): Likewise.
(smaxmin:<code>v4hi3): New.
(umaxmin:<code>v8qi3): Likewise.
(smaxmin:*mmx_<code>v4hi3): Add SSE emulation.
(umaxmin:*mmx_<code>v8qi3): Likewise.
---
gcc/config/i386/mmx.md | 60 +++++++++++++++++++++++++++++++-----------
1 file changed, 44 insertions(+), 16 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index b9f7c89cd55..dcc1bd1becf 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -925,38 +925,66 @@
(smaxmin:V4HI
(match_operand:V4HI 1 "nonimmediate_operand")
(match_operand:V4HI 2 "nonimmediate_operand")))]
- "TARGET_SSE || TARGET_3DNOW_A"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, V4HImode, operands);")
+
+(define_expand "<code>v4hi3"
+ [(set (match_operand:V4HI 0 "register_operand")
+ (smaxmin:V4HI
+ (match_operand:V4HI 1 "nonimmediate_operand")
+ (match_operand:V4HI 2 "nonimmediate_operand")))]
+ "TARGET_MMX_WITH_SSE"
"ix86_fixup_binary_operands_no_copy (<CODE>, V4HImode, operands);")
(define_insn "*mmx_<code>v4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(smaxmin:V4HI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0")
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
- "(TARGET_SSE || TARGET_3DNOW_A)
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yv")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv")))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (<CODE>, V4HImode, operands)"
- "p<maxmin_int>w\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "mode" "DI")])
+ "@
+ p<maxmin_int>w\t{%2, %0|%0, %2}
+ p<maxmin_int>w\t{%2, %0|%0, %2}
+ vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxadd,sseiadd,sseiadd")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_<code>v8qi3"
[(set (match_operand:V8QI 0 "register_operand")
(umaxmin:V8QI
(match_operand:V8QI 1 "nonimmediate_operand")
(match_operand:V8QI 2 "nonimmediate_operand")))]
- "TARGET_SSE || TARGET_3DNOW_A"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, V8QImode, operands);")
+
+(define_expand "<code>v8qi3"
+ [(set (match_operand:V8QI 0 "register_operand")
+ (umaxmin:V8QI
+ (match_operand:V8QI 1 "nonimmediate_operand")
+ (match_operand:V8QI 2 "nonimmediate_operand")))]
+ "TARGET_MMX_WITH_SSE"
"ix86_fixup_binary_operands_no_copy (<CODE>, V8QImode, operands);")
(define_insn "*mmx_<code>v8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
(umaxmin:V8QI
- (match_operand:V8QI 1 "nonimmediate_operand" "%0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
- "(TARGET_SSE || TARGET_3DNOW_A)
+ (match_operand:V8QI 1 "nonimmediate_operand" "%0,0,Yv")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym,x,Yv")))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (<CODE>, V8QImode, operands)"
- "p<maxmin_int>b\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "mode" "DI")])
+ "@
+ p<maxmin_int>b\t{%2, %0|%0, %2}
+ p<maxmin_int>b\t{%2, %0|%0, %2}
+ vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxadd,sseiadd,sseiadd")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_ashr<mode>3"
[(set (match_operand:MMXMODE24 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 09/40] i386: Emulate MMX <any_logic><mode>3 with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (19 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 18/40] i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 12:33 ` [PATCH 22/40] i386: Emulate MMX mmx_uavgv8qi3 " H.J. Lu
` (19 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX <any_logic><mode>3 with SSE. Only SSE register source
operand is allowed.
PR target/89021
* config/i386/mmx.md (any_logic:<code><mode>3): New.
(any_logic:*mmx_<code><mode>3): Also allow TARGET_MMX_WITH_SSE.
Add SSE support.
---
gcc/config/i386/mmx.md | 27 ++++++++++++++++++++-------
1 file changed, 20 insertions(+), 7 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 4738d6b428e..9e7798d4b47 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1080,15 +1080,28 @@
"TARGET_MMX"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+(define_expand "<code><mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand")
+ (any_logic:MMXMODEI
+ (match_operand:MMXMODEI 1 "nonimmediate_operand")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
+ "TARGET_MMX_WITH_SSE"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
(define_insn "*mmx_<code><mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
(any_logic:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
- "TARGET_MMX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
- "p<logic>\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "mode" "DI")])
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0,0,Yv")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,x,Yv")))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "@
+ p<logic>\t{%2, %0|%0, %2}
+ p<logic>\t{%2, %0|%0, %2}
+ vp<logic>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxadd,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 22/40] i386: Emulate MMX mmx_uavgv8qi3 with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (20 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 09/40] i386: Emulate MMX <any_logic><mode>3 " H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 12:33 ` [PATCH 39/40] i386: Add tests for MMX intrinsic emulations " H.J. Lu
` (18 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX mmx_uavgv8qi3 with SSE. Only SSE register source operand is
allowed.
PR target/89021
* config/i386/mmx.md (mmx_uavgv8qi3): Also check TARGET_MMX
and TARGET_MMX_WITH_SSE.
(*mmx_uavgv8qi3): Add SSE emulation.
---
gcc/config/i386/mmx.md | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 1fdd09242af..b0009afc35d 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1698,42 +1698,47 @@
(const_int 1) (const_int 1)
(const_int 1) (const_int 1)]))
(const_int 1))))]
- "TARGET_SSE || TARGET_3DNOW"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW)"
"ix86_fixup_binary_operands_no_copy (PLUS, V8QImode, operands);")
(define_insn "*mmx_uavgv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
(truncate:V8QI
(lshiftrt:V8HI
(plus:V8HI
(plus:V8HI
(zero_extend:V8HI
- (match_operand:V8QI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V8QI 1 "nonimmediate_operand" "%0,0,Yv"))
(zero_extend:V8HI
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym,x,Yv")))
(const_vector:V8HI [(const_int 1) (const_int 1)
(const_int 1) (const_int 1)
(const_int 1) (const_int 1)
(const_int 1) (const_int 1)]))
(const_int 1))))]
- "(TARGET_SSE || TARGET_3DNOW)
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW)
&& ix86_binary_operator_ok (PLUS, V8QImode, operands)"
{
/* These two instructions have the same operation, but their encoding
is different. Prefer the one that is de facto standard. */
- if (TARGET_SSE || TARGET_3DNOW_A)
+ if (TARGET_MMX_WITH_SSE && TARGET_AVX)
+ return "vpavgb\t{%2, %1, %0|%0, %1, %2}";
+ else if (TARGET_SSE || TARGET_3DNOW_A)
return "pavgb\t{%2, %0|%0, %2}";
else
return "pavgusb\t{%2, %0|%0, %2}";
}
- [(set_attr "type" "mmxshft")
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxshft,sseiadd,sseiadd")
(set (attr "prefix_extra")
(if_then_else
(not (ior (match_test "TARGET_SSE")
(match_test "TARGET_3DNOW_A")))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_uavgv4hi3"
[(set (match_operand:V4HI 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 39/40] i386: Add tests for MMX intrinsic emulations with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (21 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 22/40] i386: Emulate MMX mmx_uavgv8qi3 " H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-15 12:21 ` Uros Bizjak
2019-02-14 12:33 ` [PATCH 24/40] i386: Emulate MMX mmx_psadbw " H.J. Lu
` (17 subsequent siblings)
40 siblings, 1 reply; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Test MMX intrinsics with -msse2 -mno-mmx in 64-bit mode.
PR target/89021
* gcc.target/i386/mmx-vals.h: New file.
* gcc.target/i386/sse2-mmx-2.c: Likewise.
* gcc.target/i386/sse2-mmx-3.c: Likewise.
* gcc.target/i386/sse2-mmx-4.c: Likewise.
* gcc.target/i386/sse2-mmx-5.c: Likewise.
* gcc.target/i386/sse2-mmx-6.c: Likewise.
* gcc.target/i386/sse2-mmx-7.c: Likewise.
* gcc.target/i386/sse2-mmx-8.c: Likewise.
* gcc.target/i386/sse2-mmx-9.c: Likewise.
* gcc.target/i386/sse2-mmx-10.c: Likewise.
* gcc.target/i386/sse2-mmx-11.c: Likewise.
* gcc.target/i386/sse2-mmx-12.c: Likewise.
* gcc.target/i386/sse2-mmx-13.c: Likewise.
* gcc.target/i386/sse2-mmx-14.c: Likewise.
* gcc.target/i386/sse2-mmx-15.c: Likewise.
* gcc.target/i386/sse2-mmx-16.c: Likewise.
* gcc.target/i386/sse2-mmx-17.c: Likewise.
* gcc.target/i386/sse2-mmx-18a.c: Likewise.
* gcc.target/i386/sse2-mmx-18b.c: Likewise.
* gcc.target/i386/sse2-mmx-18c.c: Likewise.
* gcc.target/i386/sse2-mmx-19a.c: Likewise.
* gcc.target/i386/sse2-mmx-19b.c: Likewise.
* gcc.target/i386/sse2-mmx-19c.c: Likewise.
* gcc.target/i386/sse2-mmx-19d.c: Likewise.
* gcc.target/i386/sse2-mmx-19e.c: Likewise.
* gcc.target/i386/sse2-mmx-20.c: Likewise.
* gcc.target/i386/sse2-mmx-21.c: Likewise.
* gcc.target/i386/sse2-mmx-cvtpi2ps.c: Likewise.
* gcc.target/i386/sse2-mmx-cvtps2pi.c: Likewise.
* gcc.target/i386/sse2-mmx-cvttps2pi.c: Likewise.
* gcc.target/i386/sse2-mmx-maskmovq.c: Likewise.
* gcc.target/i386/sse2-mmx-packssdw.c: Likewise.
* gcc.target/i386/sse2-mmx-packsswb.c: Likewise.
* gcc.target/i386/sse2-mmx-packuswb.c: Likewise.
* gcc.target/i386/sse2-mmx-paddb.c: Likewise.
* gcc.target/i386/sse2-mmx-paddd.c: Likewise.
* gcc.target/i386/sse2-mmx-paddq.c: Likewise.
* gcc.target/i386/sse2-mmx-paddsb.c: Likewise.
* gcc.target/i386/sse2-mmx-paddsw.c: Likewise.
* gcc.target/i386/sse2-mmx-paddusb.c: Likewise.
* gcc.target/i386/sse2-mmx-paddusw.c: Likewise.
* gcc.target/i386/sse2-mmx-paddw.c: Likewise.
* gcc.target/i386/sse2-mmx-pand.c: Likewise.
* gcc.target/i386/sse2-mmx-pandn.c: Likewise.
* gcc.target/i386/sse2-mmx-pavgb.c: Likewise.
* gcc.target/i386/sse2-mmx-pavgw.c: Likewise.
* gcc.target/i386/sse2-mmx-pcmpeqb.c: Likewise.
* gcc.target/i386/sse2-mmx-pcmpeqd.c: Likewise.
* gcc.target/i386/sse2-mmx-pcmpeqw.c: Likewise.
* gcc.target/i386/sse2-mmx-pcmpgtb.c: Likewise.
* gcc.target/i386/sse2-mmx-pcmpgtd.c: Likewise.
* gcc.target/i386/sse2-mmx-pcmpgtw.c: Likewise.
* gcc.target/i386/sse2-mmx-pextrw.c: Likewise.
* gcc.target/i386/sse2-mmx-pinsrw.c: Likewise.
* gcc.target/i386/sse2-mmx-pmaddwd.c: Likewise.
* gcc.target/i386/sse2-mmx-pmaxsw.c: Likewise.
* gcc.target/i386/sse2-mmx-pmaxub.c: Likewise.
* gcc.target/i386/sse2-mmx-pminsw.c: Likewise.
* gcc.target/i386/sse2-mmx-pminub.c: Likewise.
* gcc.target/i386/sse2-mmx-pmovmskb.c: Likewise.
* gcc.target/i386/sse2-mmx-pmulhuw.c: Likewise.
* gcc.target/i386/sse2-mmx-pmulhw.c: Likewise.
* gcc.target/i386/sse2-mmx-pmullw.c: Likewise.
* gcc.target/i386/sse2-mmx-pmuludq.c: Likewise.
* gcc.target/i386/sse2-mmx-por.c: Likewise.
* gcc.target/i386/sse2-mmx-psadbw.c: Likewise.
* gcc.target/i386/sse2-mmx-pshufw.c: Likewise.
* gcc.target/i386/sse2-mmx-pslld.c: Likewise.
* gcc.target/i386/sse2-mmx-pslldi.c: Likewise.
* gcc.target/i386/sse2-mmx-psllq.c: Likewise.
* gcc.target/i386/sse2-mmx-psllqi.c: Likewise.
* gcc.target/i386/sse2-mmx-psllw.c: Likewise.
* gcc.target/i386/sse2-mmx-psllwi.c: Likewise.
* gcc.target/i386/sse2-mmx-psrad.c: Likewise.
* gcc.target/i386/sse2-mmx-psradi.c: Likewise.
* gcc.target/i386/sse2-mmx-psraw.c: Likewise.
* gcc.target/i386/sse2-mmx-psrawi.c: Likewise.
* gcc.target/i386/sse2-mmx-psrld.c: Likewise.
* gcc.target/i386/sse2-mmx-psrldi.c: Likewise.
* gcc.target/i386/sse2-mmx-psrlq.c: Likewise.
* gcc.target/i386/sse2-mmx-psrlqi.c: Likewise.
* gcc.target/i386/sse2-mmx-psrlw.c: Likewise.
* gcc.target/i386/sse2-mmx-psrlwi.c: Likewise.
* gcc.target/i386/sse2-mmx-psubb.c: Likewise.
* gcc.target/i386/sse2-mmx-psubd.c: Likewise.
* gcc.target/i386/sse2-mmx-psubq.c: Likewise.
* gcc.target/i386/sse2-mmx-psubusb.c: Likewise.
* gcc.target/i386/sse2-mmx-psubusw.c: Likewise.
* gcc.target/i386/sse2-mmx-psubw.c: Likewise.
* gcc.target/i386/sse2-mmx-punpckhbw.c: Likewise.
* gcc.target/i386/sse2-mmx-punpckhdq.c: Likewise.
* gcc.target/i386/sse2-mmx-punpckhwd.c: Likewise.
* gcc.target/i386/sse2-mmx-punpcklbw.c: Likewise.
* gcc.target/i386/sse2-mmx-punpckldq.c: Likewise.
* gcc.target/i386/sse2-mmx-punpcklwd.c: Likewise.
* gcc.target/i386/sse2-mmx-pxor.c: Likewise.
---
gcc/testsuite/gcc.target/i386/mmx-vals.h | 77 ++++++
gcc/testsuite/gcc.target/i386/sse2-mmx-10.c | 42 +++
gcc/testsuite/gcc.target/i386/sse2-mmx-11.c | 39 +++
gcc/testsuite/gcc.target/i386/sse2-mmx-12.c | 41 +++
gcc/testsuite/gcc.target/i386/sse2-mmx-13.c | 40 +++
gcc/testsuite/gcc.target/i386/sse2-mmx-14.c | 30 +++
gcc/testsuite/gcc.target/i386/sse2-mmx-15.c | 35 +++
gcc/testsuite/gcc.target/i386/sse2-mmx-16.c | 39 +++
gcc/testsuite/gcc.target/i386/sse2-mmx-17.c | 50 ++++
gcc/testsuite/gcc.target/i386/sse2-mmx-18a.c | 14 +
gcc/testsuite/gcc.target/i386/sse2-mmx-18b.c | 7 +
gcc/testsuite/gcc.target/i386/sse2-mmx-18c.c | 7 +
gcc/testsuite/gcc.target/i386/sse2-mmx-19a.c | 14 +
gcc/testsuite/gcc.target/i386/sse2-mmx-19b.c | 7 +
gcc/testsuite/gcc.target/i386/sse2-mmx-19c.c | 7 +
gcc/testsuite/gcc.target/i386/sse2-mmx-19d.c | 7 +
gcc/testsuite/gcc.target/i386/sse2-mmx-19e.c | 7 +
gcc/testsuite/gcc.target/i386/sse2-mmx-2.c | 12 +
gcc/testsuite/gcc.target/i386/sse2-mmx-20.c | 12 +
gcc/testsuite/gcc.target/i386/sse2-mmx-21.c | 13 +
gcc/testsuite/gcc.target/i386/sse2-mmx-3.c | 13 +
gcc/testsuite/gcc.target/i386/sse2-mmx-4.c | 4 +
gcc/testsuite/gcc.target/i386/sse2-mmx-5.c | 11 +
gcc/testsuite/gcc.target/i386/sse2-mmx-6.c | 11 +
gcc/testsuite/gcc.target/i386/sse2-mmx-7.c | 13 +
gcc/testsuite/gcc.target/i386/sse2-mmx-8.c | 4 +
gcc/testsuite/gcc.target/i386/sse2-mmx-9.c | 79 ++++++
.../gcc.target/i386/sse2-mmx-cvtpi2ps.c | 42 +++
.../gcc.target/i386/sse2-mmx-cvtps2pi.c | 35 +++
.../gcc.target/i386/sse2-mmx-cvttps2pi.c | 35 +++
.../gcc.target/i386/sse2-mmx-maskmovq.c | 98 +++++++
.../gcc.target/i386/sse2-mmx-packssdw.c | 51 ++++
.../gcc.target/i386/sse2-mmx-packsswb.c | 51 ++++
.../gcc.target/i386/sse2-mmx-packuswb.c | 51 ++++
.../gcc.target/i386/sse2-mmx-paddb.c | 47 ++++
.../gcc.target/i386/sse2-mmx-paddd.c | 47 ++++
.../gcc.target/i386/sse2-mmx-paddq.c | 42 +++
.../gcc.target/i386/sse2-mmx-paddsb.c | 47 ++++
.../gcc.target/i386/sse2-mmx-paddsw.c | 47 ++++
.../gcc.target/i386/sse2-mmx-paddusb.c | 47 ++++
.../gcc.target/i386/sse2-mmx-paddusw.c | 47 ++++
.../gcc.target/i386/sse2-mmx-paddw.c | 47 ++++
gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c | 43 +++
.../gcc.target/i386/sse2-mmx-pandn.c | 43 +++
.../gcc.target/i386/sse2-mmx-pavgb.c | 51 ++++
.../gcc.target/i386/sse2-mmx-pavgw.c | 51 ++++
.../gcc.target/i386/sse2-mmx-pcmpeqb.c | 47 ++++
.../gcc.target/i386/sse2-mmx-pcmpeqd.c | 47 ++++
.../gcc.target/i386/sse2-mmx-pcmpeqw.c | 47 ++++
.../gcc.target/i386/sse2-mmx-pcmpgtb.c | 47 ++++
.../gcc.target/i386/sse2-mmx-pcmpgtd.c | 47 ++++
.../gcc.target/i386/sse2-mmx-pcmpgtw.c | 47 ++++
.../gcc.target/i386/sse2-mmx-pextrw.c | 58 ++++
.../gcc.target/i386/sse2-mmx-pinsrw.c | 60 +++++
.../gcc.target/i386/sse2-mmx-pmaddwd.c | 46 ++++
.../gcc.target/i386/sse2-mmx-pmaxsw.c | 47 ++++
.../gcc.target/i386/sse2-mmx-pmaxub.c | 47 ++++
.../gcc.target/i386/sse2-mmx-pminsw.c | 47 ++++
.../gcc.target/i386/sse2-mmx-pminub.c | 47 ++++
.../gcc.target/i386/sse2-mmx-pmovmskb.c | 45 ++++
.../gcc.target/i386/sse2-mmx-pmulhuw.c | 50 ++++
.../gcc.target/i386/sse2-mmx-pmulhw.c | 52 ++++
.../gcc.target/i386/sse2-mmx-pmullw.c | 51 ++++
.../gcc.target/i386/sse2-mmx-pmuludq.c | 46 ++++
gcc/testsuite/gcc.target/i386/sse2-mmx-por.c | 43 +++
.../gcc.target/i386/sse2-mmx-psadbw.c | 57 ++++
.../gcc.target/i386/sse2-mmx-pshufw.c | 247 ++++++++++++++++++
.../gcc.target/i386/sse2-mmx-pslld.c | 51 ++++
.../gcc.target/i386/sse2-mmx-pslldi.c | 152 +++++++++++
.../gcc.target/i386/sse2-mmx-psllq.c | 46 ++++
.../gcc.target/i386/sse2-mmx-psllqi.c | 244 +++++++++++++++++
.../gcc.target/i386/sse2-mmx-psllw.c | 51 ++++
.../gcc.target/i386/sse2-mmx-psllwi.c | 104 ++++++++
.../gcc.target/i386/sse2-mmx-psrad.c | 51 ++++
.../gcc.target/i386/sse2-mmx-psradi.c | 152 +++++++++++
.../gcc.target/i386/sse2-mmx-psraw.c | 51 ++++
.../gcc.target/i386/sse2-mmx-psrawi.c | 104 ++++++++
.../gcc.target/i386/sse2-mmx-psrld.c | 51 ++++
.../gcc.target/i386/sse2-mmx-psrldi.c | 152 +++++++++++
.../gcc.target/i386/sse2-mmx-psrlq.c | 46 ++++
.../gcc.target/i386/sse2-mmx-psrlqi.c | 244 +++++++++++++++++
.../gcc.target/i386/sse2-mmx-psrlw.c | 51 ++++
.../gcc.target/i386/sse2-mmx-psrlwi.c | 104 ++++++++
.../gcc.target/i386/sse2-mmx-psubb.c | 47 ++++
.../gcc.target/i386/sse2-mmx-psubd.c | 47 ++++
.../gcc.target/i386/sse2-mmx-psubq.c | 42 +++
.../gcc.target/i386/sse2-mmx-psubusb.c | 47 ++++
.../gcc.target/i386/sse2-mmx-psubusw.c | 47 ++++
.../gcc.target/i386/sse2-mmx-psubw.c | 47 ++++
.../gcc.target/i386/sse2-mmx-punpckhbw.c | 52 ++++
.../gcc.target/i386/sse2-mmx-punpckhdq.c | 46 ++++
.../gcc.target/i386/sse2-mmx-punpckhwd.c | 48 ++++
.../gcc.target/i386/sse2-mmx-punpcklbw.c | 52 ++++
.../gcc.target/i386/sse2-mmx-punpckldq.c | 46 ++++
.../gcc.target/i386/sse2-mmx-punpcklwd.c | 48 ++++
gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c | 43 +++
gcc/testsuite/gcc.target/i386/sse2-mmx.c | 1 -
97 files changed, 5048 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/i386/mmx-vals.h
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-10.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-11.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-12.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-13.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-14.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-15.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-16.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-17.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-18a.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-18b.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-18c.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-19a.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-19b.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-19c.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-19d.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-19e.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-20.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-21.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-4.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-5.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-6.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-7.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-8.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-9.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-cvtpi2ps.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-cvtps2pi.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-cvttps2pi.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-maskmovq.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-packssdw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-packsswb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-packuswb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddd.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddq.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddsb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddsw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddusb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddusw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pandn.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pavgb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pavgw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqd.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtd.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmaddwd.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxsw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxub.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pminsw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pminub.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmovmskb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhuw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmullw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmuludq.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-por.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psadbw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pshufw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pslld.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pslldi.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllq.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllqi.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllwi.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrad.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psradi.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psraw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrawi.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrld.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrldi.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlq.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlqi.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlwi.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubd.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubq.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubusb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubusw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhbw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhdq.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhwd.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklbw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckldq.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklwd.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c
diff --git a/gcc/testsuite/gcc.target/i386/mmx-vals.h b/gcc/testsuite/gcc.target/i386/mmx-vals.h
new file mode 100644
index 00000000000..62d0c1cb514
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mmx-vals.h
@@ -0,0 +1,77 @@
+/* Saturation helpers and 64-bit operand values for the MMX tests.  */
+
+__attribute__((unused))
+static int
+saturate_b (int i) /* Clamp I to the signed 8-bit range [-128, 127].  */
+{
+ if (i > 127)
+ i = 127;
+ else if (i < -128)
+ i = -128;
+ return i;
+}
+
+__attribute__((unused))
+static int
+saturate_w (int i) /* Clamp I to the signed 16-bit range [-32768, 32767].  */
+{
+ if (i > 32767)
+ i = 32767;
+ else if (i < -32768)
+ i = -32768;
+ return i;
+}
+
+__attribute__((unused))
+static int
+saturate_ub (int i) /* Clamp I to the unsigned 8-bit range [0, 255].  */
+{
+ if (i > 255)
+ i = 255;
+ else if (i < 0)
+ i = 0;
+ return i;
+}
+
+__attribute__((unused))
+static int
+saturate_uw (int i) /* Clamp I to the unsigned 16-bit range [0, 65535].  */
+{
+ if (i > 65535)
+ i = 65535;
+ else if (i < 0)
+ i = 0;
+ return i;
+}
+
+static long long MMXops[] = /* 64-bit operand bit patterns used as test inputs.  */
+{
+ 0x3467512347612976LL, 0x000000000000000eLL,
+ 0x3467512347612976LL, 0x0000000000000014LL,
+ 0x3467512347612976LL, 0x000000000000003cLL,
+ 0x0000000000000000LL, 0xFFFFFFFFFFFFFFFFLL,
+ 0xFFFFFFFFFFFFFFFFLL, 0x0000000000000000LL,
+ 0x0000000000000001LL, 0x1000000000000000LL,
+ 0x1000000000000000LL, 0x0000000000000001LL,
+ 0xFF00FF00FF00FF00LL, 0x00FF00FF00FF00FFLL,
+ 0xFFFFFFFFFFFFFFFFLL, 0x0101010101010101LL,
+ 0x0101010101010101LL, 0xFFFFFFFFFFFFFFFFLL,
+ 0x0123456789ABCDEFLL, 0x0123456789ABCDEFLL,
+ 0x3467512347612976LL, 0x1839876340879234LL,
+ 0x0000000000000000LL, 0x0000000000000000LL,
+ 0xFFFFFFFFFFFFFFFFLL, 0xFFFFFFFFFFFFFFFFLL,
+ 0x7F7F7F7F7F7F7F7FLL, 0x7F7F7F7F7F7F7F7FLL,
+ 0x7F7F7F7F7F7F7F7FLL, 0x0101010101010101LL,
+ 0x7F7F7F7F7F7F7F7FLL, 0x4782082349761237LL,
+ 0x0000000000000000LL, 0x7F7F7F7F7F7F7F7FLL,
+ 0x8080808080808080LL, 0x8080808080808080LL,
+ 0x0101010101010101LL, 0x8080808080808080LL,
+ 0x8080808080808080LL, 0x0000000000000000LL,
+ 0x2372347120982458LL, 0x8080808080808080LL,
+ 0xFFFFFFFFFFFFFFFFLL, 0x8080808080808080LL,
+ 0x7F7F7F7F7F7F7F7FLL, 0xFFFFFFFFFFFFFFFFLL,
+ 0x8080808080808080LL, 0x7F7F7F7F7F7F7F7FLL,
+ 0xFFFFFFFFFFFFFFFFLL, 0x7F7F7F7F7F7F7F7FLL
+};
+
+#define MMX_num_ops (sizeof (MMXops) / sizeof (MMXops[0])) /* Number of elements in MMXops.  */
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-10.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-10.c
new file mode 100644
index 00000000000..cb63401a251
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-10.c
@@ -0,0 +1,42 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_from_int (long long *ll1, long long *r) /* *R = _m_from_int (int at LL1).  */
+{
+ int i1 = *(int *) ll1; /* Reads only the first int of the 64-bit operand.  */
+ *(__m64 *) r = _m_from_int (i1);
+}
+
+/* Reference result: first int of *LL1 in res[0], zero in res[1].  */
+static void
+compute_correct_result (long long *ll1, long long *r)
+{
+ int *res = (int *) r;
+ res[0] = *(int *) ll1;
+ res[1] = 0;
+}
+
+static void
+sse2_test (void) /* Test body; presumably invoked via sse2-check.h -- confirm.  */
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Compare _m_from_int against the reference for each MMXops element.  */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ test_from_int (&MMXops[i], &r);
+ compute_correct_result (&MMXops[i], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-11.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-11.c
new file mode 100644
index 00000000000..6737ec5f2d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-11.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_from_long_long (long long *ll1, long long *r) /* *R = _mm_cvtsi64_m64 (*LL1).  */
+{
+ *(__m64 *) r = _mm_cvtsi64_m64 (*ll1);
+}
+
+/* Reference result: *R is a plain copy of *LL1.  */
+static void
+compute_correct_result (long long *ll1, long long *r)
+{
+ *r = *ll1;
+}
+
+static void
+sse2_test (void) /* Test body; presumably invoked via sse2-check.h -- confirm.  */
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Compare _mm_cvtsi64_m64 against the reference for each MMXops element.  */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ test_from_long_long (&MMXops[i], &r);
+ compute_correct_result (&MMXops[i], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-12.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-12.c
new file mode 100644
index 00000000000..7390bcf3ccc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-12.c
@@ -0,0 +1,41 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_to_int (long long *ll1, long long *r) /* First int of *R = _m_to_int (*LL1).  */
+{
+ __m64 m = *(__m64 *) ll1;
+ *(int *) r = _m_to_int (m); /* Only one int of *R is written.  */
+}
+
+/* Reference result: copy the first int of *LL1 to the first int of *R.  */
+static void
+compute_correct_result (long long *ll1, long long *r)
+{
+ int *i1 = (int *) ll1;
+ *(int *) r = *i1;
+}
+
+static void
+sse2_test (void) /* Test body; presumably invoked via sse2-check.h -- confirm.  */
+{
+ int i;
+ long long r = 0, ck = 0; /* Zeroed: both routines write only one int of each.  */
+ int fail = 0;
+
+ /* Compare _m_to_int against the reference for each MMXops element.  */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ test_to_int (&MMXops[i], &r);
+ compute_correct_result (&MMXops[i], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-13.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-13.c
new file mode 100644
index 00000000000..fd1eed66daa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-13.c
@@ -0,0 +1,40 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_to_long_long (long long *ll1, long long *r) /* *R = _mm_cvtm64_si64 (*LL1).  */
+{
+ __m64 m = *(__m64 *) ll1;
+ *r = _mm_cvtm64_si64 (m);
+}
+
+/* Reference result: *R is a plain copy of *LL1.  */
+static void
+compute_correct_result (long long *ll1, long long *r)
+{
+ *r = *ll1;
+}
+
+static void
+sse2_test (void) /* Test body; presumably invoked via sse2-check.h -- confirm.  */
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Compare _mm_cvtm64_si64 against the reference for each MMXops element.  */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ test_to_long_long (&MMXops[i], &r);
+ compute_correct_result (&MMXops[i], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-14.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-14.c
new file mode 100644
index 00000000000..cc586182259
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-14.c
@@ -0,0 +1,30 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_setzero (long long *r) /* *R = _mm_setzero_si64 ().  */
+{
+ *(__m64 *) r = _mm_setzero_si64 ();
+}
+
+/* Reference result: *R is zero.  */
+static void
+compute_correct_result (long long *r)
+{
+ *r = 0x0LL;
+}
+
+static void
+sse2_test (void) /* Test body; presumably invoked via sse2-check.h -- confirm.  */
+{
+ long long r, ck;
+
+ /* Compare _mm_setzero_si64 against the reference value.  */
+ test_setzero (&r);
+ compute_correct_result (&ck);
+ if (ck != r)
+ abort (); /* A mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-15.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-15.c
new file mode 100644
index 00000000000..35308633f59
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-15.c
@@ -0,0 +1,35 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_set (int x, int y, long long *r) /* *R = _mm_set_pi32 (X, Y).  */
+{
+ *(__m64 *) r = _mm_set_pi32 (x, y);
+}
+
+/* Reference result: Y in the first int of *RES_P, X in the second.  */
+static void
+compute_correct_result (int x, int y, long long *res_p)
+{
+ int *res = (int *) res_p;
+ res[0] = y;
+ res[1] = x;
+}
+
+static void
+sse2_test (void) /* Test body; presumably invoked via sse2-check.h -- confirm.  */
+{
+ int x, y;
+ long long r, ck;
+
+ /* Compare _mm_set_pi32 against the reference for one pair of values.  */
+ x = 0x0badbeef;
+ y = 0x0badfeed;
+ test_set (x, y, &r);
+ compute_correct_result (x, y, &ck);
+ if (ck != r)
+ abort (); /* A mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-16.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-16.c
new file mode 100644
index 00000000000..9f0fb46765c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-16.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_set (int i0, int i1, int i2, int i3, long long *r) /* *R = _mm_set_pi16 (I0..I3).  */
+{
+ *(__m64 *) r = _mm_set_pi16 (i0, i1, i2, i3);
+}
+
+/* Reference result: the four values stored as shorts in reverse argument order.  */
+static void
+compute_correct_result (int i0, int i1, int i2, int i3, long long *res_p)
+{
+ short *res = (short *) res_p;
+ res[0] = i3; /* Last argument lands in the first short.  */
+ res[1] = i2;
+ res[2] = i1;
+ res[3] = i0;
+}
+
+static void
+sse2_test (void) /* Test body; presumably invoked via sse2-check.h -- confirm.  */
+{
+ short i0, i1, i2, i3;
+ long long r, ck;
+
+ /* Compare _mm_set_pi16 against the reference for one set of values.  */
+ i0 = 0x0bad;
+ i1 = 0xbeef; /* Bit 15 set.  */
+ i2 = 0x0bad;
+ i3 = 0xfeed; /* Bit 15 set.  */
+ test_set (i0, i1, i2, i3, &r);
+ compute_correct_result (i0, i1, i2, i3, &ck);
+ if (ck != r)
+ abort (); /* A mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-17.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-17.c
new file mode 100644
index 00000000000..a38351ea056
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-17.c
@@ -0,0 +1,50 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_set (char i0, char i1, char i2, char i3, /* *R = _mm_set_pi8 (I0..I7).  */
+ char i4, char i5, char i6, char i7, long long *r)
+{
+ *(__m64 *) r = _mm_set_pi8 (i0, i1, i2, i3, i4, i5, i6, i7);
+}
+
+/* Reference result: the eight bytes stored in reverse argument order.  */
+static void
+compute_correct_result (char i0, char i1, char i2, char i3,
+ char i4, char i5, char i6, char i7,
+ long long *res_p)
+{
+ char *res = (char *) res_p;
+ res[0] = i7; /* Last argument lands in the first byte.  */
+ res[1] = i6;
+ res[2] = i5;
+ res[3] = i4;
+ res[4] = i3;
+ res[5] = i2;
+ res[6] = i1;
+ res[7] = i0;
+}
+
+static void
+sse2_test (void) /* Test body; presumably invoked via sse2-check.h -- confirm.  */
+{
+ char i0, i1, i2, i3, i4, i5, i6, i7;
+ long long r, ck;
+
+ /* Compare _mm_set_pi8 against the reference for one set of values.  */
+ i0 = 0x12;
+ i1 = 0x34;
+ i2 = 0x56;
+ i3 = 0x78;
+ i4 = 0x90; /* This and the following values have bit 7 set.  */
+ i5 = 0xab;
+ i6 = 0xcd;
+ i7 = 0xef;
+ test_set (i0, i1, i2, i3, i4, i5, i6, i7, &r);
+ compute_correct_result (i0, i1, i2, i3, i4, i5, i6, i7, &ck);
+ if (ck != r)
+ abort (); /* A mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-18a.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-18a.c
new file mode 100644
index 00000000000..3505a5c0cf4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-18a.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx -mno-avx512vl" } */
+/* { dg-final { scan-assembler-times "pshufd" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include <mmintrin.h>
+
+__m64
+foo (int i) /* Return a __m64 whose two int elements both equal I.  */
+{
+ __v2si x = { i, i };
+ return (__m64) x;
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-18b.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-18b.c
new file mode 100644
index 00000000000..9b267b17346
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-18b.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-mmx -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "pbroadcastd" 1 } } */
+/* { dg-final { scan-assembler-not "movd" } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include "sse2-mmx-18a.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-18c.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-18c.c
new file mode 100644
index 00000000000..394f05b6b49
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-18c.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-mmx -mavx512bw -mno-avx512vl" } */
+/* { dg-final { scan-assembler-times "pshufd" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include "sse2-mmx-18a.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-19a.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-19a.c
new file mode 100644
index 00000000000..9715ace241f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-19a.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx -mno-avx -mtune=intel" } */
+/* { dg-final { scan-assembler-times "pshuflw" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include <mmintrin.h>
+
+__m64
+foo (short i) /* Return a __m64 whose four short elements all equal I.  */
+{
+ __v4hi x = { i, i, i, i };
+ return (__m64) x;
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-19b.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-19b.c
new file mode 100644
index 00000000000..a6d42313336
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-19b.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-mmx -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "pbroadcastw" 1 } } */
+/* { dg-final { scan-assembler-not "movd" } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include "sse2-mmx-19a.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-19c.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-19c.c
new file mode 100644
index 00000000000..b02dc8c2ffd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-19c.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-mmx -mavx -mno-avx2 -mtune=intel" } */
+/* { dg-final { scan-assembler-times "pshuflw" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include "sse2-mmx-19a.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-19d.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-19d.c
new file mode 100644
index 00000000000..54691883c9c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-19d.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-mmx -mavx512bw -mno-avx512vl -mtune=intel" } */
+/* { dg-final { scan-assembler-times "pbroadcastw" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include "sse2-mmx-19a.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-19e.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-19e.c
new file mode 100644
index 00000000000..8be973cc4fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-19e.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-mmx -mavx2 -mno-avx512f -mtune=intel" } */
+/* { dg-final { scan-assembler-times "pbroadcastw" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include "sse2-mmx-19a.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-2.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-2.c
new file mode 100644
index 00000000000..e4cee2da83e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-not "%xmm" } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <mmintrin.h>
+
+float
+foo (__m64 x) /* Return element 0 of X viewed as __v2sf.  */
+{
+ return ((__v2sf) x)[0];
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-20.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-20.c
new file mode 100644
index 00000000000..173fa154d40
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-20.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <mmintrin.h>
+
+int
+foo (__m64 x) /* Return element 0 of X viewed as __v2si.  */
+{
+ return ((__v2si) x)[0];
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-21.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-21.c
new file mode 100644
index 00000000000..8f5341e2de6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-21.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-times "pshufd" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <mmintrin.h>
+
+int
+foo (__m64 x) /* Return element 1 of X viewed as __v2si.  */
+{
+ return ((__v2si) x)[1];
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-3.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-3.c
new file mode 100644
index 00000000000..77f518b6c5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler "cvtdq2ps" } } */
+/* { dg-final { scan-assembler-not "cvtpi2ps" } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <xmmintrin.h>
+
+__m128
+foo (__m128 i1, __m64 i2) /* Thin wrapper around _mm_cvtpi32_ps for the asm scans.  */
+{
+ return _mm_cvtpi32_ps (i1, i2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-4.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-4.c
new file mode 100644
index 00000000000..d923724fc1c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-4.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+
+#include "mmx-4.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-5.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-5.c
new file mode 100644
index 00000000000..1953dc89bb1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-5.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <xmmintrin.h>
+
+int
+foo (__m64 i) /* Thin wrapper around _m_pextrw with selector 2.  */
+{
+ return _m_pextrw (i, 2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-6.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-6.c
new file mode 100644
index 00000000000..f73444f493b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-6.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <xmmintrin.h>
+
+__m64
+foo (__m64 i, int w) /* Thin wrapper around _m_pinsrw with selector 2.  */
+{
+ return _m_pinsrw (i, w, 2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-7.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-7.c
new file mode 100644
index 00000000000..6ea491d2715
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-7.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler "movnti" } } */
+/* { dg-final { scan-assembler-not "movntq" } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <xmmintrin.h>
+
+void
+foo (__m64 *p, __m64 i) /* Thin wrapper around _mm_stream_pi for the asm scans.  */
+{
+ _mm_stream_pi (p, i);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-8.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-8.c
new file mode 100644
index 00000000000..342c2fa4f25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-8.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+
+#include "mmx-8.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-9.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-9.c
new file mode 100644
index 00000000000..f0bf7256c0e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-9.c
@@ -0,0 +1,79 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-require-effective-target sse2 } */
+
+#include "sse2-check.h"
+
+#include <string.h>
+
+#define FLOAT_X 2.3456 /* First test value.  */
+#define FLOAT_Y -4.5987 /* Second test value.  */
+
+static float expected_x = FLOAT_X; /* Compared with the foo1 result.  */
+static float expected_y = FLOAT_Y; /* Compared with the foo2 result.  */
+static __v2sf expected1 = { FLOAT_X, FLOAT_Y }; /* Input to foo1/foo2; expected foo5 result.  */
+static __v2sf expected2 = { FLOAT_X, 0 }; /* Expected foo3 result.  */
+static __v2sf expected3 = { FLOAT_X, FLOAT_X }; /* Expected foo4 result.  */
+
+float
+__attribute__((noinline, noclone))
+foo1 (__m64 x) /* Return element 0 of X viewed as __v2sf.  */
+{
+ return ((__v2sf) x)[0];
+}
+
+float
+__attribute__((noinline, noclone))
+foo2 (__m64 x) /* Return element 1 of X viewed as __v2sf.  */
+{
+ return ((__v2sf) x)[1];
+}
+
+__m64
+__attribute__((noinline, noclone))
+foo3 (float x) /* Return { X, 0 } as __m64.  */
+{
+ return __extension__ (__m64) (__v2sf) { x, 0 };
+}
+
+__m64
+__attribute__((noinline, noclone))
+foo4 (float x) /* Return { X, X } as __m64.  */
+{
+ return __extension__ (__m64) (__v2sf) { x, x };
+}
+
+__m64
+__attribute__((noinline, noclone))
+foo5 (float x, float y) /* Return { X, Y } as __m64.  */
+{
+ return __extension__ (__m64) (__v2sf) { x, y };
+}
+
+void
+__attribute__((noinline))
+sse2_test (void) /* Test body; presumably invoked via sse2-check.h -- confirm.  */
+{
+ __m64 res;
+ float x;
+
+ x = foo1 ((__m64) expected1); /* Element extraction is checked by value.  */
+ if (x != expected_x)
+ abort ();
+
+ x = foo2 ((__m64) expected1);
+ if (x != expected_y)
+ abort ();
+
+ res = foo3 (FLOAT_X); /* Vector construction is checked bytewise with memcmp.  */
+ if (memcmp (&res, &expected2, sizeof (res)))
+ abort ();
+
+ res = foo4 (FLOAT_X);
+ if (memcmp (&res, &expected3, sizeof (res)))
+ abort ();
+
+ res = foo5 (FLOAT_X, FLOAT_Y);
+ if (memcmp (&res, &expected1, sizeof (res)))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtpi2ps.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtpi2ps.c
new file mode 100644
index 00000000000..bdf1085446b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtpi2ps.c
@@ -0,0 +1,42 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include <string.h>
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_cvtpi32_ps (__m128 *i1, __m64 *i2, __m128 *r) /* *R = _mm_cvtpi32_ps (*I1, *I2).  */
+{
+ *(__m128 *) r = _mm_cvtpi32_ps (*i1, *i2);
+}
+
+/* Reference result: *DST_P with its first two floats replaced by the converted ints of *SRC_P.  */
+static void
+compute_correct_result (__m128 *dst_p, __m64 *src_p, __m128 *res_p)
+{
+ int *src = (int *) src_p;
+ float *res = (float *) res_p;
+ *res_p = *dst_p; /* Start from DST; only res[0] and res[1] change below.  */
+ int i;
+ __m128 r;
+ for (i = 0; i < 2; i++)
+ {
+ r = _mm_cvt_si2ss (*dst_p, src[i]); /* Convert one int with the scalar intrinsic.  */
+ res[i] = ((__v4sf) r)[0]; /* Keep only element 0 of the scalar result.  */
+ }
+}
+
+static void
+sse2_test (void) /* Test body; presumably invoked via sse2-check.h -- confirm.  */
+{
+ __m128 r, ck;
+ __v4sf x = { 1.99f, -3.9f, -4.9f, 3.8f };
+ __v2si y = { 30, -39 };
+
+ /* Compare _mm_cvtpi32_ps against the reference, bytewise.  */
+ test_cvtpi32_ps ((__m128 *) &x, (__m64 *) &y, &r);
+ compute_correct_result ((__m128 *) &x, (__m64 *) &y, &ck);
+ if (memcmp (&ck, &r, sizeof (r)))
+ abort (); /* A mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtps2pi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtps2pi.c
new file mode 100644
index 00000000000..8e860e7a5be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtps2pi.c
@@ -0,0 +1,35 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_cvtps_pi32 (__m128 *src_p, long long *r) /* *R = _mm_cvtps_pi32 (*SRC_P).  */
+{
+ *(__m64 *) r = _mm_cvtps_pi32 (*src_p);
+}
+
+/* Reference result: first two floats of *SRC_P converted one by one with _mm_cvt_ss2si.  */
+static void
+compute_correct_result (__m128 *src_p, long long *res_p)
+{
+ __v4sf *src = (__v4sf *) src_p;
+ int *res = (int *) res_p;
+ int i;
+ for (i = 0; i < 2; i++)
+ res[i] = _mm_cvt_ss2si (_mm_set_ss ((*src)[i]));
+}
+
+static void
+sse2_test (void) /* Test body; presumably invoked via sse2-check.h -- confirm.  */
+{
+ long long r, ck;
+ __v4sf x = { 1.99f, -3.9f, -4.9f, 3.8f };
+
+ /* Compare _mm_cvtps_pi32 against the scalar reference.  */
+ test_cvtps_pi32 ((__m128 *) &x, &r);
+ compute_correct_result ((__m128 *) &x, &ck);
+ if (ck != r)
+ abort (); /* A mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-cvttps2pi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvttps2pi.c
new file mode 100644
index 00000000000..8b9dd7fc8ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvttps2pi.c
@@ -0,0 +1,35 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_cvttps_pi32 (__m128 *src_p, long long *r) /* *R = _mm_cvttps_pi32 (*SRC_P).  */
+{
+ *(__m64 *) r = _mm_cvttps_pi32 (*src_p);
+}
+
+/* Reference result: first two floats of *SRC_P converted one by one with _mm_cvtt_ss2si.  */
+static void
+compute_correct_result (__m128 *src_p, long long *res_p)
+{
+ __v4sf *src = (__v4sf *) src_p;
+ int *res = (int *) res_p;
+ int i;
+ for (i = 0; i < 2; i++)
+ res[i] = _mm_cvtt_ss2si (_mm_set_ss ((*src)[i]));
+}
+
+static void
+sse2_test (void) /* Test body; presumably invoked via sse2-check.h -- confirm.  */
+{
+ long long r, ck;
+ __v4sf x = { 1.99f, -3.9f, -4.9f, 3.8f };
+
+ /* Compare _mm_cvttps_pi32 against the scalar reference.  */
+ test_cvttps_pi32 ((__m128 *) &x, &r);
+ compute_correct_result ((__m128 *) &x, &ck);
+ if (ck != r)
+ abort (); /* A mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-maskmovq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-maskmovq.c
new file mode 100644
index 00000000000..815a499ff84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-maskmovq.c
@@ -0,0 +1,98 @@
+/* { dg-do run { target { *-*-linux* && { ! ia32 } } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include <unistd.h>
+#include <sys/mman.h>
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_maskmovq (long long *ll1, long long *ll2, long long *r)
+{
+  /* *ll1 is passed as the first (data) argument and *ll2 as the second
+     (mask) argument of _m_maskmovq; r is the store destination.  */
+  const __m64 data = *(__m64 *) ll1, mask = *(__m64 *) ll2;
+  _m_maskmovq (data, mask, (char *) r);
+}
+
+/* Reference: copy byte k of *dst_p to *res_p when bit 7 of byte k of
+   *src_p is set; all other result bytes are left untouched.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
+{
+  const char *data = (const char *) dst_p;
+  const char *mask = (const char *) src_p;
+  char *out = (char *) res_p;
+  int k;
+  for (k = 0; k < 8; k++)
+    if (mask[k] & 0x80)
+      out[k] = data[k];
+}
+
+static void
+do_maskmovq_test (long long *r)
+{
+  long long want;
+  int mismatches = 0;
+  int idx;
+
+  /* Each MMXops entry is used as both data and mask.  *r and the
+     reference both start as all-ones before every masked store.  */
+  for (idx = 0; idx < MMX_num_ops; idx++)
+    {
+      *r = -1LL;
+      want = -1LL;
+      test_maskmovq (&MMXops[idx], &MMXops[idx], r);
+      compute_correct_result (&MMXops[idx], &MMXops[idx], &want);
+      mismatches += (*r != want);
+    }
+
+  if (mismatches)
+    abort ();
+}
+
+static void
+sse2_test (void)
+{
+  /* Map three pages and make the outer two inaccessible, so the
+     destination buffers below sit in a single writable page that is
+     fenced by PROT_NONE pages on both sides.  */
+  const size_t page_size = sysconf(_SC_PAGESIZE);
+  char *const buf = mmap (0, 3 * page_size, PROT_READ | PROT_WRITE,
+                          MAP_PRIVATE | MAP_ANON, -1, 0);
+  char *window;
+  unsigned int k;
+
+  if (buf == MAP_FAILED)
+    {
+      perror ("mmap");
+      abort ();
+    }
+
+  /* Leading guard page.  */
+  if (mprotect (buf, page_size, PROT_NONE) != 0)
+    {
+      perror ("mprotect");
+      abort ();
+    }
+
+  /* Trailing guard page.  */
+  if (mprotect (buf + 2 * page_size, page_size, PROT_NONE) != 0)
+    {
+      perror ("mprotect");
+      abort ();
+    }
+
+  window = buf + page_size;
+
+  {
+    /* Offsets into the writable page: three near its start and three
+       inside its last 16 bytes; the final buffer ends exactly where
+       the trailing guard page begins.  */
+    const size_t offsets[] = { 0, 3, 11, page_size - 16,
+                               page_size - 16 + 3, page_size - 16 + 8 };
+
+    for (k = 0; k < sizeof (offsets) / sizeof (offsets[0]); k++)
+      do_maskmovq_test ((long long *) (window + offsets[k]));
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-packssdw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-packssdw.c
new file mode 100644
index 00000000000..fb895c6cfe7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-packssdw.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_packssdw (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _m_packssdw result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _m_packssdw (lhs, rhs);
+}
+
+/* Reference for packssdw: the two ints of *dst_p fill result words 0-1
+   and the two ints of *src_p fill words 2-3, each through saturate_w.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
+{
+  const int *lo = (const int *) dst_p;
+  const int *hi = (const int *) src_p;
+  short *out = (short *) res_p, *out_hi = out + 2;
+  int k;
+
+  for (k = 0; k < 2; k++)
+    {
+      out[k] = saturate_w (lo[k]);
+      out_hi[k] = saturate_w (hi[k]);
+    }
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_packssdw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-packsswb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-packsswb.c
new file mode 100644
index 00000000000..1c4a948027c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-packsswb.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_packsswb (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _m_packsswb result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _m_packsswb (lhs, rhs);
+}
+
+/* Reference for packsswb: the four shorts of *dst_p fill result bytes
+   0-3 and the four shorts of *src_p fill bytes 4-7, via saturate_b.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
+{
+  const short *lo = (const short *) dst_p;
+  const short *hi = (const short *) src_p;
+  char *out = (char *) res_p, *out_hi = out + 4;
+  int k;
+
+  for (k = 0; k < 4; k++)
+    {
+      out[k] = saturate_b (lo[k]);
+      out_hi[k] = saturate_b (hi[k]);
+    }
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_packsswb (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-packuswb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-packuswb.c
new file mode 100644
index 00000000000..24abd5dcc9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-packuswb.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_packuswb (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _m_packuswb result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _m_packuswb (lhs, rhs);
+}
+
+/* Reference for packuswb: the four shorts of *dst_p fill result bytes
+   0-3 and the four shorts of *src_p fill bytes 4-7, via saturate_ub.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
+{
+  const short *lo = (const short *) dst_p;
+  const short *hi = (const short *) src_p;
+  unsigned char *out = (unsigned char *) res_p, *out_hi = out + 4;
+  int k;
+
+  for (k = 0; k < 4; k++)
+    {
+      out[k] = saturate_ub (lo[k]);
+      out_hi[k] = saturate_ub (hi[k]);
+    }
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_packuswb (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddb.c
new file mode 100644
index 00000000000..f4c8273c5e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddb.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_paddb (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _m_paddb result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _m_paddb (lhs, rhs);
+}
+
+/* Reference for paddb: per char lane, the int sum converted back to char.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
+{
+  const char *a = (const char *) dst_p;
+  const char *b = (const char *) src_p;
+  char *out = (char *) res_p;
+  char *const end = out + 8;
+
+  while (out != end)
+    *out++ = *a++ + *b++;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_paddb (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddd.c
new file mode 100644
index 00000000000..32911a7852a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddd.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_paddd (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _m_paddd result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _m_paddd (lhs, rhs);
+}
+
+/* Reference for paddd: add the two 32-bit lanes with wraparound.  The
+   sum is formed in unsigned int because signed int overflow is
+   undefined, whereas the packed add wraps.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
+{
+  const int *a = (const int *) dst_p, *b = (const int *) src_p;
+  int *out = (int *) res_p;
+  int lane;
+  for (lane = 0; lane < 2; lane++)
+    out[lane] = (unsigned int) a[lane] + (unsigned int) b[lane];
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_paddd (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddq.c
new file mode 100644
index 00000000000..8e257a314e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddq.c
@@ -0,0 +1,42 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_paddq (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _mm_add_si64 result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _mm_add_si64 (lhs, rhs);
+}
+
+/* Reference for paddq: 64-bit add that wraps on overflow.  Unsigned
+   arithmetic is used because signed long long overflow is undefined.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
+{
+  *res_p = (unsigned long long) *dst_p + (unsigned long long) *src_p;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_paddq (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsb.c
new file mode 100644
index 00000000000..9798a2024fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsb.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_paddsb (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _m_paddsb result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _m_paddsb (lhs, rhs);
+}
+
+/* Reference for paddsb: per char lane, saturate_b applied to the int sum.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
+{
+  const char *a = (const char *) dst_p;
+  const char *b = (const char *) src_p;
+  char *out = (char *) res_p;
+  int lane;
+
+  for (lane = 0; lane < 8; lane++)
+    out[lane] = saturate_b (a[lane] + b[lane]);
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_paddsb (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsw.c
new file mode 100644
index 00000000000..6371b1930fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsw.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_paddsw (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _m_paddsw result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _m_paddsw (lhs, rhs);
+}
+
+/* Reference for paddsw: per short lane, saturate_w applied to the int sum.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
+{
+  const short *a = (const short *) dst_p;
+  const short *b = (const short *) src_p;
+  short *out = (short *) res_p;
+  int lane;
+
+  for (lane = 0; lane < 4; lane++)
+    out[lane] = saturate_w (a[lane] + b[lane]);
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_paddsw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusb.c
new file mode 100644
index 00000000000..bac22b641cd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusb.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_paddusb (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _m_paddusb result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _m_paddusb (lhs, rhs);
+}
+
+/* Reference for paddusb: per unsigned byte, saturate_ub of the int sum.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
+{
+  const unsigned char *a = (const unsigned char *) dst_p;
+  const unsigned char *b = (const unsigned char *) src_p;
+  unsigned char *out = (unsigned char *) res_p;
+  int lane;
+
+  for (lane = 0; lane < 8; lane++)
+    out[lane] = saturate_ub (a[lane] + b[lane]);
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_paddusb (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusw.c
new file mode 100644
index 00000000000..70f987bf381
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusw.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_paddusw (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _m_paddusw result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _m_paddusw (lhs, rhs);
+}
+
+/* Reference for paddusw: per unsigned short, saturate_uw of the int sum.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
+{
+  const unsigned short *a = (const unsigned short *) dst_p;
+  const unsigned short *b = (const unsigned short *) src_p;
+  unsigned short *out = (unsigned short *) res_p;
+  int lane;
+
+  for (lane = 0; lane < 4; lane++)
+    out[lane] = saturate_uw (a[lane] + b[lane]);
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_paddusw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddw.c
new file mode 100644
index 00000000000..8e01cc4734f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddw.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_paddw (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _m_paddw result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _m_paddw (lhs, rhs);
+}
+
+/* Reference for paddw: per short lane, the int sum converted to short.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
+{
+  const short *a = (const short *) dst_p;
+  const short *b = (const short *) src_p;
+  short *out = (short *) res_p;
+  short *const end = out + 4;
+
+  while (out != end)
+    *out++ = *a++ + *b++;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_paddw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c
new file mode 100644
index 00000000000..0876fee92ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c
@@ -0,0 +1,43 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_pand (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _m_pand result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pand (lhs, rhs);
+}
+
+/* Reference for pand: bitwise AND of the two 64-bit operands.  The
+   parameters are long long pointers so that their types agree with
+   the arguments sse2_test passes.  */
+static void
+compute_correct_result (long long *dst, long long *src, long long *res)
+{
+  res[0] = dst[0] & src[0];
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_pand (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pandn.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pandn.c
new file mode 100644
index 00000000000..362c475029b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pandn.c
@@ -0,0 +1,43 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_pandn (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _m_pandn result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pandn (lhs, rhs);
+}
+
+/* Reference for pandn: (~dst) & src over the whole 64-bit value.  The
+   parameters are long long pointers so that their types agree with
+   the arguments sse2_test passes.  */
+static void
+compute_correct_result (long long *dst, long long *src, long long *res)
+{
+  res[0] = ~dst[0] & src[0];
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_pandn (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgb.c
new file mode 100644
index 00000000000..0c57d94e9e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgb.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_pavgb (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _m_pavgb result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pavgb (lhs, rhs);
+}
+
+/* Reference for pavgb: per unsigned byte, (a + b + 1) >> 1, with the
+   intermediate sum held in an unsigned int.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
+{
+  const unsigned char *a = (const unsigned char *) dst_p;
+  const unsigned char *b = (const unsigned char *) src_p;
+  unsigned char *out = (unsigned char *) res_p;
+  unsigned char *const end = out + 8;
+
+  for (; out != end; a++, b++, out++)
+    {
+      const unsigned int widened = *a + *b + 1;
+      *out = widened >> 1;
+    }
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_pavgb (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgw.c
new file mode 100644
index 00000000000..e38669ffbb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgw.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_pavgw (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _m_pavgw result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pavgw (lhs, rhs);
+}
+
+/* Reference for pavgw: per unsigned short, (a + b + 1) >> 1, with the
+   intermediate sum held in an unsigned int.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
+{
+  const unsigned short *a = (const unsigned short *) dst_p;
+  const unsigned short *b = (const unsigned short *) src_p;
+  unsigned short *out = (unsigned short *) res_p;
+  unsigned short *const end = out + 4;
+
+  for (; out != end; a++, b++, out++)
+    {
+      const unsigned int widened = *a + *b + 1;
+      *out = widened >> 1;
+    }
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_pavgw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqb.c
new file mode 100644
index 00000000000..f0f3a28cf62
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqb.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_pcmpeqb (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _m_pcmpeqb result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pcmpeqb (lhs, rhs);
+}
+
+/* Reference for pcmpeqb: per char lane, -1 when equal and 0 otherwise.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
+{
+  const char *a = (const char *) dst_p;
+  const char *b = (const char *) src_p;
+  char *out = (char *) res_p;
+  char *const end = out + 8;
+
+  while (out != end)
+    *out++ = -(*a++ == *b++);
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_pcmpeqb (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqd.c
new file mode 100644
index 00000000000..7dc13f147e8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqd.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_pcmpeqd (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _m_pcmpeqd result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pcmpeqd (lhs, rhs);
+}
+
+/* Reference for pcmpeqd: per int lane, -1 when equal and 0 otherwise.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
+{
+  const int *a = (const int *) dst_p;
+  const int *b = (const int *) src_p;
+  int *out = (int *) res_p;
+  int *const end = out + 2;
+
+  while (out != end)
+    *out++ = -(*a++ == *b++);
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_pcmpeqd (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqw.c
new file mode 100644
index 00000000000..d6e59077204
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqw.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_pcmpeqw (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _m_pcmpeqw result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pcmpeqw (lhs, rhs);
+}
+
+/* Reference for pcmpeqw: per short lane, -1 when equal and 0 otherwise.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
+{
+  const short *a = (const short *) dst_p;
+  const short *b = (const short *) src_p;
+  short *out = (short *) res_p;
+  short *const end = out + 4;
+
+  while (out != end)
+    *out++ = -(*a++ == *b++);
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_pcmpeqw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtb.c
new file mode 100644
index 00000000000..3a1c188a407
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtb.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_pcmpgtb (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _m_pcmpgtb result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pcmpgtb (lhs, rhs);
+}
+
+/* Reference for pcmpgtb: per char lane, -1 when dst > src, else 0.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
+{
+  const char *a = (const char *) dst_p;
+  const char *b = (const char *) src_p;
+  char *out = (char *) res_p;
+  char *const end = out + 8;
+
+  while (out != end)
+    *out++ = -(*a++ > *b++);
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_pcmpgtb (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtd.c
new file mode 100644
index 00000000000..121cafcd834
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtd.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_pcmpgtd (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _m_pcmpgtd result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pcmpgtd (lhs, rhs);
+}
+
+/* Reference for pcmpgtd: per int lane, -1 when dst > src, else 0.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
+{
+  const int *a = (const int *) dst_p;
+  const int *b = (const int *) src_p;
+  int *out = (int *) res_p;
+  int *const end = out + 2;
+
+  while (out != end)
+    *out++ = -(*a++ > *b++);
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_pcmpgtd (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtw.c
new file mode 100644
index 00000000000..7b4e99d0a34
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtw.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+static void __attribute__((noinline, noclone))
+test_pcmpgtw (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64; store the _m_pcmpgtw result.  */
+  const __m64 lhs = *(__m64 *) ll1;
+  const __m64 rhs = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pcmpgtw (lhs, rhs);
+}
+
+/* Reference for pcmpgtw: per short lane, -1 when dst > src, else 0.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
+{
+  const short *a = (const short *) dst_p;
+  const short *b = (const short *) src_p;
+  short *out = (short *) res_p;
+  short *const end = out + 4;
+
+  while (out != end)
+    *out++ = -(*a++ > *b++);
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Walk MMXops two entries at a time: entry idx is the first operand
+     and entry idx + 1 the second, for both the intrinsic and the model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_pcmpgtw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c
new file mode 100644
index 00000000000..58e5ea5aa3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c
@@ -0,0 +1,58 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing" } */
+
+#include <string.h>
+#include "sse2-check.h"
+
+static void __attribute__((noinline, noclone))
+test_pextrw (__m64 *i, unsigned int imm, int *r)
+{
+  /* Extract 16-bit word IMM of *I into *R.  Every call site passes a
+     literal selector, so the run-time index is dispatched by hand.  */
+
+  /* Selectors outside 0..3 leave *R untouched.  */
+  if (imm > 3)
+    return;
+
+  if (imm == 0)
+    *r = _m_pextrw (*i, 0);
+  else if (imm == 1)
+    *r = _m_pextrw (*i, 1);
+  else if (imm == 2)
+    *r = _m_pextrw (*i, 2);
+  else
+    {
+      /* Only selector 3 is left after the guard above.  */
+      *r = _m_pextrw (*i, 3);
+    }
+}
+
+/* Reference: word IMM of *src_p read as a short; no store if IMM > 3.  */
+static void
+compute_correct_result (__m64 *src_p, unsigned int imm, int *res_p)
+{
+  if (imm >= 4)
+    return;
+  *res_p = ((const short *) src_p)[imm];
+}
+
+static void
+sse2_test (void)
+{
+  __v4hi words = { 3320, -3339, 48, 4392 };
+  __m64 *const src = (__m64 *) &words;
+  unsigned int sel;
+  int got, want, mismatches = 0;
+
+  /* Check every word selector against the reference.  */
+  for (sel = 0; sel < 4; sel++)
+    {
+      test_pextrw (src, sel, &got);
+      compute_correct_result (src, sel, &want);
+      if (got != want)
+        mismatches++;
+    }
+
+  if (mismatches != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c
new file mode 100644
index 00000000000..2c49d8ce5f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c
@@ -0,0 +1,60 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing" } */
+
+#include <string.h>
+#include "sse2-check.h"
+
+static void __attribute__((noinline, noclone))
+test_pinsrw (__m64 *i, int val, unsigned int imm, int *r)
+{
+  /* Store at R a copy of *I with 16-bit word IMM replaced by VAL.  The
+     store is a whole __m64.  Every call passes a literal selector, so
+     the run-time index is dispatched by hand.  */
+  __m64 *const out = (__m64 *) r;
+
+  /* Selectors outside 0..3 store nothing.  */
+  if (imm > 3)
+    return;
+
+  if (imm == 0)
+    *out = _m_pinsrw (*i, val, 0);
+  else if (imm == 1)
+    *out = _m_pinsrw (*i, val, 1);
+  else if (imm == 2)
+    *out = _m_pinsrw (*i, val, 2);
+  else
+    /* Only selector 3 is left after the guard above.  */
+    *out = _m_pinsrw (*i, val, 3);
+}
+
+/* Reference: copy the whole __m64 at src_p to res_p, then overwrite
+   16-bit word IMM with VAL (converted to short) when IMM is 0..3.  */
+static void
+compute_correct_result (__m64 *src_p, int val, unsigned int imm, int *res_p)
+{
+  __m64 *const whole = (__m64 *) res_p;
+  *whole = *src_p;
+  if (imm <= 3)
+    ((short *) res_p)[imm] = val;
+}
+
+static void
+sse2_test (void)
+{
+  /* Both helpers store a whole __m64 (8 bytes) through their int *
+     argument, so the buffers must be 64 bits wide: a plain int would be
+     overrun, and comparing ints would skip the upper two words.  */
+  long long r, ck;
+  int i, failed = 0;
+  __v4hi y = { 3320, -3339, 48, 4392 };
+
+  for (i = 0; i < 4; i++)
+    {
+      test_pinsrw ((__m64 *) &y, 0x1234, i, (int *) &r);
+      compute_correct_result ((__m64 *) &y, 0x1234, i, (int *) &ck);
+      if (r != ck)
+        failed++;
+    }
+  if (failed)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaddwd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaddwd.c
new file mode 100644
index 00000000000..8f08aabf954
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaddwd.c
@@ -0,0 +1,46 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmaddwd (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64 and store _m_pmaddwd's result through r.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_pmaddwd (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference pmaddwd: each 32-bit result sums two adjacent signed 16x16 products.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  const short *a = (const short *) dst_p;
+  const short *b = (const short *) src_p;
+  int *out = (int *) res_p;
+  out[0] = a[1] * b[1] + a[0] * b[0];
+  out[1] = a[3] * b[3] + a[2] * b[2];
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx = 0;
+
+  /* Entries idx and idx + 1 of MMXops form one operand pair.  */
+  while (idx < MMX_num_ops)
+    {
+      test_pmaddwd (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+      idx += 2;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxsw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxsw.c
new file mode 100644
index 00000000000..e4a6d87f344
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxsw.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmaxsw (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64 and store _m_pmaxsw's result through r.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_pmaxsw (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference pmaxsw: lane-wise maximum of four signed 16-bit values.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  const short *a = (const short *) dst_p;
+  const short *b = (const short *) src_p;
+  short *out = (short *) res_p;
+  int lane = 4;
+  while (lane-- > 0)
+    out[lane] = a[lane] < b[lane] ? b[lane] : a[lane];
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx = 0;
+
+  /* Entries idx and idx + 1 of MMXops form one operand pair.  */
+  while (idx < MMX_num_ops)
+    {
+      test_pmaxsw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+      idx += 2;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxub.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxub.c
new file mode 100644
index 00000000000..f943989b96f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxub.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmaxub (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64 and store _m_pmaxub's result through r.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_pmaxub (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference pmaxub: lane-wise maximum of eight unsigned bytes.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  const unsigned char *a = (const unsigned char *) dst_p;
+  const unsigned char *b = (const unsigned char *) src_p;
+  unsigned char *out = (unsigned char *) res_p;
+  int lane = 8;
+  while (lane-- > 0)
+    out[lane] = a[lane] < b[lane] ? b[lane] : a[lane];
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx = 0;
+
+  /* Entries idx and idx + 1 of MMXops form one operand pair.  */
+  while (idx < MMX_num_ops)
+    {
+      test_pmaxub (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+      idx += 2;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pminsw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pminsw.c
new file mode 100644
index 00000000000..6a92f7eb3a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pminsw.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pminsw (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64 and store _m_pminsw's result through r.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_pminsw (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference pminsw: lane-wise minimum of four signed 16-bit values.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  const short *a = (const short *) dst_p;
+  const short *b = (const short *) src_p;
+  short *out = (short *) res_p;
+  int lane = 4;
+  while (lane-- > 0)
+    out[lane] = a[lane] > b[lane] ? b[lane] : a[lane];
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx = 0;
+
+  /* Entries idx and idx + 1 of MMXops form one operand pair.  */
+  while (idx < MMX_num_ops)
+    {
+      test_pminsw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+      idx += 2;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pminub.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pminub.c
new file mode 100644
index 00000000000..a3b0e5093a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pminub.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pminub (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64 and store _m_pminub's result through r.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_pminub (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference pminub: lane-wise minimum of eight unsigned bytes.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  const unsigned char *a = (const unsigned char *) dst_p;
+  const unsigned char *b = (const unsigned char *) src_p;
+  unsigned char *out = (unsigned char *) res_p;
+  int lane = 8;
+  while (lane-- > 0)
+    out[lane] = a[lane] > b[lane] ? b[lane] : a[lane];
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx = 0;
+
+  /* Entries idx and idx + 1 of MMXops form one operand pair.  */
+  while (idx < MMX_num_ops)
+    {
+      test_pminub (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+      idx += 2;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmovmskb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmovmskb.c
new file mode 100644
index 00000000000..73b2a00d282
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmovmskb.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmovmskb (long long *ll1, int *r)
+{
+  /* View *ll1 as __m64 and store _m_pmovmskb's mask into *r.  */
+  *r = _m_pmovmskb (*(__m64 *) ll1);
+}
+
+/* Reference pmovmskb: bit i of the result is the top bit of byte i of *src_p.  */
+static void
+compute_correct_result (long long *src_p, int *res_p)
+{
+  const unsigned char *bytes = (const unsigned char *) src_p;
+  int mask = 0;
+  int bit;
+  for (bit = 7; bit >= 0; bit--)
+    mask = (mask << 1) | (bytes[bit] >> 7);
+  *res_p = mask;
+}
+
+static void
+sse2_test (void)
+{
+  int got, want;
+  int mismatches = 0;
+  int idx = 0;
+
+  /* Every MMXops entry is tested on its own.  */
+  while (idx < MMX_num_ops)
+    {
+      test_pmovmskb (&MMXops[idx], &got);
+      compute_correct_result (&MMXops[idx], &want);
+      mismatches += (want != got);
+      idx++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhuw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhuw.c
new file mode 100644
index 00000000000..ebf2a760fd7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhuw.c
@@ -0,0 +1,50 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmulhuw (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64 and store _m_pmulhuw's result through r.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_pmulhuw (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference pmulhuw: high 16 bits of each unsigned 16x16 product.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  unsigned short *dst = (unsigned short *) dst_p;
+  unsigned short *src = (unsigned short *) src_p;
+  unsigned short *res = (unsigned short *) res_p;
+  int i;
+  for (i = 0; i < 4; i++)
+    {
+      /* Widen first: unsigned short promotes to int, which 0xffff * 0xffff overflows.  */
+      res[i] = ((unsigned int) dst[i] * src[i]) >> 16;
+    }
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx = 0;
+
+  /* Entries idx and idx + 1 of MMXops form one operand pair.  */
+  while (idx < MMX_num_ops)
+    {
+      test_pmulhuw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+      idx += 2;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhw.c
new file mode 100644
index 00000000000..4aa8d605c4c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhw.c
@@ -0,0 +1,52 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmulhw (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64 and store _m_pmulhw's result through r.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_pmulhw (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference pmulhw: upper 16 bits of each signed 16x16 product.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  const short *a = (const short *) dst_p;
+  const short *b = (const short *) src_p;
+  short *out = (short *) res_p;
+  int lane;
+
+  for (lane = 0; lane != 4; ++lane)
+    {
+      /* The product is formed in int; only its bits 31..16 are stored.  */
+      int product = a[lane] * b[lane];
+      out[lane] = product >> 16;
+    }
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx = 0;
+
+  /* Entries idx and idx + 1 of MMXops form one operand pair.  */
+  while (idx < MMX_num_ops)
+    {
+      test_pmulhw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+      idx += 2;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmullw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmullw.c
new file mode 100644
index 00000000000..732687e8cd7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmullw.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmullw (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64 and store _m_pmullw's result through r.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_pmullw (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference pmullw: each signed 16x16 product, stored back into a 16-bit lane.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  const short *a = (const short *) dst_p;
+  const short *b = (const short *) src_p;
+  short *out = (short *) res_p;
+  int lane;
+
+  for (lane = 0; lane != 4; ++lane)
+    {
+      /* The product is formed in int and converted to short on the store.  */
+      out[lane] = a[lane] * b[lane];
+    }
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx = 0;
+
+  /* Entries idx and idx + 1 of MMXops form one operand pair.  */
+  while (idx < MMX_num_ops)
+    {
+      test_pmullw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+      idx += 2;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmuludq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmuludq.c
new file mode 100644
index 00000000000..eec4d970d63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmuludq.c
@@ -0,0 +1,46 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmuludq (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64 and store _mm_mul_su32's result through r.  */
+  __m64 *out = (__m64 *) r;
+  *out = _mm_mul_su32 (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference pmuludq: 64-bit product of the first unsigned 32-bit word of each operand.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  const unsigned int *a = (const unsigned int *) dst_p;
+  const unsigned int *b = (const unsigned int *) src_p;
+  unsigned long long *out = (unsigned long long *) res_p;
+  *out = a[0]; /* widen first so the multiply below is done in 64 bits */
+  *out *= b[0];
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx = 0;
+
+  /* Entries idx and idx + 1 of MMXops form one operand pair.  */
+  while (idx < MMX_num_ops)
+    {
+      test_pmuludq (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+      idx += 2;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-por.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-por.c
new file mode 100644
index 00000000000..79d3a9a548f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-por.c
@@ -0,0 +1,43 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_por (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64 and store _m_por's result through r.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_por (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference por: bitwise OR of the two 64-bit operands.  */
+static void
+compute_correct_result (unsigned long long *dst,
+                        unsigned long long *src,
+                        unsigned long long *res)
+{
+  *res = *src | *dst;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx = 0;
+
+  /* Entries idx and idx + 1 of MMXops form one operand pair.  */
+  while (idx < MMX_num_ops)
+    {
+      test_por (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+      idx += 2;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psadbw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psadbw.c
new file mode 100644
index 00000000000..324ce8c51a2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psadbw.c
@@ -0,0 +1,57 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psadbw (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64 and store _m_psadbw's result through r.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_psadbw (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference psadbw: sum of the eight absolute byte differences in word 0.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  unsigned char *dst = (unsigned char *) dst_p;
+  unsigned char *src = (unsigned char *) src_p;
+  unsigned short *res = (unsigned short *) res_p;
+  int i, tmp;
+  unsigned int sum = 0;
+  for (i = 0; i < 8; i++)
+    {
+      tmp = dst[i] - src[i];
+      if (tmp < 0)
+        tmp = -tmp;
+      sum += tmp;
+    }
+  res[0] = sum;
+  /* Clear all three upper words; the caller compares the full 64 bits.  */
+  for (i = 1; i < 4; i++)
+    res[i] = 0;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx = 0;
+
+  /* Entries idx and idx + 1 of MMXops form one operand pair.  */
+  while (idx < MMX_num_ops)
+    {
+      test_psadbw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+      idx += 2;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pshufw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pshufw.c
new file mode 100644
index 00000000000..8da4b7afac7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pshufw.c
@@ -0,0 +1,247 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pshufw (long long *ll1, unsigned int imm, long long *r)
+{
+  /* One case per literal selector 0..63; any other imm leaves *r untouched.  */
+  __m64 v = *(__m64 *) ll1;
+  __m64 *out = (__m64 *) r;
+  switch (imm)
+    {
+    case 0:
+      *out = _m_pshufw (v, 0);
+      break;
+    case 1:
+      *out = _m_pshufw (v, 1);
+      break;
+    case 2:
+      *out = _m_pshufw (v, 2);
+      break;
+    case 3:
+      *out = _m_pshufw (v, 3);
+      break;
+    case 4:
+      *out = _m_pshufw (v, 4);
+      break;
+    case 5:
+      *out = _m_pshufw (v, 5);
+      break;
+    case 6:
+      *out = _m_pshufw (v, 6);
+      break;
+    case 7:
+      *out = _m_pshufw (v, 7);
+      break;
+    case 8:
+      *out = _m_pshufw (v, 8);
+      break;
+    case 9:
+      *out = _m_pshufw (v, 9);
+      break;
+    case 10:
+      *out = _m_pshufw (v, 10);
+      break;
+    case 11:
+      *out = _m_pshufw (v, 11);
+      break;
+    case 12:
+      *out = _m_pshufw (v, 12);
+      break;
+    case 13:
+      *out = _m_pshufw (v, 13);
+      break;
+    case 14:
+      *out = _m_pshufw (v, 14);
+      break;
+    case 15:
+      *out = _m_pshufw (v, 15);
+      break;
+    case 16:
+      *out = _m_pshufw (v, 16);
+      break;
+    case 17:
+      *out = _m_pshufw (v, 17);
+      break;
+    case 18:
+      *out = _m_pshufw (v, 18);
+      break;
+    case 19:
+      *out = _m_pshufw (v, 19);
+      break;
+    case 20:
+      *out = _m_pshufw (v, 20);
+      break;
+    case 21:
+      *out = _m_pshufw (v, 21);
+      break;
+    case 22:
+      *out = _m_pshufw (v, 22);
+      break;
+    case 23:
+      *out = _m_pshufw (v, 23);
+      break;
+    case 24:
+      *out = _m_pshufw (v, 24);
+      break;
+    case 25:
+      *out = _m_pshufw (v, 25);
+      break;
+    case 26:
+      *out = _m_pshufw (v, 26);
+      break;
+    case 27:
+      *out = _m_pshufw (v, 27);
+      break;
+    case 28:
+      *out = _m_pshufw (v, 28);
+      break;
+    case 29:
+      *out = _m_pshufw (v, 29);
+      break;
+    case 30:
+      *out = _m_pshufw (v, 30);
+      break;
+    case 31:
+      *out = _m_pshufw (v, 31);
+      break;
+    case 32:
+      *out = _m_pshufw (v, 32);
+      break;
+    case 33:
+      *out = _m_pshufw (v, 33);
+      break;
+    case 34:
+      *out = _m_pshufw (v, 34);
+      break;
+    case 35:
+      *out = _m_pshufw (v, 35);
+      break;
+    case 36:
+      *out = _m_pshufw (v, 36);
+      break;
+    case 37:
+      *out = _m_pshufw (v, 37);
+      break;
+    case 38:
+      *out = _m_pshufw (v, 38);
+      break;
+    case 39:
+      *out = _m_pshufw (v, 39);
+      break;
+    case 40:
+      *out = _m_pshufw (v, 40);
+      break;
+    case 41:
+      *out = _m_pshufw (v, 41);
+      break;
+    case 42:
+      *out = _m_pshufw (v, 42);
+      break;
+    case 43:
+      *out = _m_pshufw (v, 43);
+      break;
+    case 44:
+      *out = _m_pshufw (v, 44);
+      break;
+    case 45:
+      *out = _m_pshufw (v, 45);
+      break;
+    case 46:
+      *out = _m_pshufw (v, 46);
+      break;
+    case 47:
+      *out = _m_pshufw (v, 47);
+      break;
+    case 48:
+      *out = _m_pshufw (v, 48);
+      break;
+    case 49:
+      *out = _m_pshufw (v, 49);
+      break;
+    case 50:
+      *out = _m_pshufw (v, 50);
+      break;
+    case 51:
+      *out = _m_pshufw (v, 51);
+      break;
+    case 52:
+      *out = _m_pshufw (v, 52);
+      break;
+    case 53:
+      *out = _m_pshufw (v, 53);
+      break;
+    case 54:
+      *out = _m_pshufw (v, 54);
+      break;
+    case 55:
+      *out = _m_pshufw (v, 55);
+      break;
+    case 56:
+      *out = _m_pshufw (v, 56);
+      break;
+    case 57:
+      *out = _m_pshufw (v, 57);
+      break;
+    case 58:
+      *out = _m_pshufw (v, 58);
+      break;
+    case 59:
+      *out = _m_pshufw (v, 59);
+      break;
+    case 60:
+      *out = _m_pshufw (v, 60);
+      break;
+    case 61:
+      *out = _m_pshufw (v, 61);
+      break;
+    case 62:
+      *out = _m_pshufw (v, 62);
+      break;
+    case 63:
+      *out = _m_pshufw (v, 63);
+      break;
+    }
+}
+
+/* Reference pshufw: bits [2k+1:2k] of imm pick the source lane for result lane k.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+                        long long *res_p)
+{
+  const unsigned long long packed = *(unsigned long long *) src_p;
+  unsigned short *out = (unsigned short *) res_p;
+  unsigned int selectors = imm;
+  int lane;
+  for (lane = 0; lane < 4; lane++, selectors >>= 2)
+    {
+      unsigned int pick = selectors & 0x3;
+      out[lane] = (unsigned short) (packed >> (16 * pick));
+    }
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* The operand index doubles as the shuffle immediate.  Stop after
+     index 63: test_pshufw has no case for larger immediates and would
+     leave its output untouched.  */
+  for (idx = 0; idx < MMX_num_ops && idx <= 63; idx++)
+    {
+      test_pshufw (&MMXops[idx], idx, &got);
+      compute_correct_result (&MMXops[idx], idx, &want);
+      if (want != got)
+        ++mismatches;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pslld.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pslld.c
new file mode 100644
index 00000000000..0eead0822fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pslld.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pslld (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64 and store _m_pslld's result through r.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_pslld (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference pslld: shift both 32-bit lanes of *dst_p left by the count in
+   *src_p; a count above 31 yields zero.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  const unsigned int *val = (const unsigned int *) dst_p;
+  const unsigned int *cnt = (const unsigned int *) src_p;
+  unsigned int *out = (unsigned int *) res_p;
+  /* A nonzero upper count word also means "out of range".  */
+  int too_big = cnt[1] != 0 || cnt[0] > 31;
+  int lane;
+
+  for (lane = 0; lane < 2; lane++)
+    out[lane] = too_big ? 0 : val[lane] << cnt[0];
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx = 0;
+
+  /* Entry idx is the value, entry idx + 1 the shift count.  */
+  while (idx < MMX_num_ops)
+    {
+      test_pslld (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+      idx += 2;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pslldi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pslldi.c
new file mode 100644
index 00000000000..6ae1d4a78e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pslldi.c
@@ -0,0 +1,152 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllwi (long long *ll1, unsigned int imm, long long *r)
+{
+  __m64 v = *(__m64 *) ll1;
+  __m64 *out = (__m64 *) r;
+  switch (imm) /* NOTE: despite its name, this wrapper exercises _m_pslldi.  */
+    {
+    case 0:
+      *out = _m_pslldi (v, 0);
+      break;
+    case 1:
+      *out = _m_pslldi (v, 1);
+      break;
+    case 2:
+      *out = _m_pslldi (v, 2);
+      break;
+    case 3:
+      *out = _m_pslldi (v, 3);
+      break;
+    case 4:
+      *out = _m_pslldi (v, 4);
+      break;
+    case 5:
+      *out = _m_pslldi (v, 5);
+      break;
+    case 6:
+      *out = _m_pslldi (v, 6);
+      break;
+    case 7:
+      *out = _m_pslldi (v, 7);
+      break;
+    case 8:
+      *out = _m_pslldi (v, 8);
+      break;
+    case 9:
+      *out = _m_pslldi (v, 9);
+      break;
+    case 10:
+      *out = _m_pslldi (v, 10);
+      break;
+    case 11:
+      *out = _m_pslldi (v, 11);
+      break;
+    case 12:
+      *out = _m_pslldi (v, 12);
+      break;
+    case 13:
+      *out = _m_pslldi (v, 13);
+      break;
+    case 14:
+      *out = _m_pslldi (v, 14);
+      break;
+    case 15:
+      *out = _m_pslldi (v, 15);
+      break;
+    case 16:
+      *out = _m_pslldi (v, 16);
+      break;
+    case 17:
+      *out = _m_pslldi (v, 17);
+      break;
+    case 18:
+      *out = _m_pslldi (v, 18);
+      break;
+    case 19:
+      *out = _m_pslldi (v, 19);
+      break;
+    case 20:
+      *out = _m_pslldi (v, 20);
+      break;
+    case 21:
+      *out = _m_pslldi (v, 21);
+      break;
+    case 22:
+      *out = _m_pslldi (v, 22);
+      break;
+    case 23:
+      *out = _m_pslldi (v, 23);
+      break;
+    case 24:
+      *out = _m_pslldi (v, 24);
+      break;
+    case 25:
+      *out = _m_pslldi (v, 25);
+      break;
+    case 26:
+      *out = _m_pslldi (v, 26);
+      break;
+    case 27:
+      *out = _m_pslldi (v, 27);
+      break;
+    case 28:
+      *out = _m_pslldi (v, 28);
+      break;
+    case 29:
+      *out = _m_pslldi (v, 29);
+      break;
+    case 30:
+      *out = _m_pslldi (v, 30);
+      break;
+    case 31:
+      *out = _m_pslldi (v, 31);
+      break;
+    default: /* every count above 31 is exercised with the literal 32 */
+      *out = _m_pslldi (v, 32);
+    }
+}
+
+/* Reference pslldi: shift both 32-bit lanes left by imm; imm above 31 yields zero.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+                        long long *res_p)
+{
+  const unsigned int *in = (const unsigned int *) src_p;
+  unsigned int *out = (unsigned int *) res_p;
+  int lane;
+
+  for (lane = 0; lane < 2; lane++)
+    {
+      /* The C shift is only evaluated for in-range counts.  */
+      out[lane] = imm <= 31 ? in[lane] << imm : 0;
+    }
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Each operand also supplies its own shift count (its value converted
+     to unsigned int).  */
+  for (idx = 0; idx < MMX_num_ops; idx++)
+    {
+      unsigned int count = MMXops[idx];
+      test_psllwi (&MMXops[idx], count, &got);
+      compute_correct_result (&MMXops[idx], count, &want);
+      if (want != got)
+        ++mismatches;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psllq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllq.c
new file mode 100644
index 00000000000..0283ba0f329
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllq.c
@@ -0,0 +1,46 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllq (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64 and store _m_psllq's result through r.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_psllq (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference psllq: shift *dst left by the count in *src; a count above 63 yields zero.  */
+static void
+compute_correct_result (unsigned long long *dst,
+                        unsigned long long *src,
+                        unsigned long long *res)
+{
+  const unsigned long long count = *src;
+
+  /* The C shift is only evaluated for in-range counts.  */
+  *res = count <= 63 ? *dst << count : 0;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx = 0;
+
+  /* Entry idx is the value, entry idx + 1 the shift count.  */
+  while (idx < MMX_num_ops)
+    {
+      test_psllq (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+      idx += 2;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psllqi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllqi.c
new file mode 100644
index 00000000000..48d7e5f2e31
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllqi.c
@@ -0,0 +1,244 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllwi (long long *ll1, unsigned int imm, long long *r)
+{
+  __m64 v = *(__m64 *) ll1;
+  __m64 *out = (__m64 *) r;
+  switch (imm) /* NOTE: despite its name, this wrapper exercises _m_psllqi.  */
+    {
+    case 0:
+      *out = _m_psllqi (v, 0);
+      break;
+    case 1:
+      *out = _m_psllqi (v, 1);
+      break;
+    case 2:
+      *out = _m_psllqi (v, 2);
+      break;
+    case 3:
+      *out = _m_psllqi (v, 3);
+      break;
+    case 4:
+      *out = _m_psllqi (v, 4);
+      break;
+    case 5:
+      *out = _m_psllqi (v, 5);
+      break;
+    case 6:
+      *out = _m_psllqi (v, 6);
+      break;
+    case 7:
+      *out = _m_psllqi (v, 7);
+      break;
+    case 8:
+      *out = _m_psllqi (v, 8);
+      break;
+    case 9:
+      *out = _m_psllqi (v, 9);
+      break;
+    case 10:
+      *out = _m_psllqi (v, 10);
+      break;
+    case 11:
+      *out = _m_psllqi (v, 11);
+      break;
+    case 12:
+      *out = _m_psllqi (v, 12);
+      break;
+    case 13:
+      *out = _m_psllqi (v, 13);
+      break;
+    case 14:
+      *out = _m_psllqi (v, 14);
+      break;
+    case 15:
+      *out = _m_psllqi (v, 15);
+      break;
+    case 16:
+      *out = _m_psllqi (v, 16);
+      break;
+    case 17:
+      *out = _m_psllqi (v, 17);
+      break;
+    case 18:
+      *out = _m_psllqi (v, 18);
+      break;
+    case 19:
+      *out = _m_psllqi (v, 19);
+      break;
+    case 20:
+      *out = _m_psllqi (v, 20);
+      break;
+    case 21:
+      *out = _m_psllqi (v, 21);
+      break;
+    case 22:
+      *out = _m_psllqi (v, 22);
+      break;
+    case 23:
+      *out = _m_psllqi (v, 23);
+      break;
+    case 24:
+      *out = _m_psllqi (v, 24);
+      break;
+    case 25:
+      *out = _m_psllqi (v, 25);
+      break;
+    case 26:
+      *out = _m_psllqi (v, 26);
+      break;
+    case 27:
+      *out = _m_psllqi (v, 27);
+      break;
+    case 28:
+      *out = _m_psllqi (v, 28);
+      break;
+    case 29:
+      *out = _m_psllqi (v, 29);
+      break;
+    case 30:
+      *out = _m_psllqi (v, 30);
+      break;
+    case 31:
+      *out = _m_psllqi (v, 31);
+      break;
+    case 32:
+      *out = _m_psllqi (v, 32);
+      break;
+    case 33:
+      *out = _m_psllqi (v, 33);
+      break;
+    case 34:
+      *out = _m_psllqi (v, 34);
+      break;
+    case 35:
+      *out = _m_psllqi (v, 35);
+      break;
+    case 36:
+      *out = _m_psllqi (v, 36);
+      break;
+    case 37:
+      *out = _m_psllqi (v, 37);
+      break;
+    case 38:
+      *out = _m_psllqi (v, 38);
+      break;
+    case 39:
+      *out = _m_psllqi (v, 39);
+      break;
+    case 40:
+      *out = _m_psllqi (v, 40);
+      break;
+    case 41:
+      *out = _m_psllqi (v, 41);
+      break;
+    case 42:
+      *out = _m_psllqi (v, 42);
+      break;
+    case 43:
+      *out = _m_psllqi (v, 43);
+      break;
+    case 44:
+      *out = _m_psllqi (v, 44);
+      break;
+    case 45:
+      *out = _m_psllqi (v, 45);
+      break;
+    case 46:
+      *out = _m_psllqi (v, 46);
+      break;
+    case 47:
+      *out = _m_psllqi (v, 47);
+      break;
+    case 48:
+      *out = _m_psllqi (v, 48);
+      break;
+    case 49:
+      *out = _m_psllqi (v, 49);
+      break;
+    case 50:
+      *out = _m_psllqi (v, 50);
+      break;
+    case 51:
+      *out = _m_psllqi (v, 51);
+      break;
+    case 52:
+      *out = _m_psllqi (v, 52);
+      break;
+    case 53:
+      *out = _m_psllqi (v, 53);
+      break;
+    case 54:
+      *out = _m_psllqi (v, 54);
+      break;
+    case 55:
+      *out = _m_psllqi (v, 55);
+      break;
+    case 56:
+      *out = _m_psllqi (v, 56);
+      break;
+    case 57:
+      *out = _m_psllqi (v, 57);
+      break;
+    case 58:
+      *out = _m_psllqi (v, 58);
+      break;
+    case 59:
+      *out = _m_psllqi (v, 59);
+      break;
+    case 60:
+      *out = _m_psllqi (v, 60);
+      break;
+    case 61:
+      *out = _m_psllqi (v, 61);
+      break;
+    case 62:
+      *out = _m_psllqi (v, 62);
+      break;
+    case 63:
+      *out = _m_psllqi (v, 63);
+      break;
+    default: /* every count above 63 is exercised with the literal 64 */
+      *out = _m_psllqi (v, 64);
+    }
+}
+
+/* Reference psllqi: shift *src left by imm; imm above 63 yields zero.  */
+static void
+compute_correct_result (unsigned long long *src, unsigned int imm,
+                        unsigned long long *res)
+{
+  /* The C shift is only evaluated for in-range counts.  */
+  if (imm <= 63)
+    *res = *src << imm;
+  else
+    *res = 0;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Each operand also supplies its own shift count (its value converted
+     to unsigned int).  */
+  for (idx = 0; idx < MMX_num_ops; idx++)
+    {
+      unsigned int count = MMXops[idx];
+      test_psllwi (&MMXops[idx], count, &got);
+      compute_correct_result (&MMXops[idx], count, &want);
+      if (want != got)
+        ++mismatches;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psllw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllw.c
new file mode 100644
index 00000000000..10d1b79bf26
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllw.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllw (long long *ll1, long long *ll2, long long *r)
+{
+  /* View both operands as __m64 and store _m_psllw's result through r.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_psllw (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference psllw: shift all four 16-bit lanes of *dst_p left by the count
+   in *src_p; a count above 15 yields zero.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  const unsigned short *val = (const unsigned short *) dst_p;
+  const unsigned int *cnt = (const unsigned int *) src_p;
+  unsigned short *out = (unsigned short *) res_p;
+  /* A nonzero upper count word also means "out of range".  */
+  int too_big = cnt[1] != 0 || cnt[0] > 15;
+  int lane;
+
+  for (lane = 0; lane < 4; lane++)
+    out[lane] = too_big ? 0 : val[lane] << cnt[0];
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx = 0;
+
+  /* Entry idx is the value, entry idx + 1 the shift count.  */
+  while (idx < MMX_num_ops)
+    {
+      test_psllw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      mismatches += (want != got);
+      idx += 2;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psllwi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllwi.c
new file mode 100644
index 00000000000..373fa5c146b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllwi.c
@@ -0,0 +1,104 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllwi (long long *ll1, unsigned int imm, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret the 64-bit input as an __m64.  */
+ switch (imm) /* Each case passes its count to the intrinsic as a literal.  */
+ {
+ case 0:
+ *(__m64 *) r = _m_psllwi (t1, 0);
+ break;
+ case 1:
+ *(__m64 *) r = _m_psllwi (t1, 1);
+ break;
+ case 2:
+ *(__m64 *) r = _m_psllwi (t1, 2);
+ break;
+ case 3:
+ *(__m64 *) r = _m_psllwi (t1, 3);
+ break;
+ case 4:
+ *(__m64 *) r = _m_psllwi (t1, 4);
+ break;
+ case 5:
+ *(__m64 *) r = _m_psllwi (t1, 5);
+ break;
+ case 6:
+ *(__m64 *) r = _m_psllwi (t1, 6);
+ break;
+ case 7:
+ *(__m64 *) r = _m_psllwi (t1, 7);
+ break;
+ case 8:
+ *(__m64 *) r = _m_psllwi (t1, 8);
+ break;
+ case 9:
+ *(__m64 *) r = _m_psllwi (t1, 9);
+ break;
+ case 10:
+ *(__m64 *) r = _m_psllwi (t1, 10);
+ break;
+ case 11:
+ *(__m64 *) r = _m_psllwi (t1, 11);
+ break;
+ case 12:
+ *(__m64 *) r = _m_psllwi (t1, 12);
+ break;
+ case 13:
+ *(__m64 *) r = _m_psllwi (t1, 13);
+ break;
+ case 14:
+ *(__m64 *) r = _m_psllwi (t1, 14);
+ break;
+ case 15:
+ *(__m64 *) r = _m_psllwi (t1, 15);
+ break;
+ default: /* Every other imm is exercised with count 16.  */
+ *(__m64 *) r = _m_psllwi (t1, 16);
+ break;
+ }
+}
+
+/* Reference result for _m_psllwi: shift each of the four 16-bit lanes of *src_p left by imm; imm above 15 zeroes every lane.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+ long long *res_p)
+{
+ unsigned short *src = (unsigned short *) src_p;
+ unsigned short *res = (unsigned short *) res_p;
+ int i;
+ if (imm > 15)
+ for (i = 0; i < 4; i++)
+ res[i] = 0;
+ else
+ for (i = 0; i < 4; i++)
+ res[i] = src[i] << imm; /* Store to unsigned short keeps the low 16 bits.  */
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ unsigned int count;
+ long long r, ck;
+ int fail = 0; /* Number of inputs whose two results differed.  */
+
+ /* Run the MMX tests: each MMXops entry is the operand and, converted to unsigned int, the count.  */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ count = MMXops[i];
+ test_psllwi (&MMXops[i], count, &r);
+ compute_correct_result (&MMXops[i], count, &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrad.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrad.c
new file mode 100644
index 00000000000..a9d41c273cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrad.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrad (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as an __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psrad (t1, t2); /* Store the intrinsic's result through r.  */
+}
+
+/* Reference result for _m_psrad: shift each of the two signed 32-bit lanes of *dst_p right by the count in *src_p; an out-of-range count yields -1 or 0 per lane sign.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ int *dst = (int *) dst_p;
+ unsigned int *src = (unsigned int *) src_p;
+ int *res = (int *) res_p;
+ int i;
+ if (src[1] || src[0] > 31) /* Out of range: second 32-bit word nonzero or first word above 31.  */
+ for (i = 0; i < 2; i++)
+ res[i] = dst[i] < 0 ? -1 : 0;
+ else
+ for (i = 0; i < 2; i++)
+ res[i] = dst[i] >> src[0]; /* Signed >>; NOTE(review): relies on the compiler sign-extending here.  */
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0; /* Number of operand pairs whose two results differed.  */
+
+ /* Run the MMX tests: walk MMXops two entries at a time, using each pair as the operands.  */
+ for (i = 0; i < MMX_num_ops; i += 2) /* NOTE(review): reads MMXops[i + 1]; assumes MMX_num_ops is even -- confirm in mmx-vals.h.  */
+ {
+ test_psrad (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psradi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psradi.c
new file mode 100644
index 00000000000..8237250c48f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psradi.c
@@ -0,0 +1,152 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psradi (long long *ll1, unsigned int imm, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret the 64-bit input as an __m64.  */
+ switch (imm) /* Each case passes its count to the intrinsic as a literal.  */
+ {
+ case 0:
+ *(__m64 *) r = _m_psradi (t1, 0);
+ break;
+ case 1:
+ *(__m64 *) r = _m_psradi (t1, 1);
+ break;
+ case 2:
+ *(__m64 *) r = _m_psradi (t1, 2);
+ break;
+ case 3:
+ *(__m64 *) r = _m_psradi (t1, 3);
+ break;
+ case 4:
+ *(__m64 *) r = _m_psradi (t1, 4);
+ break;
+ case 5:
+ *(__m64 *) r = _m_psradi (t1, 5);
+ break;
+ case 6:
+ *(__m64 *) r = _m_psradi (t1, 6);
+ break;
+ case 7:
+ *(__m64 *) r = _m_psradi (t1, 7);
+ break;
+ case 8:
+ *(__m64 *) r = _m_psradi (t1, 8);
+ break;
+ case 9:
+ *(__m64 *) r = _m_psradi (t1, 9);
+ break;
+ case 10:
+ *(__m64 *) r = _m_psradi (t1, 10);
+ break;
+ case 11:
+ *(__m64 *) r = _m_psradi (t1, 11);
+ break;
+ case 12:
+ *(__m64 *) r = _m_psradi (t1, 12);
+ break;
+ case 13:
+ *(__m64 *) r = _m_psradi (t1, 13);
+ break;
+ case 14:
+ *(__m64 *) r = _m_psradi (t1, 14);
+ break;
+ case 15:
+ *(__m64 *) r = _m_psradi (t1, 15);
+ break;
+ case 16:
+ *(__m64 *) r = _m_psradi (t1, 16);
+ break;
+ case 17:
+ *(__m64 *) r = _m_psradi (t1, 17);
+ break;
+ case 18:
+ *(__m64 *) r = _m_psradi (t1, 18);
+ break;
+ case 19:
+ *(__m64 *) r = _m_psradi (t1, 19);
+ break;
+ case 20:
+ *(__m64 *) r = _m_psradi (t1, 20);
+ break;
+ case 21:
+ *(__m64 *) r = _m_psradi (t1, 21);
+ break;
+ case 22:
+ *(__m64 *) r = _m_psradi (t1, 22);
+ break;
+ case 23:
+ *(__m64 *) r = _m_psradi (t1, 23);
+ break;
+ case 24:
+ *(__m64 *) r = _m_psradi (t1, 24);
+ break;
+ case 25:
+ *(__m64 *) r = _m_psradi (t1, 25);
+ break;
+ case 26:
+ *(__m64 *) r = _m_psradi (t1, 26);
+ break;
+ case 27:
+ *(__m64 *) r = _m_psradi (t1, 27);
+ break;
+ case 28:
+ *(__m64 *) r = _m_psradi (t1, 28);
+ break;
+ case 29:
+ *(__m64 *) r = _m_psradi (t1, 29);
+ break;
+ case 30:
+ *(__m64 *) r = _m_psradi (t1, 30);
+ break;
+ case 31:
+ *(__m64 *) r = _m_psradi (t1, 31);
+ break;
+ default: /* Every other imm is exercised with count 32.  */
+ *(__m64 *) r = _m_psradi (t1, 32);
+ break;
+ }
+}
+
+/* Reference result for _m_psradi: shift each of the two signed 32-bit lanes of *src_p right by imm; imm above 31 yields -1 or 0 per lane sign.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+ long long *res_p)
+{
+ int *src = (int *) src_p;
+ int *res = (int *) res_p;
+ int i;
+ if (imm > 31)
+ for (i = 0; i < 2; i++)
+ res[i] = src[i] < 0 ? -1 : 0;
+ else
+ for (i = 0; i < 2; i++)
+ res[i] = src[i] >> imm; /* Signed >>; NOTE(review): relies on the compiler sign-extending here.  */
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ unsigned int count;
+ long long r, ck;
+ int fail = 0; /* Number of inputs whose two results differed.  */
+
+ /* Run the MMX tests: each MMXops entry is the operand and, converted to unsigned int, the count.  */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ count = MMXops[i];
+ test_psradi (&MMXops[i], count, &r);
+ compute_correct_result (&MMXops[i], count, &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psraw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psraw.c
new file mode 100644
index 00000000000..3fed516b811
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psraw.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psraw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as an __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psraw (t1, t2); /* Store the intrinsic's result through r.  */
+}
+
+/* Reference result for _m_psraw: shift each of the four signed 16-bit lanes of *dst_p right by the count in *src_p; an out-of-range count yields -1 or 0 per lane sign.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ unsigned int *src = (unsigned int *) src_p;
+ short *res = (short *) res_p;
+ int i;
+ if (src[1] || src[0] > 15) /* Out of range: second 32-bit word nonzero or first word above 15.  */
+ for (i = 0; i < 4; i++)
+ res[i] = dst[i] < 0 ? -1 : 0;
+ else
+ for (i = 0; i < 4; i++)
+ res[i] = dst[i] >> src[0]; /* Signed >>; NOTE(review): relies on the compiler sign-extending here.  */
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0; /* Number of operand pairs whose two results differed.  */
+
+ /* Run the MMX tests: walk MMXops two entries at a time, using each pair as the operands.  */
+ for (i = 0; i < MMX_num_ops; i += 2) /* NOTE(review): reads MMXops[i + 1]; assumes MMX_num_ops is even -- confirm in mmx-vals.h.  */
+ {
+ test_psraw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrawi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrawi.c
new file mode 100644
index 00000000000..1c8973db3db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrawi.c
@@ -0,0 +1,104 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrawi (long long *ll1, unsigned int imm, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret the 64-bit input as an __m64.  */
+ switch (imm) /* Each case passes its count to the intrinsic as a literal.  */
+ {
+ case 0:
+ *(__m64 *) r = _m_psrawi (t1, 0);
+ break;
+ case 1:
+ *(__m64 *) r = _m_psrawi (t1, 1);
+ break;
+ case 2:
+ *(__m64 *) r = _m_psrawi (t1, 2);
+ break;
+ case 3:
+ *(__m64 *) r = _m_psrawi (t1, 3);
+ break;
+ case 4:
+ *(__m64 *) r = _m_psrawi (t1, 4);
+ break;
+ case 5:
+ *(__m64 *) r = _m_psrawi (t1, 5);
+ break;
+ case 6:
+ *(__m64 *) r = _m_psrawi (t1, 6);
+ break;
+ case 7:
+ *(__m64 *) r = _m_psrawi (t1, 7);
+ break;
+ case 8:
+ *(__m64 *) r = _m_psrawi (t1, 8);
+ break;
+ case 9:
+ *(__m64 *) r = _m_psrawi (t1, 9);
+ break;
+ case 10:
+ *(__m64 *) r = _m_psrawi (t1, 10);
+ break;
+ case 11:
+ *(__m64 *) r = _m_psrawi (t1, 11);
+ break;
+ case 12:
+ *(__m64 *) r = _m_psrawi (t1, 12);
+ break;
+ case 13:
+ *(__m64 *) r = _m_psrawi (t1, 13);
+ break;
+ case 14:
+ *(__m64 *) r = _m_psrawi (t1, 14);
+ break;
+ case 15:
+ *(__m64 *) r = _m_psrawi (t1, 15);
+ break;
+ default: /* Every other imm is exercised with count 16.  */
+ *(__m64 *) r = _m_psrawi (t1, 16);
+ break;
+ }
+}
+
+/* Reference result for _m_psrawi: shift each of the four signed 16-bit lanes of *src_p right by imm; imm above 15 yields -1 or 0 per lane sign.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+ long long *res_p)
+{
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int i;
+ if (imm > 15)
+ for (i = 0; i < 4; i++)
+ res[i] = src[i] < 0 ? -1 : 0;
+ else
+ for (i = 0; i < 4; i++)
+ res[i] = src[i] >> imm; /* Signed >>; NOTE(review): relies on the compiler sign-extending here.  */
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ unsigned int count;
+ long long r, ck;
+ int fail = 0; /* Number of inputs whose two results differed.  */
+
+ /* Run the MMX tests: each MMXops entry is the operand and, converted to unsigned int, the count.  */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ count = MMXops[i];
+ test_psrawi (&MMXops[i], count, &r);
+ compute_correct_result (&MMXops[i], count, &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrld.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrld.c
new file mode 100644
index 00000000000..b7c9565cb24
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrld.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrld (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as an __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psrld (t1, t2); /* Store the intrinsic's result through r.  */
+}
+
+/* Reference result for _m_psrld: logically shift each of the two 32-bit lanes of *dst_p right by the count in *src_p; an out-of-range count zeroes every lane.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned int *dst = (unsigned int *) dst_p; /* Unsigned lanes so that >> shifts in zeros rather than sign bits.  */
+ unsigned int *src = (unsigned int *) src_p;
+ unsigned int *res = (unsigned int *) res_p;
+ int i;
+ if (src[1] || src[0] > 31) /* Out of range: second 32-bit word nonzero or first word above 31.  */
+ for (i = 0; i < 2; i++)
+ res[i] = 0;
+ else
+ for (i = 0; i < 2; i++)
+ res[i] = dst[i] >> src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0; /* Number of operand pairs whose two results differed.  */
+
+ /* Run the MMX tests: walk MMXops two entries at a time, using each pair as the operands.  */
+ for (i = 0; i < MMX_num_ops; i += 2) /* NOTE(review): reads MMXops[i + 1]; assumes MMX_num_ops is even -- confirm in mmx-vals.h.  */
+ {
+ test_psrld (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrldi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrldi.c
new file mode 100644
index 00000000000..6a150ee2eff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrldi.c
@@ -0,0 +1,152 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrldi (long long *ll1, unsigned int imm, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret the 64-bit input as an __m64.  */
+ switch (imm) /* Each case passes its count to the intrinsic as a literal.  */
+ {
+ case 0:
+ *(__m64 *) r = _m_psrldi (t1, 0);
+ break;
+ case 1:
+ *(__m64 *) r = _m_psrldi (t1, 1);
+ break;
+ case 2:
+ *(__m64 *) r = _m_psrldi (t1, 2);
+ break;
+ case 3:
+ *(__m64 *) r = _m_psrldi (t1, 3);
+ break;
+ case 4:
+ *(__m64 *) r = _m_psrldi (t1, 4);
+ break;
+ case 5:
+ *(__m64 *) r = _m_psrldi (t1, 5);
+ break;
+ case 6:
+ *(__m64 *) r = _m_psrldi (t1, 6);
+ break;
+ case 7:
+ *(__m64 *) r = _m_psrldi (t1, 7);
+ break;
+ case 8:
+ *(__m64 *) r = _m_psrldi (t1, 8);
+ break;
+ case 9:
+ *(__m64 *) r = _m_psrldi (t1, 9);
+ break;
+ case 10:
+ *(__m64 *) r = _m_psrldi (t1, 10);
+ break;
+ case 11:
+ *(__m64 *) r = _m_psrldi (t1, 11);
+ break;
+ case 12:
+ *(__m64 *) r = _m_psrldi (t1, 12);
+ break;
+ case 13:
+ *(__m64 *) r = _m_psrldi (t1, 13);
+ break;
+ case 14:
+ *(__m64 *) r = _m_psrldi (t1, 14);
+ break;
+ case 15:
+ *(__m64 *) r = _m_psrldi (t1, 15);
+ break;
+ case 16:
+ *(__m64 *) r = _m_psrldi (t1, 16);
+ break;
+ case 17:
+ *(__m64 *) r = _m_psrldi (t1, 17);
+ break;
+ case 18:
+ *(__m64 *) r = _m_psrldi (t1, 18);
+ break;
+ case 19:
+ *(__m64 *) r = _m_psrldi (t1, 19);
+ break;
+ case 20:
+ *(__m64 *) r = _m_psrldi (t1, 20);
+ break;
+ case 21:
+ *(__m64 *) r = _m_psrldi (t1, 21);
+ break;
+ case 22:
+ *(__m64 *) r = _m_psrldi (t1, 22);
+ break;
+ case 23:
+ *(__m64 *) r = _m_psrldi (t1, 23);
+ break;
+ case 24:
+ *(__m64 *) r = _m_psrldi (t1, 24);
+ break;
+ case 25:
+ *(__m64 *) r = _m_psrldi (t1, 25);
+ break;
+ case 26:
+ *(__m64 *) r = _m_psrldi (t1, 26);
+ break;
+ case 27:
+ *(__m64 *) r = _m_psrldi (t1, 27);
+ break;
+ case 28:
+ *(__m64 *) r = _m_psrldi (t1, 28);
+ break;
+ case 29:
+ *(__m64 *) r = _m_psrldi (t1, 29);
+ break;
+ case 30:
+ *(__m64 *) r = _m_psrldi (t1, 30);
+ break;
+ case 31:
+ *(__m64 *) r = _m_psrldi (t1, 31);
+ break;
+ default: /* Every other imm is exercised with count 32.  */
+ *(__m64 *) r = _m_psrldi (t1, 32);
+ break;
+ }
+}
+
+/* Reference result for _m_psrldi: logically shift each of the two 32-bit lanes of *src_p right by imm; imm above 31 zeroes every lane.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+ long long *res_p)
+{
+ unsigned int *src = (unsigned int *) src_p; /* Unsigned lanes so that >> shifts in zeros rather than sign bits.  */
+ unsigned int *res = (unsigned int *) res_p;
+ int i;
+ if (imm > 31)
+ for (i = 0; i < 2; i++)
+ res[i] = 0;
+ else
+ for (i = 0; i < 2; i++)
+ res[i] = src[i] >> imm;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ unsigned int count;
+ long long r, ck;
+ int fail = 0; /* Number of inputs whose two results differed.  */
+
+ /* Run the MMX tests: each MMXops entry is the operand and, converted to unsigned int, the count.  */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ count = MMXops[i];
+ test_psrldi (&MMXops[i], count, &r);
+ compute_correct_result (&MMXops[i], count, &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlq.c
new file mode 100644
index 00000000000..c9fa8b45671
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlq.c
@@ -0,0 +1,46 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrlq (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as an __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psrlq (t1, t2); /* Store the intrinsic's result through r.  */
+}
+
+/* Reference result for _m_psrlq: shift the unsigned 64-bit value *dst right by the count *src; a count above 63 gives 0.  */
+static void
+compute_correct_result (unsigned long long *dst,
+ unsigned long long *src,
+ unsigned long long *res) /* NOTE(review): sse2_test passes long long * (e.g. &ck), which differs in signedness.  */
+{
+ if (src[0] > 63)
+ res[0] = 0;
+ else
+ res[0] = dst[0] >> src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0; /* Number of operand pairs whose two results differed.  */
+
+ /* Run the MMX tests: walk MMXops two entries at a time, using each pair as the operands.  */
+ for (i = 0; i < MMX_num_ops; i += 2) /* NOTE(review): reads MMXops[i + 1]; assumes MMX_num_ops is even -- confirm in mmx-vals.h.  */
+ {
+ test_psrlq (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlqi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlqi.c
new file mode 100644
index 00000000000..bdbecd6ab6b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlqi.c
@@ -0,0 +1,244 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllwi (long long *ll1, unsigned int imm, long long *r) /* NOTE(review): named test_psllwi but exercises _m_psrlqi -- looks like a copy-paste leftover.  */
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret the 64-bit input as an __m64.  */
+ switch (imm) /* Each case passes its count to the intrinsic as a literal.  */
+ {
+ case 0:
+ *(__m64 *) r = _m_psrlqi (t1, 0);
+ break;
+ case 1:
+ *(__m64 *) r = _m_psrlqi (t1, 1);
+ break;
+ case 2:
+ *(__m64 *) r = _m_psrlqi (t1, 2);
+ break;
+ case 3:
+ *(__m64 *) r = _m_psrlqi (t1, 3);
+ break;
+ case 4:
+ *(__m64 *) r = _m_psrlqi (t1, 4);
+ break;
+ case 5:
+ *(__m64 *) r = _m_psrlqi (t1, 5);
+ break;
+ case 6:
+ *(__m64 *) r = _m_psrlqi (t1, 6);
+ break;
+ case 7:
+ *(__m64 *) r = _m_psrlqi (t1, 7);
+ break;
+ case 8:
+ *(__m64 *) r = _m_psrlqi (t1, 8);
+ break;
+ case 9:
+ *(__m64 *) r = _m_psrlqi (t1, 9);
+ break;
+ case 10:
+ *(__m64 *) r = _m_psrlqi (t1, 10);
+ break;
+ case 11:
+ *(__m64 *) r = _m_psrlqi (t1, 11);
+ break;
+ case 12:
+ *(__m64 *) r = _m_psrlqi (t1, 12);
+ break;
+ case 13:
+ *(__m64 *) r = _m_psrlqi (t1, 13);
+ break;
+ case 14:
+ *(__m64 *) r = _m_psrlqi (t1, 14);
+ break;
+ case 15:
+ *(__m64 *) r = _m_psrlqi (t1, 15);
+ break;
+ case 16:
+ *(__m64 *) r = _m_psrlqi (t1, 16);
+ break;
+ case 17:
+ *(__m64 *) r = _m_psrlqi (t1, 17);
+ break;
+ case 18:
+ *(__m64 *) r = _m_psrlqi (t1, 18);
+ break;
+ case 19:
+ *(__m64 *) r = _m_psrlqi (t1, 19);
+ break;
+ case 20:
+ *(__m64 *) r = _m_psrlqi (t1, 20);
+ break;
+ case 21:
+ *(__m64 *) r = _m_psrlqi (t1, 21);
+ break;
+ case 22:
+ *(__m64 *) r = _m_psrlqi (t1, 22);
+ break;
+ case 23:
+ *(__m64 *) r = _m_psrlqi (t1, 23);
+ break;
+ case 24:
+ *(__m64 *) r = _m_psrlqi (t1, 24);
+ break;
+ case 25:
+ *(__m64 *) r = _m_psrlqi (t1, 25);
+ break;
+ case 26:
+ *(__m64 *) r = _m_psrlqi (t1, 26);
+ break;
+ case 27:
+ *(__m64 *) r = _m_psrlqi (t1, 27);
+ break;
+ case 28:
+ *(__m64 *) r = _m_psrlqi (t1, 28);
+ break;
+ case 29:
+ *(__m64 *) r = _m_psrlqi (t1, 29);
+ break;
+ case 30:
+ *(__m64 *) r = _m_psrlqi (t1, 30);
+ break;
+ case 31:
+ *(__m64 *) r = _m_psrlqi (t1, 31);
+ break;
+ case 32:
+ *(__m64 *) r = _m_psrlqi (t1, 32);
+ break;
+ case 33:
+ *(__m64 *) r = _m_psrlqi (t1, 33);
+ break;
+ case 34:
+ *(__m64 *) r = _m_psrlqi (t1, 34);
+ break;
+ case 35:
+ *(__m64 *) r = _m_psrlqi (t1, 35);
+ break;
+ case 36:
+ *(__m64 *) r = _m_psrlqi (t1, 36);
+ break;
+ case 37:
+ *(__m64 *) r = _m_psrlqi (t1, 37);
+ break;
+ case 38:
+ *(__m64 *) r = _m_psrlqi (t1, 38);
+ break;
+ case 39:
+ *(__m64 *) r = _m_psrlqi (t1, 39);
+ break;
+ case 40:
+ *(__m64 *) r = _m_psrlqi (t1, 40);
+ break;
+ case 41:
+ *(__m64 *) r = _m_psrlqi (t1, 41);
+ break;
+ case 42:
+ *(__m64 *) r = _m_psrlqi (t1, 42);
+ break;
+ case 43:
+ *(__m64 *) r = _m_psrlqi (t1, 43);
+ break;
+ case 44:
+ *(__m64 *) r = _m_psrlqi (t1, 44);
+ break;
+ case 45:
+ *(__m64 *) r = _m_psrlqi (t1, 45);
+ break;
+ case 46:
+ *(__m64 *) r = _m_psrlqi (t1, 46);
+ break;
+ case 47:
+ *(__m64 *) r = _m_psrlqi (t1, 47);
+ break;
+ case 48:
+ *(__m64 *) r = _m_psrlqi (t1, 48);
+ break;
+ case 49:
+ *(__m64 *) r = _m_psrlqi (t1, 49);
+ break;
+ case 50:
+ *(__m64 *) r = _m_psrlqi (t1, 50);
+ break;
+ case 51:
+ *(__m64 *) r = _m_psrlqi (t1, 51);
+ break;
+ case 52:
+ *(__m64 *) r = _m_psrlqi (t1, 52);
+ break;
+ case 53:
+ *(__m64 *) r = _m_psrlqi (t1, 53);
+ break;
+ case 54:
+ *(__m64 *) r = _m_psrlqi (t1, 54);
+ break;
+ case 55:
+ *(__m64 *) r = _m_psrlqi (t1, 55);
+ break;
+ case 56:
+ *(__m64 *) r = _m_psrlqi (t1, 56);
+ break;
+ case 57:
+ *(__m64 *) r = _m_psrlqi (t1, 57);
+ break;
+ case 58:
+ *(__m64 *) r = _m_psrlqi (t1, 58);
+ break;
+ case 59:
+ *(__m64 *) r = _m_psrlqi (t1, 59);
+ break;
+ case 60:
+ *(__m64 *) r = _m_psrlqi (t1, 60);
+ break;
+ case 61:
+ *(__m64 *) r = _m_psrlqi (t1, 61);
+ break;
+ case 62:
+ *(__m64 *) r = _m_psrlqi (t1, 62);
+ break;
+ case 63:
+ *(__m64 *) r = _m_psrlqi (t1, 63);
+ break;
+ default: /* Every other imm is exercised with count 64.  */
+ *(__m64 *) r = _m_psrlqi (t1, 64);
+ break;
+ }
+}
+
+/* Reference result for _m_psrlqi: shift the unsigned 64-bit value *src right by imm and store it in *res.  */
+static void
+compute_correct_result (unsigned long long *src, unsigned int imm,
+ unsigned long long *res)
+{
+ /* A count above 63 clears the whole quadword; the unused local 'i' that stood here was dropped.  */
+ if (imm > 63)
+ res[0] = 0;
+ else
+ res[0] = src[0] >> imm;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ unsigned int count;
+ long long r, ck;
+ int fail = 0; /* Number of inputs whose two results differed.  */
+
+ /* Run the MMX tests: each MMXops entry is the operand and, converted to unsigned int, the count.  */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ count = MMXops[i];
+ test_psllwi (&MMXops[i], count, &r); /* Despite its name, this file's test_psllwi calls _m_psrlqi.  */
+ compute_correct_result (&MMXops[i], count, &ck); /* NOTE(review): callee takes unsigned long long *; these are long long *.  */
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlw.c
new file mode 100644
index 00000000000..6382448b1a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlw.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrlw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as an __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psrlw (t1, t2); /* Store the intrinsic's result through r.  */
+}
+
+/* Reference result for _m_psrlw: logically shift each of the four 16-bit lanes of *dst_p right by the count in *src_p; an out-of-range count zeroes every lane.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned short *dst = (unsigned short *) dst_p; /* Unsigned lanes so that >> shifts in zeros rather than sign bits.  */
+ unsigned int *src = (unsigned int *) src_p;
+ unsigned short *res = (unsigned short *) res_p;
+ int i;
+ if (src[1] || src[0] > 15) /* Out of range: second 32-bit word nonzero or first word above 15.  */
+ for (i = 0; i < 4; i++)
+ res[i] = 0;
+ else
+ for (i = 0; i < 4; i++)
+ res[i] = dst[i] >> src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0; /* Number of operand pairs whose two results differed.  */
+
+ /* Run the MMX tests: walk MMXops two entries at a time, using each pair as the operands.  */
+ for (i = 0; i < MMX_num_ops; i += 2) /* NOTE(review): reads MMXops[i + 1]; assumes MMX_num_ops is even -- confirm in mmx-vals.h.  */
+ {
+ test_psrlw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlwi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlwi.c
new file mode 100644
index 00000000000..98c6df35e5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlwi.c
@@ -0,0 +1,104 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrlwi (long long *ll1, unsigned int imm, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret the 64-bit input as an __m64.  */
+ switch (imm) /* Each case passes its count to the intrinsic as a literal.  */
+ {
+ case 0:
+ *(__m64 *) r = _m_psrlwi (t1, 0);
+ break;
+ case 1:
+ *(__m64 *) r = _m_psrlwi (t1, 1);
+ break;
+ case 2:
+ *(__m64 *) r = _m_psrlwi (t1, 2);
+ break;
+ case 3:
+ *(__m64 *) r = _m_psrlwi (t1, 3);
+ break;
+ case 4:
+ *(__m64 *) r = _m_psrlwi (t1, 4);
+ break;
+ case 5:
+ *(__m64 *) r = _m_psrlwi (t1, 5);
+ break;
+ case 6:
+ *(__m64 *) r = _m_psrlwi (t1, 6);
+ break;
+ case 7:
+ *(__m64 *) r = _m_psrlwi (t1, 7);
+ break;
+ case 8:
+ *(__m64 *) r = _m_psrlwi (t1, 8);
+ break;
+ case 9:
+ *(__m64 *) r = _m_psrlwi (t1, 9);
+ break;
+ case 10:
+ *(__m64 *) r = _m_psrlwi (t1, 10);
+ break;
+ case 11:
+ *(__m64 *) r = _m_psrlwi (t1, 11);
+ break;
+ case 12:
+ *(__m64 *) r = _m_psrlwi (t1, 12);
+ break;
+ case 13:
+ *(__m64 *) r = _m_psrlwi (t1, 13);
+ break;
+ case 14:
+ *(__m64 *) r = _m_psrlwi (t1, 14);
+ break;
+ case 15:
+ *(__m64 *) r = _m_psrlwi (t1, 15);
+ break;
+ default: /* Every other imm is exercised with count 16.  */
+ *(__m64 *) r = _m_psrlwi (t1, 16);
+ break;
+ }
+}
+
+/* Reference result for _m_psrlwi: logically shift each of the four 16-bit lanes of *src_p right by imm; imm above 15 zeroes every lane.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+ long long *res_p)
+{
+ unsigned short *src = (unsigned short *) src_p; /* Unsigned lanes so that >> shifts in zeros rather than sign bits.  */
+ unsigned short *res = (unsigned short *) res_p;
+ int i;
+ if (imm > 15)
+ for (i = 0; i < 4; i++)
+ res[i] = 0;
+ else
+ for (i = 0; i < 4; i++)
+ res[i] = src[i] >> imm;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ unsigned int count;
+ long long r, ck;
+ int fail = 0; /* Number of inputs whose two results differed.  */
+
+ /* Run the MMX tests: each MMXops entry is the operand and, converted to unsigned int, the count.  */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ count = MMXops[i];
+ test_psrlwi (&MMXops[i], count, &r);
+ compute_correct_result (&MMXops[i], count, &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubb.c
new file mode 100644
index 00000000000..b3637353879
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubb.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubb (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as an __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psubb (t1, t2); /* Store the intrinsic's result through r.  */
+}
+
+/* Reference result for _m_psubb: subtract the eight bytes of *src_p from the corresponding bytes of *dst_p, one byte lane at a time.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ char *dst = (char *) dst_p;
+ char *src = (char *) src_p;
+ char *res = (char *) res_p;
+ int i;
+ for (i = 0; i < 8; i++)
+ res[i] = dst[i] - src[i]; /* Computed in int, then converted back to char on the store.  */
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0; /* Number of operand pairs whose two results differed.  */
+
+ /* Run the MMX tests: walk MMXops two entries at a time, using each pair as the operands.  */
+ for (i = 0; i < MMX_num_ops; i += 2) /* NOTE(review): reads MMXops[i + 1]; assumes MMX_num_ops is even -- confirm in mmx-vals.h.  */
+ {
+ test_psubb (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubd.c
new file mode 100644
index 00000000000..b091d7f590f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubd.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubd (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as an __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psubd (t1, t2); /* Store the intrinsic's result through r.  */
+}
+
+/* Reference result for _m_psubd: subtract the two 32-bit lanes of *src_p from those of *dst_p, wrapping modulo 2^32.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned int *dst = (unsigned int *) dst_p; /* Unsigned lanes: wraparound is well defined, unlike signed overflow.  */
+ unsigned int *src = (unsigned int *) src_p;
+ unsigned int *res = (unsigned int *) res_p;
+ int i;
+ for (i = 0; i < 2; i++)
+ res[i] = dst[i] - src[i];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0; /* Number of operand pairs whose two results differed.  */
+
+ /* Run the MMX tests: walk MMXops two entries at a time, using each pair as the operands.  */
+ for (i = 0; i < MMX_num_ops; i += 2) /* NOTE(review): reads MMXops[i + 1]; assumes MMX_num_ops is even -- confirm in mmx-vals.h.  */
+ {
+ test_psubd (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubq.c
new file mode 100644
index 00000000000..767bf8ea303
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubq.c
@@ -0,0 +1,42 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubq (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as an __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _mm_sub_si64 (t1, t2); /* Store the intrinsic's result through r.  */
+}
+
+/* Reference result for _mm_sub_si64: 64-bit subtraction *dst_p - *src_p, wrapping modulo 2^64.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ res_p[0] = (unsigned long long) dst_p[0] - (unsigned long long) src_p[0]; /* Unsigned arithmetic avoids signed-overflow undefined behaviour.  */
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0; /* Number of operand pairs whose two results differed.  */
+
+ /* Run the MMX tests: walk MMXops two entries at a time, using each pair as the operands.  */
+ for (i = 0; i < MMX_num_ops; i += 2) /* NOTE(review): reads MMXops[i + 1]; assumes MMX_num_ops is even -- confirm in mmx-vals.h.  */
+ {
+ test_psubq (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusb.c
new file mode 100644
index 00000000000..29a5f708e12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusb.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubusb (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as an __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psubusb (t1, t2); /* Store the intrinsic's result through r.  */
+}
+
+/* Reference result for _m_psubusb: per unsigned byte lane, pass dst - src through saturate_ub and store it.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned char *dst = (unsigned char *) dst_p;
+ unsigned char *src = (unsigned char *) src_p;
+ unsigned char *res = (unsigned char *) res_p;
+ int i;
+ for (i = 0; i < 8; i++)
+ res[i] = saturate_ub (dst[i] - src[i]); /* Difference is an int; saturate_ub is not defined in this file -- presumably it clamps to 0..255, confirm.  */
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0; /* Number of operand pairs whose two results differed.  */
+
+ /* Run the MMX tests: walk MMXops two entries at a time, using each pair as the operands.  */
+ for (i = 0; i < MMX_num_ops; i += 2) /* NOTE(review): reads MMXops[i + 1]; assumes MMX_num_ops is even -- confirm in mmx-vals.h.  */
+ {
+ test_psubusb (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusw.c
new file mode 100644
index 00000000000..279051f7303
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusw.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubusw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as an __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psubusw (t1, t2); /* Store the intrinsic's result through r.  */
+}
+
+/* Reference result for _m_psubusw: per unsigned 16-bit lane, pass dst - src through saturate_uw and store it.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned short *dst = (unsigned short *) dst_p;
+ unsigned short *src = (unsigned short *) src_p;
+ unsigned short *res = (unsigned short *) res_p;
+ int i;
+ for (i = 0; i < 4; i++)
+ res[i] = saturate_uw (dst[i] - src[i]); /* Difference is an int; saturate_uw is not defined in this file -- presumably it clamps to 0..65535, confirm.  */
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0; /* Number of operand pairs whose two results differed.  */
+
+ /* Run the MMX tests: walk MMXops two entries at a time, using each pair as the operands.  */
+ for (i = 0; i < MMX_num_ops; i += 2) /* NOTE(review): reads MMXops[i + 1]; assumes MMX_num_ops is even -- confirm in mmx-vals.h.  */
+ {
+ test_psubusw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubw.c
new file mode 100644
index 00000000000..dde5fce50ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubw.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as an __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psubw (t1, t2); /* Store the intrinsic's result through r.  */
+}
+
+/* Reference result for _m_psubw: subtract the four 16-bit lanes of *src_p from the corresponding lanes of *dst_p.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int i;
+ for (i = 0; i < 4; i++)
+ res[i] = dst[i] - src[i]; /* Computed in int, then converted back to short on the store.  */
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0; /* Number of operand pairs whose two results differed.  */
+
+ /* Run the MMX tests: walk MMXops two entries at a time, using each pair as the operands.  */
+ for (i = 0; i < MMX_num_ops; i += 2) /* NOTE(review): reads MMXops[i + 1]; assumes MMX_num_ops is even -- confirm in mmx-vals.h.  */
+ {
+ test_psubw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort (); /* Any mismatch fails the test.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhbw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhbw.c
new file mode 100644
index 00000000000..5059d74d6c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhbw.c
@@ -0,0 +1,52 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpckhbw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_punpckhbw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ char *dst = (char *) dst_p;
+ char *src = (char *) src_p;
+ char *res = (char *) res_p;
+ res[0] = dst[4];
+ res[1] = src[4];
+ res[2] = dst[5];
+ res[3] = src[5];
+ res[4] = dst[6];
+ res[5] = src[6];
+ res[6] = dst[7];
+ res[7] = src[7];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_punpckhbw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhdq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhdq.c
new file mode 100644
index 00000000000..9c4690dee0b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhdq.c
@@ -0,0 +1,46 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpckhdq (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_punpckhdq (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ int *dst = (int *) dst_p;
+ int *src = (int *) src_p;
+ int *res = (int *) res_p;
+ res[0] = dst[1];
+ res[1] = src[1];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_punpckhdq (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhwd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhwd.c
new file mode 100644
index 00000000000..7525a2bba63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhwd.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpckhwd (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_punpckhwd (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ res[0] = dst[2];
+ res[1] = src[2];
+ res[2] = dst[3];
+ res[3] = src[3];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_punpckhwd (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklbw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklbw.c
new file mode 100644
index 00000000000..14bdc433ed3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklbw.c
@@ -0,0 +1,52 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpcklbw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_punpcklbw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ char *dst = (char *) dst_p;
+ char *src = (char *) src_p;
+ char *res = (char *) res_p;
+ res[0] = dst[0];
+ res[1] = src[0];
+ res[2] = dst[1];
+ res[3] = src[1];
+ res[4] = dst[2];
+ res[5] = src[2];
+ res[6] = dst[3];
+ res[7] = src[3];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_punpcklbw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckldq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckldq.c
new file mode 100644
index 00000000000..1d8a932ba7c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckldq.c
@@ -0,0 +1,46 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpckldq (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_punpckldq (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ int *dst = (int *) dst_p;
+ int *src = (int *) src_p;
+ int *res = (int *) res_p;
+ res[0] = dst[0];
+ res[1] = src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_punpckldq (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklwd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklwd.c
new file mode 100644
index 00000000000..6b2a9d56a89
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklwd.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpcklwd (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_punpcklwd (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ res[0] = dst[0];
+ res[1] = src[0];
+ res[2] = dst[1];
+ res[3] = src[1];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_punpcklwd (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c
new file mode 100644
index 00000000000..7858c2f6856
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c
@@ -0,0 +1,43 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pxor (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pxor (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (unsigned long long *dst,
+ unsigned long long *src,
+ unsigned long long *res)
+{
+ res[0] = dst[0] ^ src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pxor (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx.c b/gcc/testsuite/gcc.target/i386/sse2-mmx.c
index fb226a8e8f3..338cb9da289 100644
--- a/gcc/testsuite/gcc.target/i386/sse2-mmx.c
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx.c
@@ -4,7 +4,6 @@
#include "sse2-check.h"
-#include <mmintrin.h>
#define N 4
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [PATCH 39/40] i386: Add tests for MMX intrinsic emulations with SSE
2019-02-14 12:33 ` [PATCH 39/40] i386: Add tests for MMX intrinsic emulations " H.J. Lu
@ 2019-02-15 12:21 ` Uros Bizjak
0 siblings, 0 replies; 62+ messages in thread
From: Uros Bizjak @ 2019-02-15 12:21 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
On Thu, Feb 14, 2019 at 1:30 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Test MMX intrinsics with -msse2 -mno-mmx in 64-bit mode.
We don't have to disable MMX anymore to switch to __MMX_WITH_SSE__.
Better coverage can be achieved by using:
/* { dg-do run } */
/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
This will test the following cases:
a) that the intrinsics still work on 32-bit targets.
b) that they work with MMX emulated by SSE on 64-bit targets. We know that
-msse2 already switches to mmx-with-sse there, but we can additionally add:
/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
In any case, you should add _mm_empty () at the end of MMX functions.
Uros.
> PR target/89021
> * gcc.target/i386/mmx-vals.h: New file.
> * gcc.target/i386/sse2-mmx-2.c: Likewise.
> * gcc.target/i386/sse2-mmx-3.c: Likewise.
> * gcc.target/i386/sse2-mmx-4.c: Likewise.
> * gcc.target/i386/sse2-mmx-5.c: Likewise.
> * gcc.target/i386/sse2-mmx-6.c: Likewise.
> * gcc.target/i386/sse2-mmx-7.c: Likewise.
> * gcc.target/i386/sse2-mmx-8.c: Likewise.
> * gcc.target/i386/sse2-mmx-9.c: Likewise.
> * gcc.target/i386/sse2-mmx-10.c: Likewise.
> * gcc.target/i386/sse2-mmx-11.c: Likewise.
> * gcc.target/i386/sse2-mmx-12.c: Likewise.
> * gcc.target/i386/sse2-mmx-13.c: Likewise.
> * gcc.target/i386/sse2-mmx-14.c: Likewise.
> * gcc.target/i386/sse2-mmx-15.c: Likewise.
> * gcc.target/i386/sse2-mmx-16.c: Likewise.
> * gcc.target/i386/sse2-mmx-17.c: Likewise.
> * gcc.target/i386/sse2-mmx-18a.c: Likewise.
> * gcc.target/i386/sse2-mmx-18b.c: Likewise.
> * gcc.target/i386/sse2-mmx-18c.c: Likewise.
> * gcc.target/i386/sse2-mmx-19a.c: Likewise.
> * gcc.target/i386/sse2-mmx-19b.c: Likewise.
> * gcc.target/i386/sse2-mmx-19c.c: Likewise.
> * gcc.target/i386/sse2-mmx-19d.c: Likewise.
> * gcc.target/i386/sse2-mmx-19e.c: Likewise.
> * gcc.target/i386/sse2-mmx-20.c: Likewise.
> * gcc.target/i386/sse2-mmx-21.c: Likewise.
> * gcc.target/i386/sse2-mmx-cvtpi2ps.c: Likewise.
> * gcc.target/i386/sse2-mmx-cvtps2pi.c: Likewise.
> * gcc.target/i386/sse2-mmx-cvttps2pi.c: Likewise.
> * gcc.target/i386/sse2-mmx-maskmovq.c: Likewise.
> * gcc.target/i386/sse2-mmx-packssdw.c: Likewise.
> * gcc.target/i386/sse2-mmx-packsswb.c: Likewise.
> * gcc.target/i386/sse2-mmx-packuswb.c: Likewise.
> * gcc.target/i386/sse2-mmx-paddb.c: Likewise.
> * gcc.target/i386/sse2-mmx-paddd.c: Likewise.
> * gcc.target/i386/sse2-mmx-paddq.c: Likewise.
> * gcc.target/i386/sse2-mmx-paddsb.c: Likewise.
> * gcc.target/i386/sse2-mmx-paddsw.c: Likewise.
> * gcc.target/i386/sse2-mmx-paddusb.c: Likewise.
> * gcc.target/i386/sse2-mmx-paddusw.c: Likewise.
> * gcc.target/i386/sse2-mmx-paddw.c: Likewise.
> * gcc.target/i386/sse2-mmx-pand.c: Likewise.
> * gcc.target/i386/sse2-mmx-pandn.c: Likewise.
> * gcc.target/i386/sse2-mmx-pavgb.c: Likewise.
> * gcc.target/i386/sse2-mmx-pavgw.c: Likewise.
> * gcc.target/i386/sse2-mmx-pcmpeqb.c: Likewise.
> * gcc.target/i386/sse2-mmx-pcmpeqd.c: Likewise.
> * gcc.target/i386/sse2-mmx-pcmpeqw.c: Likewise.
> * gcc.target/i386/sse2-mmx-pcmpgtb.c: Likewise.
> * gcc.target/i386/sse2-mmx-pcmpgtd.c: Likewise.
> * gcc.target/i386/sse2-mmx-pcmpgtw.c: Likewise.
> * gcc.target/i386/sse2-mmx-pextrw.c: Likewise.
> * gcc.target/i386/sse2-mmx-pinsrw.c: Likewise.
> * gcc.target/i386/sse2-mmx-pmaddwd.c: Likewise.
> * gcc.target/i386/sse2-mmx-pmaxsw.c: Likewise.
> * gcc.target/i386/sse2-mmx-pmaxub.c: Likewise.
> * gcc.target/i386/sse2-mmx-pminsw.c: Likewise.
> * gcc.target/i386/sse2-mmx-pminub.c: Likewise.
> * gcc.target/i386/sse2-mmx-pmovmskb.c: Likewise.
> * gcc.target/i386/sse2-mmx-pmulhuw.c: Likewise.
> * gcc.target/i386/sse2-mmx-pmulhw.c: Likewise.
> * gcc.target/i386/sse2-mmx-pmullw.c: Likewise.
> * gcc.target/i386/sse2-mmx-pmuludq.c: Likewise.
> * gcc.target/i386/sse2-mmx-por.c: Likewise.
> * gcc.target/i386/sse2-mmx-psadbw.c: Likewise.
> * gcc.target/i386/sse2-mmx-pshufw.c: Likewise.
> * gcc.target/i386/sse2-mmx-pslld.c: Likewise.
> * gcc.target/i386/sse2-mmx-pslldi.c: Likewise.
> * gcc.target/i386/sse2-mmx-psllq.c: Likewise.
> * gcc.target/i386/sse2-mmx-psllqi.c: Likewise.
> * gcc.target/i386/sse2-mmx-psllw.c: Likewise.
> * gcc.target/i386/sse2-mmx-psllwi.c: Likewise.
> * gcc.target/i386/sse2-mmx-psrad.c: Likewise.
> * gcc.target/i386/sse2-mmx-psradi.c: Likewise.
> * gcc.target/i386/sse2-mmx-psraw.c: Likewise.
> * gcc.target/i386/sse2-mmx-psrawi.c: Likewise.
> * gcc.target/i386/sse2-mmx-psrld.c: Likewise.
> * gcc.target/i386/sse2-mmx-psrldi.c: Likewise.
> * gcc.target/i386/sse2-mmx-psrlq.c: Likewise.
> * gcc.target/i386/sse2-mmx-psrlqi.c: Likewise.
> * gcc.target/i386/sse2-mmx-psrlw.c: Likewise.
> * gcc.target/i386/sse2-mmx-psrlwi.c: Likewise.
> * gcc.target/i386/sse2-mmx-psubb.c: Likewise.
> * gcc.target/i386/sse2-mmx-psubd.c: Likewise.
> * gcc.target/i386/sse2-mmx-psubq.c: Likewise.
> * gcc.target/i386/sse2-mmx-psubusb.c: Likewise.
> * gcc.target/i386/sse2-mmx-psubusw.c: Likewise.
> * gcc.target/i386/sse2-mmx-psubw.c: Likewise.
> * gcc.target/i386/sse2-mmx-punpckhbw.c: Likewise.
> * gcc.target/i386/sse2-mmx-punpckhdq.c: Likewise.
> * gcc.target/i386/sse2-mmx-punpckhwd.c: Likewise.
> * gcc.target/i386/sse2-mmx-punpcklbw.c: Likewise.
> * gcc.target/i386/sse2-mmx-punpckldq.c: Likewise.
> * gcc.target/i386/sse2-mmx-punpcklwd.c: Likewise.
> * gcc.target/i386/sse2-mmx-pxor.c: Likewise.
> ---
> gcc/testsuite/gcc.target/i386/mmx-vals.h | 77 ++++++
> gcc/testsuite/gcc.target/i386/sse2-mmx-10.c | 42 +++
> gcc/testsuite/gcc.target/i386/sse2-mmx-11.c | 39 +++
> gcc/testsuite/gcc.target/i386/sse2-mmx-12.c | 41 +++
> gcc/testsuite/gcc.target/i386/sse2-mmx-13.c | 40 +++
> gcc/testsuite/gcc.target/i386/sse2-mmx-14.c | 30 +++
> gcc/testsuite/gcc.target/i386/sse2-mmx-15.c | 35 +++
> gcc/testsuite/gcc.target/i386/sse2-mmx-16.c | 39 +++
> gcc/testsuite/gcc.target/i386/sse2-mmx-17.c | 50 ++++
> gcc/testsuite/gcc.target/i386/sse2-mmx-18a.c | 14 +
> gcc/testsuite/gcc.target/i386/sse2-mmx-18b.c | 7 +
> gcc/testsuite/gcc.target/i386/sse2-mmx-18c.c | 7 +
> gcc/testsuite/gcc.target/i386/sse2-mmx-19a.c | 14 +
> gcc/testsuite/gcc.target/i386/sse2-mmx-19b.c | 7 +
> gcc/testsuite/gcc.target/i386/sse2-mmx-19c.c | 7 +
> gcc/testsuite/gcc.target/i386/sse2-mmx-19d.c | 7 +
> gcc/testsuite/gcc.target/i386/sse2-mmx-19e.c | 7 +
> gcc/testsuite/gcc.target/i386/sse2-mmx-2.c | 12 +
> gcc/testsuite/gcc.target/i386/sse2-mmx-20.c | 12 +
> gcc/testsuite/gcc.target/i386/sse2-mmx-21.c | 13 +
> gcc/testsuite/gcc.target/i386/sse2-mmx-3.c | 13 +
> gcc/testsuite/gcc.target/i386/sse2-mmx-4.c | 4 +
> gcc/testsuite/gcc.target/i386/sse2-mmx-5.c | 11 +
> gcc/testsuite/gcc.target/i386/sse2-mmx-6.c | 11 +
> gcc/testsuite/gcc.target/i386/sse2-mmx-7.c | 13 +
> gcc/testsuite/gcc.target/i386/sse2-mmx-8.c | 4 +
> gcc/testsuite/gcc.target/i386/sse2-mmx-9.c | 79 ++++++
> .../gcc.target/i386/sse2-mmx-cvtpi2ps.c | 42 +++
> .../gcc.target/i386/sse2-mmx-cvtps2pi.c | 35 +++
> .../gcc.target/i386/sse2-mmx-cvttps2pi.c | 35 +++
> .../gcc.target/i386/sse2-mmx-maskmovq.c | 98 +++++++
> .../gcc.target/i386/sse2-mmx-packssdw.c | 51 ++++
> .../gcc.target/i386/sse2-mmx-packsswb.c | 51 ++++
> .../gcc.target/i386/sse2-mmx-packuswb.c | 51 ++++
> .../gcc.target/i386/sse2-mmx-paddb.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-paddd.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-paddq.c | 42 +++
> .../gcc.target/i386/sse2-mmx-paddsb.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-paddsw.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-paddusb.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-paddusw.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-paddw.c | 47 ++++
> gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c | 43 +++
> .../gcc.target/i386/sse2-mmx-pandn.c | 43 +++
> .../gcc.target/i386/sse2-mmx-pavgb.c | 51 ++++
> .../gcc.target/i386/sse2-mmx-pavgw.c | 51 ++++
> .../gcc.target/i386/sse2-mmx-pcmpeqb.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-pcmpeqd.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-pcmpeqw.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-pcmpgtb.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-pcmpgtd.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-pcmpgtw.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-pextrw.c | 58 ++++
> .../gcc.target/i386/sse2-mmx-pinsrw.c | 60 +++++
> .../gcc.target/i386/sse2-mmx-pmaddwd.c | 46 ++++
> .../gcc.target/i386/sse2-mmx-pmaxsw.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-pmaxub.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-pminsw.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-pminub.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-pmovmskb.c | 45 ++++
> .../gcc.target/i386/sse2-mmx-pmulhuw.c | 50 ++++
> .../gcc.target/i386/sse2-mmx-pmulhw.c | 52 ++++
> .../gcc.target/i386/sse2-mmx-pmullw.c | 51 ++++
> .../gcc.target/i386/sse2-mmx-pmuludq.c | 46 ++++
> gcc/testsuite/gcc.target/i386/sse2-mmx-por.c | 43 +++
> .../gcc.target/i386/sse2-mmx-psadbw.c | 57 ++++
> .../gcc.target/i386/sse2-mmx-pshufw.c | 247 ++++++++++++++++++
> .../gcc.target/i386/sse2-mmx-pslld.c | 51 ++++
> .../gcc.target/i386/sse2-mmx-pslldi.c | 152 +++++++++++
> .../gcc.target/i386/sse2-mmx-psllq.c | 46 ++++
> .../gcc.target/i386/sse2-mmx-psllqi.c | 244 +++++++++++++++++
> .../gcc.target/i386/sse2-mmx-psllw.c | 51 ++++
> .../gcc.target/i386/sse2-mmx-psllwi.c | 104 ++++++++
> .../gcc.target/i386/sse2-mmx-psrad.c | 51 ++++
> .../gcc.target/i386/sse2-mmx-psradi.c | 152 +++++++++++
> .../gcc.target/i386/sse2-mmx-psraw.c | 51 ++++
> .../gcc.target/i386/sse2-mmx-psrawi.c | 104 ++++++++
> .../gcc.target/i386/sse2-mmx-psrld.c | 51 ++++
> .../gcc.target/i386/sse2-mmx-psrldi.c | 152 +++++++++++
> .../gcc.target/i386/sse2-mmx-psrlq.c | 46 ++++
> .../gcc.target/i386/sse2-mmx-psrlqi.c | 244 +++++++++++++++++
> .../gcc.target/i386/sse2-mmx-psrlw.c | 51 ++++
> .../gcc.target/i386/sse2-mmx-psrlwi.c | 104 ++++++++
> .../gcc.target/i386/sse2-mmx-psubb.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-psubd.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-psubq.c | 42 +++
> .../gcc.target/i386/sse2-mmx-psubusb.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-psubusw.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-psubw.c | 47 ++++
> .../gcc.target/i386/sse2-mmx-punpckhbw.c | 52 ++++
> .../gcc.target/i386/sse2-mmx-punpckhdq.c | 46 ++++
> .../gcc.target/i386/sse2-mmx-punpckhwd.c | 48 ++++
> .../gcc.target/i386/sse2-mmx-punpcklbw.c | 52 ++++
> .../gcc.target/i386/sse2-mmx-punpckldq.c | 46 ++++
> .../gcc.target/i386/sse2-mmx-punpcklwd.c | 48 ++++
> gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c | 43 +++
> gcc/testsuite/gcc.target/i386/sse2-mmx.c | 1 -
> 97 files changed, 5048 insertions(+), 1 deletion(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/mmx-vals.h
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-10.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-11.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-12.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-13.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-14.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-15.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-16.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-17.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-18a.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-18b.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-18c.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-19a.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-19b.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-19c.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-19d.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-19e.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-2.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-20.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-21.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-3.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-4.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-5.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-6.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-7.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-8.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-9.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-cvtpi2ps.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-cvtps2pi.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-cvttps2pi.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-maskmovq.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-packssdw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-packsswb.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-packuswb.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddb.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddd.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddq.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddsb.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddsw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddusb.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddusw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pandn.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pavgb.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pavgw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqb.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqd.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtb.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtd.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmaddwd.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxsw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxub.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pminsw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pminub.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmovmskb.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhuw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmullw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmuludq.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-por.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psadbw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pshufw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pslld.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pslldi.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllq.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllqi.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllwi.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrad.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psradi.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psraw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrawi.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrld.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrldi.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlq.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlqi.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlwi.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubb.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubd.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubq.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubusb.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubusw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhbw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhdq.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhwd.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklbw.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckldq.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklwd.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c
>
> diff --git a/gcc/testsuite/gcc.target/i386/mmx-vals.h b/gcc/testsuite/gcc.target/i386/mmx-vals.h
> new file mode 100644
> index 00000000000..62d0c1cb514
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/mmx-vals.h
> @@ -0,0 +1,77 @@
> +/* Routine to check correctness of the results */
> +
> +__attribute__((unused))
> +static int
> +saturate_b (int i)
> +{
> + if (i > 127)
> + i = 127;
> + else if (i < -128)
> + i = -128;
> + return i;
> +}
> +
> +__attribute__((unused))
> +static int
> +saturate_w (int i)
> +{
> + if (i > 32767)
> + i = 32767;
> + else if (i < -32768)
> + i = -32768;
> + return i;
> +}
> +
> +__attribute__((unused))
> +static int
> +saturate_ub (int i)
> +{
> + if (i > 255)
> + i = 255;
> + else if (i < 0)
> + i = 0;
> + return i;
> +}
> +
> +__attribute__((unused))
> +static int
> +saturate_uw (int i)
> +{
> + if (i > 65535)
> + i = 65535;
> + else if (i < 0)
> + i = 0;
> + return i;
> +}
> +
> +static long long MMXops[] =
> +{
> + 0x3467512347612976LL, 0x000000000000000eLL,
> + 0x3467512347612976LL, 0x0000000000000014LL,
> + 0x3467512347612976LL, 0x000000000000003cLL,
> + 0x0000000000000000LL, 0xFFFFFFFFFFFFFFFFLL,
> + 0xFFFFFFFFFFFFFFFFLL, 0x0000000000000000LL,
> + 0x0000000000000001LL, 0x1000000000000000LL,
> + 0x1000000000000000LL, 0x0000000000000001LL,
> + 0xFF00FF00FF00FF00LL, 0x00FF00FF00FF00FFLL,
> + 0xFFFFFFFFFFFFFFFFLL, 0x0101010101010101LL,
> + 0x0101010101010101LL, 0xFFFFFFFFFFFFFFFFLL,
> + 0x0123456789ABCDEFLL, 0x0123456789ABCDEFLL,
> + 0x3467512347612976LL, 0x1839876340879234LL,
> + 0x0000000000000000LL, 0x0000000000000000LL,
> + 0xFFFFFFFFFFFFFFFFLL, 0xFFFFFFFFFFFFFFFFLL,
> + 0x7F7F7F7F7F7F7F7FLL, 0x7F7F7F7F7F7F7F7FLL,
> + 0x7F7F7F7F7F7F7F7FLL, 0x0101010101010101LL,
> + 0x7F7F7F7F7F7F7F7FLL, 0x4782082349761237LL,
> + 0x0000000000000000LL, 0x7F7F7F7F7F7F7F7FLL,
> + 0x8080808080808080LL, 0x8080808080808080LL,
> + 0x0101010101010101LL, 0x8080808080808080LL,
> + 0x8080808080808080LL, 0x0000000000000000LL,
> + 0x2372347120982458LL, 0x8080808080808080LL,
> + 0xFFFFFFFFFFFFFFFFLL, 0x8080808080808080LL,
> + 0x7F7F7F7F7F7F7F7FLL, 0xFFFFFFFFFFFFFFFFLL,
> + 0x8080808080808080LL, 0x7F7F7F7F7F7F7F7FLL,
> + 0xFFFFFFFFFFFFFFFFLL, 0x7F7F7F7F7F7F7F7FLL
> +};
> +
> +#define MMX_num_ops (sizeof (MMXops) / sizeof (MMXops[0]))
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-10.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-10.c
> new file mode 100644
> index 00000000000..cb63401a251
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-10.c
> @@ -0,0 +1,42 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_from_int (long long *ll1, long long *r)
> +{
> + int i1 = *(int *) ll1;
> + *(__m64 *) r = _m_from_int (i1);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *ll1, long long *r)
> +{
> + int *res = (int *) r;
> + res[0] = *(int *) ll1;
> + res[1] = 0;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i++)
> + {
> + test_from_int (&MMXops[i], &r);
> + compute_correct_result (&MMXops[i], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-11.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-11.c
> new file mode 100644
> index 00000000000..6737ec5f2d4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-11.c
> @@ -0,0 +1,39 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_from_long_long (long long *ll1, long long *r)
> +{
> + *(__m64 *) r = _mm_cvtsi64_m64 (*ll1);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *ll1, long long *r)
> +{
> + *r = *ll1;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i++)
> + {
> + test_from_long_long (&MMXops[i], &r);
> + compute_correct_result (&MMXops[i], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-12.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-12.c
> new file mode 100644
> index 00000000000..7390bcf3ccc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-12.c
> @@ -0,0 +1,41 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_to_int (long long *ll1, long long *r)
> +{
> + __m64 m = *(__m64 *) ll1;
> + *(int *) r = _m_to_int (m);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *ll1, long long *r)
> +{
> + int *i1 = (int *) ll1;
> + *(int *) r = *i1;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r = 0, ck = 0;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i++)
> + {
> + test_to_int (&MMXops[i], &r);
> + compute_correct_result (&MMXops[i], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-13.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-13.c
> new file mode 100644
> index 00000000000..fd1eed66daa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-13.c
> @@ -0,0 +1,40 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_to_long_long (long long *ll1, long long *r)
> +{
> + __m64 m = *(__m64 *) ll1;
> + *r = _mm_cvtm64_si64 (m);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *ll1, long long *r)
> +{
> + *r = *ll1;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i++)
> + {
> + test_to_long_long (&MMXops[i], &r);
> + compute_correct_result (&MMXops[i], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-14.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-14.c
> new file mode 100644
> index 00000000000..cc586182259
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-14.c
> @@ -0,0 +1,30 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_setzero (long long *r)
> +{
> + *(__m64 *) r = _mm_setzero_si64 ();
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *r)
> +{
> + *r = 0x0LL;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + long long r, ck;
> +
> + /* Run the MMX tests */
> + test_setzero (&r);
> + compute_correct_result (&ck);
> + if (ck != r)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-15.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-15.c
> new file mode 100644
> index 00000000000..35308633f59
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-15.c
> @@ -0,0 +1,35 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_set (int x, int y, long long *r)
> +{
> + *(__m64 *) r = _mm_set_pi32 (x, y);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int x, int y, long long *res_p)
> +{
> + int *res = (int *) res_p;
> + res[0] = y;
> + res[1] = x;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int x, y;
> + long long r, ck;
> +
> + /* Run the MMX tests */
> + x = 0x0badbeef;
> + y = 0x0badfeed;
> + test_set (x, y, &r);
> + compute_correct_result (x, y, &ck);
> + if (ck != r)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-16.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-16.c
> new file mode 100644
> index 00000000000..9f0fb46765c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-16.c
> @@ -0,0 +1,39 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_set (int i0, int i1, int i2, int i3, long long *r)
> +{
> + *(__m64 *) r = _mm_set_pi16 (i0, i1, i2, i3);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int i0, int i1, int i2, int i3, long long *res_p)
> +{
> + short *res = (short *) res_p;
> + res[0] = i3;
> + res[1] = i2;
> + res[2] = i1;
> + res[3] = i0;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + short i0, i1, i2, i3;
> + long long r, ck;
> +
> + /* Run the MMX tests */
> + i0 = 0x0bad;
> + i1 = 0xbeef;
> + i2 = 0x0bad;
> + i3 = 0xfeed;
> + test_set (i0, i1, i2, i3, &r);
> + compute_correct_result (i0, i1, i2, i3, &ck);
> + if (ck != r)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-17.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-17.c
> new file mode 100644
> index 00000000000..a38351ea056
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-17.c
> @@ -0,0 +1,50 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_set (char i0, char i1, char i2, char i3,
> + char i4, char i5, char i6, char i7, long long *r)
> +{
> + *(__m64 *) r = _mm_set_pi8 (i0, i1, i2, i3, i4, i5, i6, i7);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (char i0, char i1, char i2, char i3,
> + char i4, char i5, char i6, char i7,
> + long long *res_p)
> +{
> + char *res = (char *) res_p;
> + res[0] = i7;
> + res[1] = i6;
> + res[2] = i5;
> + res[3] = i4;
> + res[4] = i3;
> + res[5] = i2;
> + res[6] = i1;
> + res[7] = i0;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + char i0, i1, i2, i3, i4, i5, i6, i7;
> + long long r, ck;
> +
> + /* Run the MMX tests */
> + i0 = 0x12;
> + i1 = 0x34;
> + i2 = 0x56;
> + i3 = 0x78;
> + i4 = 0x90;
> + i5 = 0xab;
> + i6 = 0xcd;
> + i7 = 0xef;
> + test_set (i0, i1, i2, i3, i4, i5, i6, i7, &r);
> + compute_correct_result (i0, i1, i2, i3, i4, i5, i6, i7, &ck);
> + if (ck != r)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-18a.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-18a.c
> new file mode 100644
> index 00000000000..3505a5c0cf4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-18a.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx -mno-avx512vl" } */
> +/* { dg-final { scan-assembler-times "pshufd" 1 } } */
> +/* { dg-final { scan-assembler-times "movd" 1 } } */
> +/* { dg-final { scan-assembler-not "movl" } } */
> +
> +#include <mmintrin.h>
> +
> +__m64
> +foo (int i)
> +{
> + __v2si x = { i, i };
> + return (__m64) x;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-18b.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-18b.c
> new file mode 100644
> index 00000000000..9b267b17346
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-18b.c
> @@ -0,0 +1,7 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-mmx -mavx512bw -mavx512vl" } */
> +/* { dg-final { scan-assembler-times "pbroadcastd" 1 } } */
> +/* { dg-final { scan-assembler-not "movd" } } */
> +/* { dg-final { scan-assembler-not "movl" } } */
> +
> +#include "sse2-mmx-18a.c"
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-18c.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-18c.c
> new file mode 100644
> index 00000000000..394f05b6b49
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-18c.c
> @@ -0,0 +1,7 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-mmx -mavx512bw -mno-avx512vl" } */
> +/* { dg-final { scan-assembler-times "pshufd" 1 } } */
> +/* { dg-final { scan-assembler-times "movd" 1 } } */
> +/* { dg-final { scan-assembler-not "movl" } } */
> +
> +#include "sse2-mmx-18a.c"
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-19a.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-19a.c
> new file mode 100644
> index 00000000000..9715ace241f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-19a.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx -mno-avx -mtune=intel" } */
> +/* { dg-final { scan-assembler-times "pshuflw" 1 } } */
> +/* { dg-final { scan-assembler-times "movd" 1 } } */
> +/* { dg-final { scan-assembler-not "movl" } } */
> +
> +#include <mmintrin.h>
> +
> +__m64
> +foo (short i)
> +{
> + __v4hi x = { i, i, i, i };
> + return (__m64) x;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-19b.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-19b.c
> new file mode 100644
> index 00000000000..a6d42313336
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-19b.c
> @@ -0,0 +1,7 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-mmx -mavx512bw -mavx512vl" } */
> +/* { dg-final { scan-assembler-times "pbroadcastw" 1 } } */
> +/* { dg-final { scan-assembler-not "movd" } } */
> +/* { dg-final { scan-assembler-not "movl" } } */
> +
> +#include "sse2-mmx-19a.c"
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-19c.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-19c.c
> new file mode 100644
> index 00000000000..b02dc8c2ffd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-19c.c
> @@ -0,0 +1,7 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-mmx -mavx -mno-avx2 -mtune=intel" } */
> +/* { dg-final { scan-assembler-times "pshuflw" 1 } } */
> +/* { dg-final { scan-assembler-times "movd" 1 } } */
> +/* { dg-final { scan-assembler-not "movl" } } */
> +
> +#include "sse2-mmx-19a.c"
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-19d.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-19d.c
> new file mode 100644
> index 00000000000..54691883c9c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-19d.c
> @@ -0,0 +1,7 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-mmx -mavx512bw -mno-avx512vl -mtune=intel" } */
> +/* { dg-final { scan-assembler-times "pbroadcastw" 1 } } */
> +/* { dg-final { scan-assembler-times "movd" 1 } } */
> +/* { dg-final { scan-assembler-not "movl" } } */
> +
> +#include "sse2-mmx-19a.c"
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-19e.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-19e.c
> new file mode 100644
> index 00000000000..8be973cc4fc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-19e.c
> @@ -0,0 +1,7 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-mmx -mavx2 -mno-avx512f -mtune=intel" } */
> +/* { dg-final { scan-assembler-times "pbroadcastw" 1 } } */
> +/* { dg-final { scan-assembler-times "movd" 1 } } */
> +/* { dg-final { scan-assembler-not "movl" } } */
> +
> +#include "sse2-mmx-19a.c"
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-2.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-2.c
> new file mode 100644
> index 00000000000..e4cee2da83e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-2.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +/* { dg-final { scan-assembler-not "%xmm" } } */
> +/* { dg-final { scan-assembler-not "%mm" } } */
> +
> +#include <mmintrin.h>
> +
> +float
> +foo (__m64 x)
> +{
> + return ((__v2sf) x)[0];
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-20.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-20.c
> new file mode 100644
> index 00000000000..173fa154d40
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-20.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +/* { dg-final { scan-assembler-times "movd" 1 } } */
> +/* { dg-final { scan-assembler-not "%mm" } } */
> +
> +#include <mmintrin.h>
> +
> +int
> +foo (__m64 x)
> +{
> + return ((__v2si) x)[0];
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-21.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-21.c
> new file mode 100644
> index 00000000000..8f5341e2de6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-21.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +/* { dg-final { scan-assembler-times "pshufd" 1 } } */
> +/* { dg-final { scan-assembler-times "movd" 1 } } */
> +/* { dg-final { scan-assembler-not "%mm" } } */
> +
> +#include <mmintrin.h>
> +
> +int
> +foo (__m64 x)
> +{
> + return ((__v2si) x)[1];
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-3.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-3.c
> new file mode 100644
> index 00000000000..77f518b6c5f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-3.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +/* { dg-final { scan-assembler "cvtdq2ps" } } */
> +/* { dg-final { scan-assembler-not "cvtpi2ps" } } */
> +/* { dg-final { scan-assembler-not "%mm" } } */
> +
> +#include <xmmintrin.h>
> +
> +__m128
> +foo (__m128 i1, __m64 i2)
> +{
> + return _mm_cvtpi32_ps (i1, i2);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-4.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-4.c
> new file mode 100644
> index 00000000000..d923724fc1c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-4.c
> @@ -0,0 +1,4 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +
> +#include "mmx-4.c"
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-5.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-5.c
> new file mode 100644
> index 00000000000..1953dc89bb1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-5.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +/* { dg-final { scan-assembler-not "%mm" } } */
> +
> +#include <xmmintrin.h>
> +
> +int
> +foo (__m64 i)
> +{
> + return _m_pextrw (i, 2);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-6.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-6.c
> new file mode 100644
> index 00000000000..f73444f493b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-6.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +/* { dg-final { scan-assembler-not "%mm" } } */
> +
> +#include <xmmintrin.h>
> +
> +__m64
> +foo (__m64 i, int w)
> +{
> + return _m_pinsrw (i, w, 2);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-7.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-7.c
> new file mode 100644
> index 00000000000..6ea491d2715
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-7.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +/* { dg-final { scan-assembler "movnti" } } */
> +/* { dg-final { scan-assembler-not "movntq" } } */
> +/* { dg-final { scan-assembler-not "%mm" } } */
> +
> +#include <xmmintrin.h>
> +
> +void
> +foo (__m64 *p, __m64 i)
> +{
> + _mm_stream_pi (p, i);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-8.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-8.c
> new file mode 100644
> index 00000000000..342c2fa4f25
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-8.c
> @@ -0,0 +1,4 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +
> +#include "mmx-8.c"
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-9.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-9.c
> new file mode 100644
> index 00000000000..f0bf7256c0e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-9.c
> @@ -0,0 +1,79 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-mmx" } */
> +/* { dg-require-effective-target sse2 } */
> +
> +#include "sse2-check.h"
> +
> +#include <string.h>
> +
> +#define FLOAT_X 2.3456
> +#define FLOAT_Y -4.5987
> +
> +static float expected_x = FLOAT_X;
> +static float expected_y = FLOAT_Y;
> +static __v2sf expected1 = { FLOAT_X, FLOAT_Y };
> +static __v2sf expected2 = { FLOAT_X, 0 };
> +static __v2sf expected3 = { FLOAT_X, FLOAT_X };
> +
> +float
> +__attribute__((noinline, noclone))
> +foo1 (__m64 x)
> +{
> + return ((__v2sf) x)[0];
> +}
> +
> +float
> +__attribute__((noinline, noclone))
> +foo2 (__m64 x)
> +{
> + return ((__v2sf) x)[1];
> +}
> +
> +__m64
> +__attribute__((noinline, noclone))
> +foo3 (float x)
> +{
> + return __extension__ (__m64) (__v2sf) { x, 0 };
> +}
> +
> +__m64
> +__attribute__((noinline, noclone))
> +foo4 (float x)
> +{
> + return __extension__ (__m64) (__v2sf) { x, x };
> +}
> +
> +__m64
> +__attribute__((noinline, noclone))
> +foo5 (float x, float y)
> +{
> + return __extension__ (__m64) (__v2sf) { x, y };
> +}
> +
> +void
> +__attribute__((noinline))
> +sse2_test (void)
> +{
> + __m64 res;
> + float x;
> +
> + x = foo1 ((__m64) expected1);
> + if (x != expected_x)
> + abort ();
> +
> + x = foo2 ((__m64) expected1);
> + if (x != expected_y)
> + abort ();
> +
> + res = foo3 (FLOAT_X);
> + if (memcmp (&res, &expected2, sizeof (res)))
> + abort ();
> +
> + res = foo4 (FLOAT_X);
> + if (memcmp (&res, &expected3, sizeof (res)))
> + abort ();
> +
> + res = foo5 (FLOAT_X, FLOAT_Y);
> + if (memcmp (&res, &expected1, sizeof (res)))
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtpi2ps.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtpi2ps.c
> new file mode 100644
> index 00000000000..bdf1085446b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtpi2ps.c
> @@ -0,0 +1,42 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include <string.h>
> +#include "sse2-check.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_cvtpi32_ps (__m128 *i1, __m64 *i2, __m128 *r)
> +{
> + *(__m128 *) r = _mm_cvtpi32_ps (*i1, *i2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (__m128 *dst_p, __m64 *src_p, __m128 *res_p)
> +{
> + int *src = (int *) src_p;
> + float *res = (float *) res_p;
> + *res_p = *dst_p;
> + int i;
> + __m128 r;
> + for (i = 0; i < 2; i++)
> + {
> + r = _mm_cvt_si2ss (*dst_p, src[i]);
> + res[i] = ((__v4sf) r)[0];
> + }
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + __m128 r, ck;
> + __v4sf x = { 1.99f, -3.9f, -4.9f, 3.8f };
> + __v2si y = { 30, -39 };
> +
> + /* Run the MMX tests */
> + test_cvtpi32_ps ((__m128 *) &x, (__m64 *) &y, &r);
> + compute_correct_result ((__m128 *) &x, (__m64 *) &y, &ck);
> + if (memcmp (&ck, &r, sizeof (r)))
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtps2pi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtps2pi.c
> new file mode 100644
> index 00000000000..8e860e7a5be
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtps2pi.c
> @@ -0,0 +1,35 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_cvtps_pi32 (__m128 *src_p, long long *r)
> +{
> + *(__m64 *) r = _mm_cvtps_pi32 (*src_p);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (__m128 *src_p, long long *res_p)
> +{
> + __v4sf *src = (__v4sf *) src_p;
> + int *res = (int *) res_p;
> + int i;
> + for (i = 0; i < 2; i++)
> + res[i] = _mm_cvt_ss2si (_mm_set_ss ((*src)[i]));
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + long long r, ck;
> + __v4sf x = { 1.99f, -3.9f, -4.9f, 3.8f };
> +
> + /* Run the MMX tests */
> + test_cvtps_pi32 ((__m128 *) &x, &r);
> + compute_correct_result ((__m128 *) &x, &ck);
> + if (ck != r)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-cvttps2pi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvttps2pi.c
> new file mode 100644
> index 00000000000..8b9dd7fc8ef
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvttps2pi.c
> @@ -0,0 +1,35 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_cvttps_pi32 (__m128 *src_p, long long *r)
> +{
> + *(__m64 *) r = _mm_cvttps_pi32 (*src_p);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (__m128 *src_p, long long *res_p)
> +{
> + __v4sf *src = (__v4sf *) src_p;
> + int *res = (int *) res_p;
> + int i;
> + for (i = 0; i < 2; i++)
> + res[i] = _mm_cvtt_ss2si (_mm_set_ss ((*src)[i]));
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + long long r, ck;
> + __v4sf x = { 1.99f, -3.9f, -4.9f, 3.8f };
> +
> + /* Run the MMX tests */
> + test_cvttps_pi32 ((__m128 *) &x, &r);
> + compute_correct_result ((__m128 *) &x, &ck);
> + if (ck != r)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-maskmovq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-maskmovq.c
> new file mode 100644
> index 00000000000..815a499ff84
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-maskmovq.c
> @@ -0,0 +1,98 @@
> +/* { dg-do run { target { *-*-linux* && { ! ia32 } } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include <unistd.h>
> +#include <sys/mman.h>
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_maskmovq (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + _m_maskmovq (t1, t2, (char *) r);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + char *dst = (char *) dst_p;
> + char *src = (char *) src_p;
> + char *res = (char *) res_p;
> + int i;
> + for (i = 0; i < 8; i++)
> + if ((src[i] & 0x80) != 0)
> + res[i] = dst[i];
> +}
> +
> +static void
> +do_maskmovq_test (long long *r)
> +{
> + int i;
> + long long ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i++)
> + {
> + r[0] = -1LL;
> + ck = -1LL;
> + test_maskmovq (&MMXops[i], &MMXops[i], r);
> + compute_correct_result (&MMXops[i], &MMXops[i], &ck);
> + if (*r != ck)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + char *buf;
> + long long *r;
> + size_t page_size = sysconf(_SC_PAGESIZE);
> +
> + buf = mmap (0, 3 * page_size, PROT_READ | PROT_WRITE,
> + MAP_PRIVATE | MAP_ANON, -1, 0);
> + if (buf == MAP_FAILED)
> + {
> + perror ("mmap");
> + abort ();
> + }
> +
> + if (mprotect (buf, page_size, PROT_NONE))
> + {
> + perror ("mprotect");
> + abort ();
> + }
> +
> + if (mprotect (buf + 2 * page_size, page_size, PROT_NONE))
> + {
> + perror ("mprotect");
> + abort ();
> + }
> +
> + r = (long long *) (buf + page_size);
> + do_maskmovq_test (r);
> +
> + r = (long long *) (buf + page_size + 3);
> + do_maskmovq_test (r);
> +
> + r = (long long *) (buf + page_size + 11);
> + do_maskmovq_test (r);
> +
> + r = (long long *) (buf + 2 * page_size - 16);
> + do_maskmovq_test (r);
> +
> + r = (long long *) (buf + 2 * page_size - 16 + 3);
> + do_maskmovq_test (r);
> +
> + r = (long long *) (buf + 2 * page_size - 16 + 8);
> + do_maskmovq_test (r);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-packssdw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-packssdw.c
> new file mode 100644
> index 00000000000..fb895c6cfe7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-packssdw.c
> @@ -0,0 +1,51 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_packssdw (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_packssdw (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + int *dst = (int *) dst_p;
> + int *src = (int *) src_p;
> + short *res = (short *) res_p;
> + int i;
> +
> + for (i = 0; i < 2; i++)
> + {
> + res[i] = saturate_w (dst[i]);
> + res[i + 2] = saturate_w (src[i]);
> + }
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_packssdw (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-packsswb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-packsswb.c
> new file mode 100644
> index 00000000000..1c4a948027c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-packsswb.c
> @@ -0,0 +1,51 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_packsswb (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_packsswb (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + short *dst = (short *) dst_p;
> + short *src = (short *) src_p;
> + char *res = (char *) res_p;
> + int i;
> +
> + for (i = 0; i < 4; i++)
> + {
> + res[i] = saturate_b (dst[i]);
> + res[i + 4] = saturate_b (src[i]);
> + }
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_packsswb (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-packuswb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-packuswb.c
> new file mode 100644
> index 00000000000..24abd5dcc9e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-packuswb.c
> @@ -0,0 +1,51 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_packuswb (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_packuswb (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + short *dst = (short *) dst_p;
> + short *src = (short *) src_p;
> + unsigned char *res = (unsigned char *) res_p;
> + int i;
> +
> + for (i = 0; i < 4; i++)
> + {
> + res[i] = saturate_ub (dst[i]);
> + res[i + 4] = saturate_ub (src[i]);
> + }
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_packuswb (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddb.c
> new file mode 100644
> index 00000000000..f4c8273c5e2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddb.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_paddb (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_paddb (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + char *dst = (char *) dst_p;
> + char *src = (char *) src_p;
> + char *res = (char *) res_p;
> + int i;
> + for (i = 0; i < 8; i++)
> + res[i] = dst[i] + src[i];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_paddb (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddd.c
> new file mode 100644
> index 00000000000..32911a7852a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddd.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_paddd (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_paddd (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + int *dst = (int *) dst_p;
> + int *src = (int *) src_p;
> + int *res = (int *) res_p;
> + int i;
> + for (i = 0; i < 2; i++)
> + res[i] = dst[i] + src[i];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_paddd (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddq.c
> new file mode 100644
> index 00000000000..8e257a314e0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddq.c
> @@ -0,0 +1,42 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_paddq (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _mm_add_si64 (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + res_p[0] = dst_p[0] + src_p[0];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_paddq (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsb.c
> new file mode 100644
> index 00000000000..9798a2024fb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsb.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_paddsb (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_paddsb (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + char *dst = (char *) dst_p;
> + char *src = (char *) src_p;
> + char *res = (char *) res_p;
> + int i;
> + for (i = 0; i < 8; i++)
> + res[i] = saturate_b (dst[i] + src[i]);
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_paddsb (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsw.c
> new file mode 100644
> index 00000000000..6371b1930fc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsw.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_paddsw (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_paddsw (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + short *dst = (short *) dst_p;
> + short *src = (short *) src_p;
> + short *res = (short *) res_p;
> + int i;
> + for (i = 0; i < 4; i++)
> + res[i] = saturate_w (dst[i] + src[i]);
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_paddsw (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusb.c
> new file mode 100644
> index 00000000000..bac22b641cd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusb.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_paddusb (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_paddusb (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + unsigned char *dst = (unsigned char *) dst_p;
> + unsigned char *src = (unsigned char *) src_p;
> + unsigned char *res = (unsigned char *) res_p;
> + int i;
> + for (i = 0; i < 8; i++)
> + res[i] = saturate_ub (dst[i] + src[i]);
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_paddusb (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusw.c
> new file mode 100644
> index 00000000000..70f987bf381
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusw.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_paddusw (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_paddusw (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + unsigned short *dst = (unsigned short *) dst_p;
> + unsigned short *src = (unsigned short *) src_p;
> + unsigned short *res = (unsigned short *) res_p;
> + int i;
> + for (i = 0; i < 4; i++)
> + res[i] = saturate_uw (dst[i] + src[i]);
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_paddusw (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddw.c
> new file mode 100644
> index 00000000000..8e01cc4734f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddw.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_paddw (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_paddw (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + short *dst = (short *) dst_p;
> + short *src = (short *) src_p;
> + short *res = (short *) res_p;
> + int i;
> + for (i = 0; i < 4; i++)
> + res[i] = dst[i] + src[i];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_paddw (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c
> new file mode 100644
> index 00000000000..0876fee92ce
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c
> @@ -0,0 +1,43 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pand (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_pand (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (unsigned long long *dst,
> + unsigned long long *src,
> + unsigned long long *res)
> +{
> + res[0] = dst[0] & src[0];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pand (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pandn.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pandn.c
> new file mode 100644
> index 00000000000..362c475029b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pandn.c
> @@ -0,0 +1,43 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pandn (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_pandn (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (unsigned long long *dst,
> + unsigned long long *src,
> + unsigned long long *res)
> +{
> + res[0] = ~dst[0] & src[0];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pandn (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgb.c
> new file mode 100644
> index 00000000000..0c57d94e9e7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgb.c
> @@ -0,0 +1,51 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pavgb (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_pavgb (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + unsigned char *dst = (unsigned char *) dst_p;
> + unsigned char *src = (unsigned char *) src_p;
> + unsigned char *res = (unsigned char *) res_p;
> + int i;
> + unsigned int tmp;
> + for (i = 0; i < 8; i++)
> + {
> + tmp = dst[i] + src[i] + 1;
> + res[i] = tmp >> 1;
> + }
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pavgb (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgw.c
> new file mode 100644
> index 00000000000..e38669ffbb4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgw.c
> @@ -0,0 +1,51 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pavgw (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_pavgw (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + unsigned short *dst = (unsigned short *) dst_p;
> + unsigned short *src = (unsigned short *) src_p;
> + unsigned short *res = (unsigned short *) res_p;
> + int i;
> + unsigned int tmp;
> + for (i = 0; i < 4; i++)
> + {
> + tmp = dst[i] + src[i] + 1;
> + res[i] = tmp >> 1;
> + }
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pavgw (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqb.c
> new file mode 100644
> index 00000000000..f0f3a28cf62
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqb.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pcmpeqb (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_pcmpeqb (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + char *dst = (char *) dst_p;
> + char *src = (char *) src_p;
> + char *res = (char *) res_p;
> + int i;
> + for (i = 0; i < 8; i++)
> + res[i] = dst[i] == src[i] ? -1 : 0;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pcmpeqb (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqd.c
> new file mode 100644
> index 00000000000..7dc13f147e8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqd.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pcmpeqd (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_pcmpeqd (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + int *dst = (int *) dst_p;
> + int *src = (int *) src_p;
> + int *res = (int *) res_p;
> + int i;
> + for (i = 0; i < 2; i++)
> + res[i] = dst[i] == src[i] ? -1 : 0;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pcmpeqd (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqw.c
> new file mode 100644
> index 00000000000..d6e59077204
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqw.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pcmpeqw (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_pcmpeqw (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + short *dst = (short *) dst_p;
> + short *src = (short *) src_p;
> + short *res = (short *) res_p;
> + int i;
> + for (i = 0; i < 4; i++)
> + res[i] = dst[i] == src[i] ? -1 : 0;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pcmpeqw (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtb.c
> new file mode 100644
> index 00000000000..3a1c188a407
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtb.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pcmpgtb (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_pcmpgtb (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + char *dst = (char *) dst_p;
> + char *src = (char *) src_p;
> + char *res = (char *) res_p;
> + int i;
> + for (i = 0; i < 8; i++)
> + res[i] = dst[i] > src[i] ? -1 : 0;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pcmpgtb (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtd.c
> new file mode 100644
> index 00000000000..121cafcd834
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtd.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pcmpgtd (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_pcmpgtd (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + int *dst = (int *) dst_p;
> + int *src = (int *) src_p;
> + int *res = (int *) res_p;
> + int i;
> + for (i = 0; i < 2; i++)
> + res[i] = dst[i] > src[i] ? -1 : 0;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pcmpgtd (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtw.c
> new file mode 100644
> index 00000000000..7b4e99d0a34
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtw.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pcmpgtw (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_pcmpgtw (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + short *dst = (short *) dst_p;
> + short *src = (short *) src_p;
> + short *res = (short *) res_p;
> + int i;
> + for (i = 0; i < 4; i++)
> + res[i] = dst[i] > src[i] ? -1 : 0;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pcmpgtw (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c
> new file mode 100644
> index 00000000000..58e5ea5aa3f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c
> @@ -0,0 +1,58 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing" } */
> +
> +#include <string.h>
> +#include "sse2-check.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pextrw (__m64 *i, unsigned int imm, int *r)
> +{
> + switch (imm)
> + {
> + case 0:
> + *r = _m_pextrw (*i, 0);
> + break;
> + case 1:
> + *r = _m_pextrw (*i, 1);
> + break;
> + case 2:
> + *r = _m_pextrw (*i, 2);
> + break;
> + case 3:
> + *r = _m_pextrw (*i, 3);
> + break;
> + default:
> + break;
> + }
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (__m64 *src_p, unsigned int imm, int *res_p)
> +{
> + short *src = (short *) src_p;
> + if (imm < 4)
> + *res_p = src[imm];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int r, ck;
> + int i;
> + int failed = 0;
> + __v4hi y = { 3320, -3339, 48, 4392 };
> +
> + /* Run the MMX tests */
> + for (i = 0; i < 4; i++)
> + {
> + test_pextrw ((__m64 *) &y, i, &r);
> + compute_correct_result ((__m64 *) &y, i, &ck);
> + if (r != ck)
> + failed++;
> + }
> +
> + if (failed)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c
> new file mode 100644
> index 00000000000..2c49d8ce5f9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c
> @@ -0,0 +1,60 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing" } */
> +
> +#include <string.h>
> +#include "sse2-check.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pinsrw (__m64 *i, int val, unsigned int imm, int *r)
> +{
> + switch (imm)
> + {
> + case 0:
> + *(__m64 *) r = _m_pinsrw (*i, val, 0);
> + break;
> + case 1:
> + *(__m64 *) r = _m_pinsrw (*i, val, 1);
> + break;
> + case 2:
> + *(__m64 *) r = _m_pinsrw (*i, val, 2);
> + break;
> + case 3:
> + *(__m64 *) r = _m_pinsrw (*i, val, 3);
> + break;
> + default:
> + break;
> + }
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (__m64 *src_p, int val, unsigned int imm,
> + int *res_p)
> +{
> + short *res = (short *) res_p;
> + *(__m64 *) res_p = *src_p;
> + if (imm < 4)
> + res[imm] = val;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int r, ck;
> + int i;
> + int failed = 0;
> + __v4hi y = { 3320, -3339, 48, 4392 };
> +
> + /* Run the MMX tests */
> + for (i = 0; i < 4; i++)
> + {
> + test_pinsrw ((__m64 *) &y, 0x1234, i, &r);
> + compute_correct_result ((__m64 *) &y, 0x1234, i, &ck);
> + if (r != ck)
> + failed++;
> + }
> +
> + if (failed)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaddwd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaddwd.c
> new file mode 100644
> index 00000000000..8f08aabf954
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaddwd.c
> @@ -0,0 +1,46 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pmaddwd (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_pmaddwd (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + short *dst = (short *) dst_p;
> + short *src = (short *) src_p;
> + int *res = (int *) res_p;
> + res[0] = dst[0] * src[0] + dst[1] * src[1];
> + res[1] = dst[2] * src[2] + dst[3] * src[3];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pmaddwd (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxsw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxsw.c
> new file mode 100644
> index 00000000000..e4a6d87f344
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxsw.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pmaxsw (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_pmaxsw (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + short *dst = (short *) dst_p;
> + short *src = (short *) src_p;
> + short *res = (short *) res_p;
> + int i;
> + for (i = 0; i < 4; i++)
> + res[i] = dst[i] > src[i] ? dst[i] : src[i];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pmaxsw (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxub.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxub.c
> new file mode 100644
> index 00000000000..f943989b96f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxub.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pmaxub (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_pmaxub (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + unsigned char *dst = (unsigned char *) dst_p;
> + unsigned char *src = (unsigned char *) src_p;
> + unsigned char *res = (unsigned char *) res_p;
> + int i;
> + for (i = 0; i < 8; i++)
> + res[i] = dst[i] > src[i] ? dst[i] : src[i];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pmaxub (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pminsw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pminsw.c
> new file mode 100644
> index 00000000000..6a92f7eb3a4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pminsw.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pminsw (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_pminsw (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + short *dst = (short *) dst_p;
> + short *src = (short *) src_p;
> + short *res = (short *) res_p;
> + int i;
> + for (i = 0; i < 4; i++)
> + res[i] = dst[i] < src[i] ? dst[i] : src[i];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pminsw (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pminub.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pminub.c
> new file mode 100644
> index 00000000000..a3b0e5093a8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pminub.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pminub (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_pminub (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + unsigned char *dst = (unsigned char *) dst_p;
> + unsigned char *src = (unsigned char *) src_p;
> + unsigned char *res = (unsigned char *) res_p;
> + int i;
> + for (i = 0; i < 8; i++)
> + res[i] = dst[i] < src[i] ? dst[i] : src[i];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pminub (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmovmskb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmovmskb.c
> new file mode 100644
> index 00000000000..73b2a00d282
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmovmskb.c
> @@ -0,0 +1,45 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pmovmskb (long long *ll1, int *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + *r = _m_pmovmskb (t1);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *src_p, int *res_p)
> +{
> + char *src = (char *) src_p;
> + int i;
> + int res = 0;
> + for (i = 0; i < 8; i++)
> + res |= ((src[i] & 0x80) >> 7) << i;
> + *res_p = res;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + int r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i++)
> + {
> + test_pmovmskb (&MMXops[i], &r);
> + compute_correct_result (&MMXops[i], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhuw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhuw.c
> new file mode 100644
> index 00000000000..ebf2a760fd7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhuw.c
> @@ -0,0 +1,50 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pmulhuw (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_pmulhuw (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + unsigned short *dst = (unsigned short *) dst_p;
> + unsigned short *src = (unsigned short *) src_p;
> + unsigned short *res = (unsigned short *) res_p;
> + int i;
> + for (i = 0; i < 4; i++)
> + {
> + unsigned int t = dst[i] * src[i];
> + res[i] = t >> 16;
> + }
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pmulhuw (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhw.c
> new file mode 100644
> index 00000000000..4aa8d605c4c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhw.c
> @@ -0,0 +1,52 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pmulhw (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_pmulhw (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + short *dst = (short *) dst_p;
> + short *src = (short *) src_p;
> + short *res = (short *) res_p;
> + int tmp;
> + int i;
> + for (i = 0; i < 4; i++)
> + {
> + tmp = dst[i] * src[i];
> + tmp >>= 16;
> + res[i] = tmp;
> + }
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pmulhw (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmullw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmullw.c
> new file mode 100644
> index 00000000000..732687e8cd7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmullw.c
> @@ -0,0 +1,51 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pmullw (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_pmullw (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + short *dst = (short *) dst_p;
> + short *src = (short *) src_p;
> + short *res = (short *) res_p;
> + int tmp;
> + int i;
> + for (i = 0; i < 4; i++)
> + {
> + tmp = dst[i] * src[i];
> + res[i] = tmp;
> + }
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pmullw (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmuludq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmuludq.c
> new file mode 100644
> index 00000000000..eec4d970d63
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmuludq.c
> @@ -0,0 +1,46 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pmuludq (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _mm_mul_su32 (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + unsigned int *dst = (unsigned int*) dst_p;
> + unsigned int *src = (unsigned int *) src_p;
> + unsigned long long *res = (unsigned long long *) res_p;
> + res[0] = dst[0];
> + res[0] *= src[0];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pmuludq (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-por.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-por.c
> new file mode 100644
> index 00000000000..79d3a9a548f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-por.c
> @@ -0,0 +1,43 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_por (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_por (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (unsigned long long *dst,
> + unsigned long long *src,
> + unsigned long long *res)
> +{
> + res[0] = dst[0] | src[0];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_por (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psadbw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psadbw.c
> new file mode 100644
> index 00000000000..324ce8c51a2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psadbw.c
> @@ -0,0 +1,57 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psadbw (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_psadbw (t1, t2);
> +}
> +
> +/* Reference PSADBW: word 0 of *res_p = sum of |dst[i] - src[i]| over 8 bytes; words 1-3 = 0.  */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + unsigned char *dst = (unsigned char *) dst_p;
> + unsigned char *src = (unsigned char *) src_p;
> + unsigned short *res = (unsigned short *) res_p;
> + int i;
> + int tmp;
> + unsigned int sum = 0;
> + for (i = 0; i < 8; i++)
> + {
> + tmp = dst[i] - src[i]; /* bytes promote to int, so the difference may be negative */
> + if (tmp < 0)
> + tmp = -tmp;
> + sum += tmp;
> + }
> + res[0] = sum; /* at most 8 * 255, always fits in 16 bits */
> + for (i = 1; i < 4; i++) /* clear all three upper words; the caller's ck is uninitialized */
> + res[i] = 0;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_psadbw (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pshufw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pshufw.c
> new file mode 100644
> index 00000000000..8da4b7afac7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pshufw.c
> @@ -0,0 +1,247 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pshufw (long long *ll1, unsigned int imm, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + switch (imm)
> + {
> + case 0:
> + *(__m64 *) r = _m_pshufw (t1, 0);
> + break;
> + case 1:
> + *(__m64 *) r = _m_pshufw (t1, 1);
> + break;
> + case 2:
> + *(__m64 *) r = _m_pshufw (t1, 2);
> + break;
> + case 3:
> + *(__m64 *) r = _m_pshufw (t1, 3);
> + break;
> + case 4:
> + *(__m64 *) r = _m_pshufw (t1, 4);
> + break;
> + case 5:
> + *(__m64 *) r = _m_pshufw (t1, 5);
> + break;
> + case 6:
> + *(__m64 *) r = _m_pshufw (t1, 6);
> + break;
> + case 7:
> + *(__m64 *) r = _m_pshufw (t1, 7);
> + break;
> + case 8:
> + *(__m64 *) r = _m_pshufw (t1, 8);
> + break;
> + case 9:
> + *(__m64 *) r = _m_pshufw (t1, 9);
> + break;
> + case 10:
> + *(__m64 *) r = _m_pshufw (t1, 10);
> + break;
> + case 11:
> + *(__m64 *) r = _m_pshufw (t1, 11);
> + break;
> + case 12:
> + *(__m64 *) r = _m_pshufw (t1, 12);
> + break;
> + case 13:
> + *(__m64 *) r = _m_pshufw (t1, 13);
> + break;
> + case 14:
> + *(__m64 *) r = _m_pshufw (t1, 14);
> + break;
> + case 15:
> + *(__m64 *) r = _m_pshufw (t1, 15);
> + break;
> + case 16:
> + *(__m64 *) r = _m_pshufw (t1, 16);
> + break;
> + case 17:
> + *(__m64 *) r = _m_pshufw (t1, 17);
> + break;
> + case 18:
> + *(__m64 *) r = _m_pshufw (t1, 18);
> + break;
> + case 19:
> + *(__m64 *) r = _m_pshufw (t1, 19);
> + break;
> + case 20:
> + *(__m64 *) r = _m_pshufw (t1, 20);
> + break;
> + case 21:
> + *(__m64 *) r = _m_pshufw (t1, 21);
> + break;
> + case 22:
> + *(__m64 *) r = _m_pshufw (t1, 22);
> + break;
> + case 23:
> + *(__m64 *) r = _m_pshufw (t1, 23);
> + break;
> + case 24:
> + *(__m64 *) r = _m_pshufw (t1, 24);
> + break;
> + case 25:
> + *(__m64 *) r = _m_pshufw (t1, 25);
> + break;
> + case 26:
> + *(__m64 *) r = _m_pshufw (t1, 26);
> + break;
> + case 27:
> + *(__m64 *) r = _m_pshufw (t1, 27);
> + break;
> + case 28:
> + *(__m64 *) r = _m_pshufw (t1, 28);
> + break;
> + case 29:
> + *(__m64 *) r = _m_pshufw (t1, 29);
> + break;
> + case 30:
> + *(__m64 *) r = _m_pshufw (t1, 30);
> + break;
> + case 31:
> + *(__m64 *) r = _m_pshufw (t1, 31);
> + break;
> + case 32:
> + *(__m64 *) r = _m_pshufw (t1, 32);
> + break;
> + case 33:
> + *(__m64 *) r = _m_pshufw (t1, 33);
> + break;
> + case 34:
> + *(__m64 *) r = _m_pshufw (t1, 34);
> + break;
> + case 35:
> + *(__m64 *) r = _m_pshufw (t1, 35);
> + break;
> + case 36:
> + *(__m64 *) r = _m_pshufw (t1, 36);
> + break;
> + case 37:
> + *(__m64 *) r = _m_pshufw (t1, 37);
> + break;
> + case 38:
> + *(__m64 *) r = _m_pshufw (t1, 38);
> + break;
> + case 39:
> + *(__m64 *) r = _m_pshufw (t1, 39);
> + break;
> + case 40:
> + *(__m64 *) r = _m_pshufw (t1, 40);
> + break;
> + case 41:
> + *(__m64 *) r = _m_pshufw (t1, 41);
> + break;
> + case 42:
> + *(__m64 *) r = _m_pshufw (t1, 42);
> + break;
> + case 43:
> + *(__m64 *) r = _m_pshufw (t1, 43);
> + break;
> + case 44:
> + *(__m64 *) r = _m_pshufw (t1, 44);
> + break;
> + case 45:
> + *(__m64 *) r = _m_pshufw (t1, 45);
> + break;
> + case 46:
> + *(__m64 *) r = _m_pshufw (t1, 46);
> + break;
> + case 47:
> + *(__m64 *) r = _m_pshufw (t1, 47);
> + break;
> + case 48:
> + *(__m64 *) r = _m_pshufw (t1, 48);
> + break;
> + case 49:
> + *(__m64 *) r = _m_pshufw (t1, 49);
> + break;
> + case 50:
> + *(__m64 *) r = _m_pshufw (t1, 50);
> + break;
> + case 51:
> + *(__m64 *) r = _m_pshufw (t1, 51);
> + break;
> + case 52:
> + *(__m64 *) r = _m_pshufw (t1, 52);
> + break;
> + case 53:
> + *(__m64 *) r = _m_pshufw (t1, 53);
> + break;
> + case 54:
> + *(__m64 *) r = _m_pshufw (t1, 54);
> + break;
> + case 55:
> + *(__m64 *) r = _m_pshufw (t1, 55);
> + break;
> + case 56:
> + *(__m64 *) r = _m_pshufw (t1, 56);
> + break;
> + case 57:
> + *(__m64 *) r = _m_pshufw (t1, 57);
> + break;
> + case 58:
> + *(__m64 *) r = _m_pshufw (t1, 58);
> + break;
> + case 59:
> + *(__m64 *) r = _m_pshufw (t1, 59);
> + break;
> + case 60:
> + *(__m64 *) r = _m_pshufw (t1, 60);
> + break;
> + case 61:
> + *(__m64 *) r = _m_pshufw (t1, 61);
> + break;
> + case 62:
> + *(__m64 *) r = _m_pshufw (t1, 62);
> + break;
> + case 63:
> + *(__m64 *) r = _m_pshufw (t1, 63);
> + break;
> + default:
> + break;
> + }
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *src_p, unsigned int imm,
> + long long *res_p)
> +{
> + unsigned long long src = *(unsigned long long *) src_p;
> + unsigned short *res = (unsigned short *) res_p;
> + int i;
> + unsigned int shift;
> + for (i = 0; i < 4; i++)
> + {
> + shift = ((imm >> (2 * i)) & 0x3) * 16;
> + res[i] = (src >> shift) & 0xffff;
> + }
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i++)
> + {
> + if (i > 63)
> + break;
> + test_pshufw (&MMXops[i], i, &r);
> + compute_correct_result (&MMXops[i], i, &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pslld.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pslld.c
> new file mode 100644
> index 00000000000..0eead0822fc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pslld.c
> @@ -0,0 +1,51 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pslld (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_pslld (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + unsigned int *dst = (unsigned int *) dst_p;
> + unsigned int *src = (unsigned int *) src_p;
> + unsigned int *res = (unsigned int *) res_p;
> + int i;
> + if (src[1] || src[0] > 31)
> + for (i = 0; i < 2; i++)
> + res[i] = 0;
> + else
> + for (i = 0; i < 2; i++)
> + res[i] = dst[i] << src[0];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pslld (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pslldi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pslldi.c
> new file mode 100644
> index 00000000000..6ae1d4a78e2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pslldi.c
> @@ -0,0 +1,152 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psllwi (long long *ll1, unsigned int imm, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + switch (imm)
> + {
> + case 0:
> + *(__m64 *) r = _m_pslldi (t1, 0);
> + break;
> + case 1:
> + *(__m64 *) r = _m_pslldi (t1, 1);
> + break;
> + case 2:
> + *(__m64 *) r = _m_pslldi (t1, 2);
> + break;
> + case 3:
> + *(__m64 *) r = _m_pslldi (t1, 3);
> + break;
> + case 4:
> + *(__m64 *) r = _m_pslldi (t1, 4);
> + break;
> + case 5:
> + *(__m64 *) r = _m_pslldi (t1, 5);
> + break;
> + case 6:
> + *(__m64 *) r = _m_pslldi (t1, 6);
> + break;
> + case 7:
> + *(__m64 *) r = _m_pslldi (t1, 7);
> + break;
> + case 8:
> + *(__m64 *) r = _m_pslldi (t1, 8);
> + break;
> + case 9:
> + *(__m64 *) r = _m_pslldi (t1, 9);
> + break;
> + case 10:
> + *(__m64 *) r = _m_pslldi (t1, 10);
> + break;
> + case 11:
> + *(__m64 *) r = _m_pslldi (t1, 11);
> + break;
> + case 12:
> + *(__m64 *) r = _m_pslldi (t1, 12);
> + break;
> + case 13:
> + *(__m64 *) r = _m_pslldi (t1, 13);
> + break;
> + case 14:
> + *(__m64 *) r = _m_pslldi (t1, 14);
> + break;
> + case 15:
> + *(__m64 *) r = _m_pslldi (t1, 15);
> + break;
> + case 16:
> + *(__m64 *) r = _m_pslldi (t1, 16);
> + break;
> + case 17:
> + *(__m64 *) r = _m_pslldi (t1, 17);
> + break;
> + case 18:
> + *(__m64 *) r = _m_pslldi (t1, 18);
> + break;
> + case 19:
> + *(__m64 *) r = _m_pslldi (t1, 19);
> + break;
> + case 20:
> + *(__m64 *) r = _m_pslldi (t1, 20);
> + break;
> + case 21:
> + *(__m64 *) r = _m_pslldi (t1, 21);
> + break;
> + case 22:
> + *(__m64 *) r = _m_pslldi (t1, 22);
> + break;
> + case 23:
> + *(__m64 *) r = _m_pslldi (t1, 23);
> + break;
> + case 24:
> + *(__m64 *) r = _m_pslldi (t1, 24);
> + break;
> + case 25:
> + *(__m64 *) r = _m_pslldi (t1, 25);
> + break;
> + case 26:
> + *(__m64 *) r = _m_pslldi (t1, 26);
> + break;
> + case 27:
> + *(__m64 *) r = _m_pslldi (t1, 27);
> + break;
> + case 28:
> + *(__m64 *) r = _m_pslldi (t1, 28);
> + break;
> + case 29:
> + *(__m64 *) r = _m_pslldi (t1, 29);
> + break;
> + case 30:
> + *(__m64 *) r = _m_pslldi (t1, 30);
> + break;
> + case 31:
> + *(__m64 *) r = _m_pslldi (t1, 31);
> + break;
> + default:
> + *(__m64 *) r = _m_pslldi (t1, 32);
> + break;
> + }
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *src_p, unsigned int imm,
> + long long *res_p)
> +{
> + unsigned int *src = (unsigned int *) src_p;
> + unsigned int *res = (unsigned int *) res_p;
> + int i;
> + if (imm > 31)
> + for (i = 0; i < 2; i++)
> + res[i] = 0;
> + else
> + for (i = 0; i < 2; i++)
> + res[i] = src[i] << imm;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + unsigned int count;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i++)
> + {
> + count = MMXops[i];
> + test_psllwi (&MMXops[i], count, &r);
> + compute_correct_result (&MMXops[i], count, &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psllq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllq.c
> new file mode 100644
> index 00000000000..0283ba0f329
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllq.c
> @@ -0,0 +1,46 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psllq (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_psllq (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (unsigned long long *dst,
> + unsigned long long *src,
> + unsigned long long *res)
> +{
> + if (src[0] > 63)
> + res[0] = 0;
> + else
> + res[0] = dst[0] << src[0];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_psllq (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psllqi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllqi.c
> new file mode 100644
> index 00000000000..48d7e5f2e31
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllqi.c
> @@ -0,0 +1,244 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psllwi (long long *ll1, unsigned int imm, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + switch (imm)
> + {
> + case 0:
> + *(__m64 *) r = _m_psllqi (t1, 0);
> + break;
> + case 1:
> + *(__m64 *) r = _m_psllqi (t1, 1);
> + break;
> + case 2:
> + *(__m64 *) r = _m_psllqi (t1, 2);
> + break;
> + case 3:
> + *(__m64 *) r = _m_psllqi (t1, 3);
> + break;
> + case 4:
> + *(__m64 *) r = _m_psllqi (t1, 4);
> + break;
> + case 5:
> + *(__m64 *) r = _m_psllqi (t1, 5);
> + break;
> + case 6:
> + *(__m64 *) r = _m_psllqi (t1, 6);
> + break;
> + case 7:
> + *(__m64 *) r = _m_psllqi (t1, 7);
> + break;
> + case 8:
> + *(__m64 *) r = _m_psllqi (t1, 8);
> + break;
> + case 9:
> + *(__m64 *) r = _m_psllqi (t1, 9);
> + break;
> + case 10:
> + *(__m64 *) r = _m_psllqi (t1, 10);
> + break;
> + case 11:
> + *(__m64 *) r = _m_psllqi (t1, 11);
> + break;
> + case 12:
> + *(__m64 *) r = _m_psllqi (t1, 12);
> + break;
> + case 13:
> + *(__m64 *) r = _m_psllqi (t1, 13);
> + break;
> + case 14:
> + *(__m64 *) r = _m_psllqi (t1, 14);
> + break;
> + case 15:
> + *(__m64 *) r = _m_psllqi (t1, 15);
> + break;
> + case 16:
> + *(__m64 *) r = _m_psllqi (t1, 16);
> + break;
> + case 17:
> + *(__m64 *) r = _m_psllqi (t1, 17);
> + break;
> + case 18:
> + *(__m64 *) r = _m_psllqi (t1, 18);
> + break;
> + case 19:
> + *(__m64 *) r = _m_psllqi (t1, 19);
> + break;
> + case 20:
> + *(__m64 *) r = _m_psllqi (t1, 20);
> + break;
> + case 21:
> + *(__m64 *) r = _m_psllqi (t1, 21);
> + break;
> + case 22:
> + *(__m64 *) r = _m_psllqi (t1, 22);
> + break;
> + case 23:
> + *(__m64 *) r = _m_psllqi (t1, 23);
> + break;
> + case 24:
> + *(__m64 *) r = _m_psllqi (t1, 24);
> + break;
> + case 25:
> + *(__m64 *) r = _m_psllqi (t1, 25);
> + break;
> + case 26:
> + *(__m64 *) r = _m_psllqi (t1, 26);
> + break;
> + case 27:
> + *(__m64 *) r = _m_psllqi (t1, 27);
> + break;
> + case 28:
> + *(__m64 *) r = _m_psllqi (t1, 28);
> + break;
> + case 29:
> + *(__m64 *) r = _m_psllqi (t1, 29);
> + break;
> + case 30:
> + *(__m64 *) r = _m_psllqi (t1, 30);
> + break;
> + case 31:
> + *(__m64 *) r = _m_psllqi (t1, 31);
> + break;
> + case 32:
> + *(__m64 *) r = _m_psllqi (t1, 32);
> + break;
> + case 33:
> + *(__m64 *) r = _m_psllqi (t1, 33);
> + break;
> + case 34:
> + *(__m64 *) r = _m_psllqi (t1, 34);
> + break;
> + case 35:
> + *(__m64 *) r = _m_psllqi (t1, 35);
> + break;
> + case 36:
> + *(__m64 *) r = _m_psllqi (t1, 36);
> + break;
> + case 37:
> + *(__m64 *) r = _m_psllqi (t1, 37);
> + break;
> + case 38:
> + *(__m64 *) r = _m_psllqi (t1, 38);
> + break;
> + case 39:
> + *(__m64 *) r = _m_psllqi (t1, 39);
> + break;
> + case 40:
> + *(__m64 *) r = _m_psllqi (t1, 40);
> + break;
> + case 41:
> + *(__m64 *) r = _m_psllqi (t1, 41);
> + break;
> + case 42:
> + *(__m64 *) r = _m_psllqi (t1, 42);
> + break;
> + case 43:
> + *(__m64 *) r = _m_psllqi (t1, 43);
> + break;
> + case 44:
> + *(__m64 *) r = _m_psllqi (t1, 44);
> + break;
> + case 45:
> + *(__m64 *) r = _m_psllqi (t1, 45);
> + break;
> + case 46:
> + *(__m64 *) r = _m_psllqi (t1, 46);
> + break;
> + case 47:
> + *(__m64 *) r = _m_psllqi (t1, 47);
> + break;
> + case 48:
> + *(__m64 *) r = _m_psllqi (t1, 48);
> + break;
> + case 49:
> + *(__m64 *) r = _m_psllqi (t1, 49);
> + break;
> + case 50:
> + *(__m64 *) r = _m_psllqi (t1, 50);
> + break;
> + case 51:
> + *(__m64 *) r = _m_psllqi (t1, 51);
> + break;
> + case 52:
> + *(__m64 *) r = _m_psllqi (t1, 52);
> + break;
> + case 53:
> + *(__m64 *) r = _m_psllqi (t1, 53);
> + break;
> + case 54:
> + *(__m64 *) r = _m_psllqi (t1, 54);
> + break;
> + case 55:
> + *(__m64 *) r = _m_psllqi (t1, 55);
> + break;
> + case 56:
> + *(__m64 *) r = _m_psllqi (t1, 56);
> + break;
> + case 57:
> + *(__m64 *) r = _m_psllqi (t1, 57);
> + break;
> + case 58:
> + *(__m64 *) r = _m_psllqi (t1, 58);
> + break;
> + case 59:
> + *(__m64 *) r = _m_psllqi (t1, 59);
> + break;
> + case 60:
> + *(__m64 *) r = _m_psllqi (t1, 60);
> + break;
> + case 61:
> + *(__m64 *) r = _m_psllqi (t1, 61);
> + break;
> + case 62:
> + *(__m64 *) r = _m_psllqi (t1, 62);
> + break;
> + case 63:
> + *(__m64 *) r = _m_psllqi (t1, 63);
> + break;
> + default:
> + *(__m64 *) r = _m_psllqi (t1, 64);
> + break;
> + }
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (unsigned long long *src, unsigned int imm,
> + unsigned long long *res)
> +{
> + int i;
> + if (imm > 63)
> + res[0] = 0;
> + else
> + res[0] = src[0] << imm;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + unsigned int count;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i++)
> + {
> + count = MMXops[i];
> + test_psllwi (&MMXops[i], count, &r);
> + compute_correct_result (&MMXops[i], count, &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psllw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllw.c
> new file mode 100644
> index 00000000000..10d1b79bf26
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllw.c
> @@ -0,0 +1,51 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psllw (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_psllw (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + unsigned short *dst = (unsigned short *) dst_p;
> + unsigned int *src = (unsigned int *) src_p;
> + unsigned short *res = (unsigned short *) res_p;
> + int i;
> + if (src[1] || src[0] > 15)
> + for (i = 0; i < 4; i++)
> + res[i] = 0;
> + else
> + for (i = 0; i < 4; i++)
> + res[i] = dst[i] << src[0];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_psllw (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psllwi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllwi.c
> new file mode 100644
> index 00000000000..373fa5c146b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllwi.c
> @@ -0,0 +1,104 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psllwi (long long *ll1, unsigned int imm, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + switch (imm)
> + {
> + case 0:
> + *(__m64 *) r = _m_psllwi (t1, 0);
> + break;
> + case 1:
> + *(__m64 *) r = _m_psllwi (t1, 1);
> + break;
> + case 2:
> + *(__m64 *) r = _m_psllwi (t1, 2);
> + break;
> + case 3:
> + *(__m64 *) r = _m_psllwi (t1, 3);
> + break;
> + case 4:
> + *(__m64 *) r = _m_psllwi (t1, 4);
> + break;
> + case 5:
> + *(__m64 *) r = _m_psllwi (t1, 5);
> + break;
> + case 6:
> + *(__m64 *) r = _m_psllwi (t1, 6);
> + break;
> + case 7:
> + *(__m64 *) r = _m_psllwi (t1, 7);
> + break;
> + case 8:
> + *(__m64 *) r = _m_psllwi (t1, 8);
> + break;
> + case 9:
> + *(__m64 *) r = _m_psllwi (t1, 9);
> + break;
> + case 10:
> + *(__m64 *) r = _m_psllwi (t1, 10);
> + break;
> + case 11:
> + *(__m64 *) r = _m_psllwi (t1, 11);
> + break;
> + case 12:
> + *(__m64 *) r = _m_psllwi (t1, 12);
> + break;
> + case 13:
> + *(__m64 *) r = _m_psllwi (t1, 13);
> + break;
> + case 14:
> + *(__m64 *) r = _m_psllwi (t1, 14);
> + break;
> + case 15:
> + *(__m64 *) r = _m_psllwi (t1, 15);
> + break;
> + default:
> + *(__m64 *) r = _m_psllwi (t1, 16);
> + break;
> + }
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *src_p, unsigned int imm,
> + long long *res_p)
> +{
> + unsigned short *src = (unsigned short *) src_p;
> + unsigned short *res = (unsigned short *) res_p;
> + int i;
> + if (imm > 15)
> + for (i = 0; i < 4; i++)
> + res[i] = 0;
> + else
> + for (i = 0; i < 4; i++)
> + res[i] = src[i] << imm;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + unsigned int count;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i++)
> + {
> + count = MMXops[i];
> + test_psllwi (&MMXops[i], count, &r);
> + compute_correct_result (&MMXops[i], count, &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrad.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrad.c
> new file mode 100644
> index 00000000000..a9d41c273cc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrad.c
> @@ -0,0 +1,51 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psrad (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_psrad (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + int *dst = (int *) dst_p;
> + unsigned int *src = (unsigned int *) src_p;
> + int *res = (int *) res_p;
> + int i;
> + if (src[1] || src[0] > 31)
> + for (i = 0; i < 2; i++)
> + res[i] = dst[i] < 0 ? -1 : 0;
> + else
> + for (i = 0; i < 2; i++)
> + res[i] = dst[i] >> src[0];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_psrad (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psradi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psradi.c
> new file mode 100644
> index 00000000000..8237250c48f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psradi.c
> @@ -0,0 +1,152 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psradi (long long *ll1, unsigned int imm, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + switch (imm)
> + {
> + case 0:
> + *(__m64 *) r = _m_psradi (t1, 0);
> + break;
> + case 1:
> + *(__m64 *) r = _m_psradi (t1, 1);
> + break;
> + case 2:
> + *(__m64 *) r = _m_psradi (t1, 2);
> + break;
> + case 3:
> + *(__m64 *) r = _m_psradi (t1, 3);
> + break;
> + case 4:
> + *(__m64 *) r = _m_psradi (t1, 4);
> + break;
> + case 5:
> + *(__m64 *) r = _m_psradi (t1, 5);
> + break;
> + case 6:
> + *(__m64 *) r = _m_psradi (t1, 6);
> + break;
> + case 7:
> + *(__m64 *) r = _m_psradi (t1, 7);
> + break;
> + case 8:
> + *(__m64 *) r = _m_psradi (t1, 8);
> + break;
> + case 9:
> + *(__m64 *) r = _m_psradi (t1, 9);
> + break;
> + case 10:
> + *(__m64 *) r = _m_psradi (t1, 10);
> + break;
> + case 11:
> + *(__m64 *) r = _m_psradi (t1, 11);
> + break;
> + case 12:
> + *(__m64 *) r = _m_psradi (t1, 12);
> + break;
> + case 13:
> + *(__m64 *) r = _m_psradi (t1, 13);
> + break;
> + case 14:
> + *(__m64 *) r = _m_psradi (t1, 14);
> + break;
> + case 15:
> + *(__m64 *) r = _m_psradi (t1, 15);
> + break;
> + case 16:
> + *(__m64 *) r = _m_psradi (t1, 16);
> + break;
> + case 17:
> + *(__m64 *) r = _m_psradi (t1, 17);
> + break;
> + case 18:
> + *(__m64 *) r = _m_psradi (t1, 18);
> + break;
> + case 19:
> + *(__m64 *) r = _m_psradi (t1, 19);
> + break;
> + case 20:
> + *(__m64 *) r = _m_psradi (t1, 20);
> + break;
> + case 21:
> + *(__m64 *) r = _m_psradi (t1, 21);
> + break;
> + case 22:
> + *(__m64 *) r = _m_psradi (t1, 22);
> + break;
> + case 23:
> + *(__m64 *) r = _m_psradi (t1, 23);
> + break;
> + case 24:
> + *(__m64 *) r = _m_psradi (t1, 24);
> + break;
> + case 25:
> + *(__m64 *) r = _m_psradi (t1, 25);
> + break;
> + case 26:
> + *(__m64 *) r = _m_psradi (t1, 26);
> + break;
> + case 27:
> + *(__m64 *) r = _m_psradi (t1, 27);
> + break;
> + case 28:
> + *(__m64 *) r = _m_psradi (t1, 28);
> + break;
> + case 29:
> + *(__m64 *) r = _m_psradi (t1, 29);
> + break;
> + case 30:
> + *(__m64 *) r = _m_psradi (t1, 30);
> + break;
> + case 31:
> + *(__m64 *) r = _m_psradi (t1, 31);
> + break;
> + default:
> + *(__m64 *) r = _m_psradi (t1, 32);
> + break;
> + }
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *src_p, unsigned int imm,
> + long long *res_p)
> +{
> + int *src = (int *) src_p;
> + int *res = (int *) res_p;
> + int i;
> + if (imm > 31)
> + for (i = 0; i < 2; i++)
> + res[i] = src[i] < 0 ? -1 : 0;
> + else
> + for (i = 0; i < 2; i++)
> + res[i] = src[i] >> imm;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + unsigned int count;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i++)
> + {
> + count = MMXops[i];
> + test_psradi (&MMXops[i], count, &r);
> + compute_correct_result (&MMXops[i], count, &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psraw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psraw.c
> new file mode 100644
> index 00000000000..3fed516b811
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psraw.c
> @@ -0,0 +1,51 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psraw (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_psraw (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + short *dst = (short *) dst_p;
> + unsigned int *src = (unsigned int *) src_p;
> + short *res = (short *) res_p;
> + int i;
> + if (src[1] || src[0] > 15)
> + for (i = 0; i < 4; i++)
> + res[i] = dst[i] < 0 ? -1 : 0;
> + else
> + for (i = 0; i < 4; i++)
> + res[i] = dst[i] >> src[0];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_psraw (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrawi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrawi.c
> new file mode 100644
> index 00000000000..1c8973db3db
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrawi.c
> @@ -0,0 +1,104 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psrawi (long long *ll1, unsigned int imm, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + switch (imm)
> + {
> + case 0:
> + *(__m64 *) r = _m_psrawi (t1, 0);
> + break;
> + case 1:
> + *(__m64 *) r = _m_psrawi (t1, 1);
> + break;
> + case 2:
> + *(__m64 *) r = _m_psrawi (t1, 2);
> + break;
> + case 3:
> + *(__m64 *) r = _m_psrawi (t1, 3);
> + break;
> + case 4:
> + *(__m64 *) r = _m_psrawi (t1, 4);
> + break;
> + case 5:
> + *(__m64 *) r = _m_psrawi (t1, 5);
> + break;
> + case 6:
> + *(__m64 *) r = _m_psrawi (t1, 6);
> + break;
> + case 7:
> + *(__m64 *) r = _m_psrawi (t1, 7);
> + break;
> + case 8:
> + *(__m64 *) r = _m_psrawi (t1, 8);
> + break;
> + case 9:
> + *(__m64 *) r = _m_psrawi (t1, 9);
> + break;
> + case 10:
> + *(__m64 *) r = _m_psrawi (t1, 10);
> + break;
> + case 11:
> + *(__m64 *) r = _m_psrawi (t1, 11);
> + break;
> + case 12:
> + *(__m64 *) r = _m_psrawi (t1, 12);
> + break;
> + case 13:
> + *(__m64 *) r = _m_psrawi (t1, 13);
> + break;
> + case 14:
> + *(__m64 *) r = _m_psrawi (t1, 14);
> + break;
> + case 15:
> + *(__m64 *) r = _m_psrawi (t1, 15);
> + break;
> + default:
> + *(__m64 *) r = _m_psrawi (t1, 16);
> + break;
> + }
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *src_p, unsigned int imm,
> + long long *res_p)
> +{
> + short *src = (short *) src_p;
> + short *res = (short *) res_p;
> + int i;
> + if (imm > 15)
> + for (i = 0; i < 4; i++)
> + res[i] = src[i] < 0 ? -1 : 0;
> + else
> + for (i = 0; i < 4; i++)
> + res[i] = src[i] >> imm;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + unsigned int count;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i++)
> + {
> + count = MMXops[i];
> + test_psrawi (&MMXops[i], count, &r);
> + compute_correct_result (&MMXops[i], count, &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrld.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrld.c
> new file mode 100644
> index 00000000000..b7c9565cb24
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrld.c
> @@ -0,0 +1,51 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psrld (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_psrld (t1, t2);
> +}
> +
> +/* Reference PSRLD: logical right shift of both 32-bit lanes of DST by SRC.  */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
> +{
> +  /* Unsigned lanes: ">>" on a negative int would shift in copies of the sign.  */
> +  unsigned int *dst = (unsigned int *) dst_p;
> +  unsigned int *src = (unsigned int *) src_p;
> +  unsigned int *res = (unsigned int *) res_p;
> +  int i;
> +  if (src[1] || src[0] > 31)
> +    for (i = 0; i < 2; i++)
> +      res[i] = 0;
> +  else
> +    for (i = 0; i < 2; i++)
> +      res[i] = dst[i] >> src[0];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_psrld (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrldi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrldi.c
> new file mode 100644
> index 00000000000..6a150ee2eff
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrldi.c
> @@ -0,0 +1,152 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psrldi (long long *ll1, unsigned int imm, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + switch (imm)
> + {
> + case 0:
> + *(__m64 *) r = _m_psrldi (t1, 0);
> + break;
> + case 1:
> + *(__m64 *) r = _m_psrldi (t1, 1);
> + break;
> + case 2:
> + *(__m64 *) r = _m_psrldi (t1, 2);
> + break;
> + case 3:
> + *(__m64 *) r = _m_psrldi (t1, 3);
> + break;
> + case 4:
> + *(__m64 *) r = _m_psrldi (t1, 4);
> + break;
> + case 5:
> + *(__m64 *) r = _m_psrldi (t1, 5);
> + break;
> + case 6:
> + *(__m64 *) r = _m_psrldi (t1, 6);
> + break;
> + case 7:
> + *(__m64 *) r = _m_psrldi (t1, 7);
> + break;
> + case 8:
> + *(__m64 *) r = _m_psrldi (t1, 8);
> + break;
> + case 9:
> + *(__m64 *) r = _m_psrldi (t1, 9);
> + break;
> + case 10:
> + *(__m64 *) r = _m_psrldi (t1, 10);
> + break;
> + case 11:
> + *(__m64 *) r = _m_psrldi (t1, 11);
> + break;
> + case 12:
> + *(__m64 *) r = _m_psrldi (t1, 12);
> + break;
> + case 13:
> + *(__m64 *) r = _m_psrldi (t1, 13);
> + break;
> + case 14:
> + *(__m64 *) r = _m_psrldi (t1, 14);
> + break;
> + case 15:
> + *(__m64 *) r = _m_psrldi (t1, 15);
> + break;
> + case 16:
> + *(__m64 *) r = _m_psrldi (t1, 16);
> + break;
> + case 17:
> + *(__m64 *) r = _m_psrldi (t1, 17);
> + break;
> + case 18:
> + *(__m64 *) r = _m_psrldi (t1, 18);
> + break;
> + case 19:
> + *(__m64 *) r = _m_psrldi (t1, 19);
> + break;
> + case 20:
> + *(__m64 *) r = _m_psrldi (t1, 20);
> + break;
> + case 21:
> + *(__m64 *) r = _m_psrldi (t1, 21);
> + break;
> + case 22:
> + *(__m64 *) r = _m_psrldi (t1, 22);
> + break;
> + case 23:
> + *(__m64 *) r = _m_psrldi (t1, 23);
> + break;
> + case 24:
> + *(__m64 *) r = _m_psrldi (t1, 24);
> + break;
> + case 25:
> + *(__m64 *) r = _m_psrldi (t1, 25);
> + break;
> + case 26:
> + *(__m64 *) r = _m_psrldi (t1, 26);
> + break;
> + case 27:
> + *(__m64 *) r = _m_psrldi (t1, 27);
> + break;
> + case 28:
> + *(__m64 *) r = _m_psrldi (t1, 28);
> + break;
> + case 29:
> + *(__m64 *) r = _m_psrldi (t1, 29);
> + break;
> + case 30:
> + *(__m64 *) r = _m_psrldi (t1, 30);
> + break;
> + case 31:
> + *(__m64 *) r = _m_psrldi (t1, 31);
> + break;
> + default:
> + *(__m64 *) r = _m_psrldi (t1, 32);
> + break;
> + }
> +}
> +
> +/* Reference PSRLD by immediate: logical right shift of both 32-bit lanes.  */
> +static void
> +compute_correct_result (long long *src_p, unsigned int imm, long long *res_p)
> +{
> +  /* Unsigned lanes: ">>" on a negative int would shift in copies of the sign.  */
> +  unsigned int *src = (unsigned int *) src_p;
> +  unsigned int *res = (unsigned int *) res_p;
> +  int i;
> +  if (imm > 31)
> +    for (i = 0; i < 2; i++)
> +      res[i] = 0;
> +  else
> +    for (i = 0; i < 2; i++)
> +      res[i] = src[i] >> imm;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + unsigned int count;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i++)
> + {
> + count = MMXops[i];
> + test_psrldi (&MMXops[i], count, &r);
> + compute_correct_result (&MMXops[i], count, &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlq.c
> new file mode 100644
> index 00000000000..c9fa8b45671
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlq.c
> @@ -0,0 +1,46 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psrlq (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_psrlq (t1, t2);
> +}
> +
> +/* Reference PSRLQ: logical right shift of the 64-bit value at DST_P by the
> +   64-bit count at SRC_P; the result is stored at RES_P.  */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
> +{
> +  /* Unsigned copies make ">>" a logical shift.  */
> +  unsigned long long val = *(unsigned long long *) dst_p;
> +  unsigned long long count = *(unsigned long long *) src_p;
> +  /* A count above 63 clears the result (and is undefined for ">>" in C).  */
> +  *(unsigned long long *) res_p = count > 63 ? 0 : val >> count;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_psrlq (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlqi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlqi.c
> new file mode 100644
> index 00000000000..bdbecd6ab6b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlqi.c
> @@ -0,0 +1,244 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psllwi (long long *ll1, unsigned int imm, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + switch (imm)
> + {
> + case 0:
> + *(__m64 *) r = _m_psrlqi (t1, 0);
> + break;
> + case 1:
> + *(__m64 *) r = _m_psrlqi (t1, 1);
> + break;
> + case 2:
> + *(__m64 *) r = _m_psrlqi (t1, 2);
> + break;
> + case 3:
> + *(__m64 *) r = _m_psrlqi (t1, 3);
> + break;
> + case 4:
> + *(__m64 *) r = _m_psrlqi (t1, 4);
> + break;
> + case 5:
> + *(__m64 *) r = _m_psrlqi (t1, 5);
> + break;
> + case 6:
> + *(__m64 *) r = _m_psrlqi (t1, 6);
> + break;
> + case 7:
> + *(__m64 *) r = _m_psrlqi (t1, 7);
> + break;
> + case 8:
> + *(__m64 *) r = _m_psrlqi (t1, 8);
> + break;
> + case 9:
> + *(__m64 *) r = _m_psrlqi (t1, 9);
> + break;
> + case 10:
> + *(__m64 *) r = _m_psrlqi (t1, 10);
> + break;
> + case 11:
> + *(__m64 *) r = _m_psrlqi (t1, 11);
> + break;
> + case 12:
> + *(__m64 *) r = _m_psrlqi (t1, 12);
> + break;
> + case 13:
> + *(__m64 *) r = _m_psrlqi (t1, 13);
> + break;
> + case 14:
> + *(__m64 *) r = _m_psrlqi (t1, 14);
> + break;
> + case 15:
> + *(__m64 *) r = _m_psrlqi (t1, 15);
> + break;
> + case 16:
> + *(__m64 *) r = _m_psrlqi (t1, 16);
> + break;
> + case 17:
> + *(__m64 *) r = _m_psrlqi (t1, 17);
> + break;
> + case 18:
> + *(__m64 *) r = _m_psrlqi (t1, 18);
> + break;
> + case 19:
> + *(__m64 *) r = _m_psrlqi (t1, 19);
> + break;
> + case 20:
> + *(__m64 *) r = _m_psrlqi (t1, 20);
> + break;
> + case 21:
> + *(__m64 *) r = _m_psrlqi (t1, 21);
> + break;
> + case 22:
> + *(__m64 *) r = _m_psrlqi (t1, 22);
> + break;
> + case 23:
> + *(__m64 *) r = _m_psrlqi (t1, 23);
> + break;
> + case 24:
> + *(__m64 *) r = _m_psrlqi (t1, 24);
> + break;
> + case 25:
> + *(__m64 *) r = _m_psrlqi (t1, 25);
> + break;
> + case 26:
> + *(__m64 *) r = _m_psrlqi (t1, 26);
> + break;
> + case 27:
> + *(__m64 *) r = _m_psrlqi (t1, 27);
> + break;
> + case 28:
> + *(__m64 *) r = _m_psrlqi (t1, 28);
> + break;
> + case 29:
> + *(__m64 *) r = _m_psrlqi (t1, 29);
> + break;
> + case 30:
> + *(__m64 *) r = _m_psrlqi (t1, 30);
> + break;
> + case 31:
> + *(__m64 *) r = _m_psrlqi (t1, 31);
> + break;
> + case 32:
> + *(__m64 *) r = _m_psrlqi (t1, 32);
> + break;
> + case 33:
> + *(__m64 *) r = _m_psrlqi (t1, 33);
> + break;
> + case 34:
> + *(__m64 *) r = _m_psrlqi (t1, 34);
> + break;
> + case 35:
> + *(__m64 *) r = _m_psrlqi (t1, 35);
> + break;
> + case 36:
> + *(__m64 *) r = _m_psrlqi (t1, 36);
> + break;
> + case 37:
> + *(__m64 *) r = _m_psrlqi (t1, 37);
> + break;
> + case 38:
> + *(__m64 *) r = _m_psrlqi (t1, 38);
> + break;
> + case 39:
> + *(__m64 *) r = _m_psrlqi (t1, 39);
> + break;
> + case 40:
> + *(__m64 *) r = _m_psrlqi (t1, 40);
> + break;
> + case 41:
> + *(__m64 *) r = _m_psrlqi (t1, 41);
> + break;
> + case 42:
> + *(__m64 *) r = _m_psrlqi (t1, 42);
> + break;
> + case 43:
> + *(__m64 *) r = _m_psrlqi (t1, 43);
> + break;
> + case 44:
> + *(__m64 *) r = _m_psrlqi (t1, 44);
> + break;
> + case 45:
> + *(__m64 *) r = _m_psrlqi (t1, 45);
> + break;
> + case 46:
> + *(__m64 *) r = _m_psrlqi (t1, 46);
> + break;
> + case 47:
> + *(__m64 *) r = _m_psrlqi (t1, 47);
> + break;
> + case 48:
> + *(__m64 *) r = _m_psrlqi (t1, 48);
> + break;
> + case 49:
> + *(__m64 *) r = _m_psrlqi (t1, 49);
> + break;
> + case 50:
> + *(__m64 *) r = _m_psrlqi (t1, 50);
> + break;
> + case 51:
> + *(__m64 *) r = _m_psrlqi (t1, 51);
> + break;
> + case 52:
> + *(__m64 *) r = _m_psrlqi (t1, 52);
> + break;
> + case 53:
> + *(__m64 *) r = _m_psrlqi (t1, 53);
> + break;
> + case 54:
> + *(__m64 *) r = _m_psrlqi (t1, 54);
> + break;
> + case 55:
> + *(__m64 *) r = _m_psrlqi (t1, 55);
> + break;
> + case 56:
> + *(__m64 *) r = _m_psrlqi (t1, 56);
> + break;
> + case 57:
> + *(__m64 *) r = _m_psrlqi (t1, 57);
> + break;
> + case 58:
> + *(__m64 *) r = _m_psrlqi (t1, 58);
> + break;
> + case 59:
> + *(__m64 *) r = _m_psrlqi (t1, 59);
> + break;
> + case 60:
> + *(__m64 *) r = _m_psrlqi (t1, 60);
> + break;
> + case 61:
> + *(__m64 *) r = _m_psrlqi (t1, 61);
> + break;
> + case 62:
> + *(__m64 *) r = _m_psrlqi (t1, 62);
> + break;
> + case 63:
> + *(__m64 *) r = _m_psrlqi (t1, 63);
> + break;
> + default:
> + *(__m64 *) r = _m_psrlqi (t1, 64);
> + break;
> + }
> +}
> +
> +/* Reference PSRLQ by immediate: logical right shift of the 64-bit value.  */
> +static void
> +compute_correct_result (long long *src_p, unsigned int imm, long long *res_p)
> +{
> +  unsigned long long val = *(unsigned long long *) src_p;
> +  /* An immediate above 63 clears the result (and is undefined for ">>" in C).  */
> +  if (imm > 63)
> +    *(unsigned long long *) res_p = 0;
> +  else
> +    *(unsigned long long *) res_p = val >> imm;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + unsigned int count;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i++)
> + {
> + count = MMXops[i];
> + test_psllwi (&MMXops[i], count, &r);
> + compute_correct_result (&MMXops[i], count, &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlw.c
> new file mode 100644
> index 00000000000..6382448b1a6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlw.c
> @@ -0,0 +1,51 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psrlw (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_psrlw (t1, t2);
> +}
> +
> +/* Reference PSRLW: logical right shift of all four 16-bit lanes of DST by SRC.  */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
> +{
> +  /* Unsigned lanes: a negative short is sign-extended before ">>" is applied.  */
> +  unsigned short *dst = (unsigned short *) dst_p;
> +  unsigned int *src = (unsigned int *) src_p;
> +  unsigned short *res = (unsigned short *) res_p;
> +  int i;
> +  if (src[1] || src[0] > 15)
> +    for (i = 0; i < 4; i++)
> +      res[i] = 0;
> +  else
> +    for (i = 0; i < 4; i++)
> +      res[i] = dst[i] >> src[0];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_psrlw (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlwi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlwi.c
> new file mode 100644
> index 00000000000..98c6df35e5f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlwi.c
> @@ -0,0 +1,104 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psrlwi (long long *ll1, unsigned int imm, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + switch (imm)
> + {
> + case 0:
> + *(__m64 *) r = _m_psrlwi (t1, 0);
> + break;
> + case 1:
> + *(__m64 *) r = _m_psrlwi (t1, 1);
> + break;
> + case 2:
> + *(__m64 *) r = _m_psrlwi (t1, 2);
> + break;
> + case 3:
> + *(__m64 *) r = _m_psrlwi (t1, 3);
> + break;
> + case 4:
> + *(__m64 *) r = _m_psrlwi (t1, 4);
> + break;
> + case 5:
> + *(__m64 *) r = _m_psrlwi (t1, 5);
> + break;
> + case 6:
> + *(__m64 *) r = _m_psrlwi (t1, 6);
> + break;
> + case 7:
> + *(__m64 *) r = _m_psrlwi (t1, 7);
> + break;
> + case 8:
> + *(__m64 *) r = _m_psrlwi (t1, 8);
> + break;
> + case 9:
> + *(__m64 *) r = _m_psrlwi (t1, 9);
> + break;
> + case 10:
> + *(__m64 *) r = _m_psrlwi (t1, 10);
> + break;
> + case 11:
> + *(__m64 *) r = _m_psrlwi (t1, 11);
> + break;
> + case 12:
> + *(__m64 *) r = _m_psrlwi (t1, 12);
> + break;
> + case 13:
> + *(__m64 *) r = _m_psrlwi (t1, 13);
> + break;
> + case 14:
> + *(__m64 *) r = _m_psrlwi (t1, 14);
> + break;
> + case 15:
> + *(__m64 *) r = _m_psrlwi (t1, 15);
> + break;
> + default:
> + *(__m64 *) r = _m_psrlwi (t1, 16);
> + break;
> + }
> +}
> +
> +/* Reference PSRLW by immediate: logical right shift of all four 16-bit lanes.  */
> +static void
> +compute_correct_result (long long *src_p, unsigned int imm, long long *res_p)
> +{
> +  /* Unsigned lanes: a negative short is sign-extended before ">>" is applied.  */
> +  unsigned short *src = (unsigned short *) src_p;
> +  unsigned short *res = (unsigned short *) res_p;
> +  int i;
> +  if (imm > 15)
> +    for (i = 0; i < 4; i++)
> +      res[i] = 0;
> +  else
> +    for (i = 0; i < 4; i++)
> +      res[i] = src[i] >> imm;
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + unsigned int count;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i++)
> + {
> + count = MMXops[i];
> + test_psrlwi (&MMXops[i], count, &r);
> + compute_correct_result (&MMXops[i], count, &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubb.c
> new file mode 100644
> index 00000000000..b3637353879
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubb.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psubb (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_psubb (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + char *dst = (char *) dst_p;
> + char *src = (char *) src_p;
> + char *res = (char *) res_p;
> + int i;
> + for (i = 0; i < 8; i++)
> + res[i] = dst[i] - src[i];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_psubb (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubd.c
> new file mode 100644
> index 00000000000..b091d7f590f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubd.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psubd (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_psubd (t1, t2);
> +}
> +
> +/* Reference PSUBD: DST - SRC in each of the two 32-bit lanes, wrapping.  */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
> +{
> +  /* Unsigned lanes wrap modulo 2^32; signed int overflow would be undefined.  */
> +  unsigned int *dst = (unsigned int *) dst_p;
> +  unsigned int *src = (unsigned int *) src_p;
> +  unsigned int *res = (unsigned int *) res_p;
> +  int i;
> +  for (i = 0; i < 2; i++)
> +    res[i] = dst[i] - src[i];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_psubd (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubq.c
> new file mode 100644
> index 00000000000..767bf8ea303
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubq.c
> @@ -0,0 +1,42 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psubq (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _mm_sub_si64 (t1, t2);
> +}
> +
> +/* Reference PSUBQ: wrapping 64-bit DST - SRC, computed unsigned (no UB).  */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p, long long *res_p)
> +{
> +  *(unsigned long long *) res_p
> +    = (unsigned long long) dst_p[0] - (unsigned long long) src_p[0];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_psubq (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusb.c
> new file mode 100644
> index 00000000000..29a5f708e12
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusb.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psubusb (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_psubusb (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + unsigned char *dst = (unsigned char *) dst_p;
> + unsigned char *src = (unsigned char *) src_p;
> + unsigned char *res = (unsigned char *) res_p;
> + int i;
> + for (i = 0; i < 8; i++)
> + res[i] = saturate_ub (dst[i] - src[i]);
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_psubusb (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusw.c
> new file mode 100644
> index 00000000000..279051f7303
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusw.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psubusw (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_psubusw (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + unsigned short *dst = (unsigned short *) dst_p;
> + unsigned short *src = (unsigned short *) src_p;
> + unsigned short *res = (unsigned short *) res_p;
> + int i;
> + for (i = 0; i < 4; i++)
> + res[i] = saturate_uw (dst[i] - src[i]);
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_psubusw (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubw.c
> new file mode 100644
> index 00000000000..dde5fce50ad
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubw.c
> @@ -0,0 +1,47 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_psubw (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_psubw (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + short *dst = (short *) dst_p;
> + short *src = (short *) src_p;
> + short *res = (short *) res_p;
> + int i;
> + for (i = 0; i < 4; i++)
> + res[i] = dst[i] - src[i];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_psubw (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhbw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhbw.c
> new file mode 100644
> index 00000000000..5059d74d6c5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhbw.c
> @@ -0,0 +1,52 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_punpckhbw (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_punpckhbw (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + char *dst = (char *) dst_p;
> + char *src = (char *) src_p;
> + char *res = (char *) res_p;
> + res[0] = dst[4];
> + res[1] = src[4];
> + res[2] = dst[5];
> + res[3] = src[5];
> + res[4] = dst[6];
> + res[5] = src[6];
> + res[6] = dst[7];
> + res[7] = src[7];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_punpckhbw (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhdq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhdq.c
> new file mode 100644
> index 00000000000..9c4690dee0b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhdq.c
> @@ -0,0 +1,46 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_punpckhdq (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_punpckhdq (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + int *dst = (int *) dst_p;
> + int *src = (int *) src_p;
> + int *res = (int *) res_p;
> + res[0] = dst[1];
> + res[1] = src[1];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_punpckhdq (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhwd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhwd.c
> new file mode 100644
> index 00000000000..7525a2bba63
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhwd.c
> @@ -0,0 +1,48 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_punpckhwd (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_punpckhwd (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + short *dst = (short *) dst_p;
> + short *src = (short *) src_p;
> + short *res = (short *) res_p;
> + res[0] = dst[2];
> + res[1] = src[2];
> + res[2] = dst[3];
> + res[3] = src[3];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_punpckhwd (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklbw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklbw.c
> new file mode 100644
> index 00000000000..14bdc433ed3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklbw.c
> @@ -0,0 +1,52 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_punpcklbw (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_punpcklbw (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + char *dst = (char *) dst_p;
> + char *src = (char *) src_p;
> + char *res = (char *) res_p;
> + res[0] = dst[0];
> + res[1] = src[0];
> + res[2] = dst[1];
> + res[3] = src[1];
> + res[4] = dst[2];
> + res[5] = src[2];
> + res[6] = dst[3];
> + res[7] = src[3];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_punpcklbw (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckldq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckldq.c
> new file mode 100644
> index 00000000000..1d8a932ba7c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckldq.c
> @@ -0,0 +1,46 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_punpckldq (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_punpckldq (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + int *dst = (int *) dst_p;
> + int *src = (int *) src_p;
> + int *res = (int *) res_p;
> + res[0] = dst[0];
> + res[1] = src[0];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_punpckldq (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklwd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklwd.c
> new file mode 100644
> index 00000000000..6b2a9d56a89
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklwd.c
> @@ -0,0 +1,48 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_punpcklwd (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_punpcklwd (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (long long *dst_p, long long *src_p,
> + long long *res_p)
> +{
> + short *dst = (short *) dst_p;
> + short *src = (short *) src_p;
> + short *res = (short *) res_p;
> + res[0] = dst[0];
> + res[1] = src[0];
> + res[2] = dst[1];
> + res[3] = src[1];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_punpcklwd (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c
> new file mode 100644
> index 00000000000..7858c2f6856
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c
> @@ -0,0 +1,43 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
> +
> +#include "sse2-check.h"
> +#include "mmx-vals.h"
> +
> +__attribute__((noinline, noclone))
> +static void
> +test_pxor (long long *ll1, long long *ll2, long long *r)
> +{
> + __m64 t1 = *(__m64 *) ll1;
> + __m64 t2 = *(__m64 *) ll2;
> + *(__m64 *) r = _m_pxor (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (unsigned long long *dst,
> + unsigned long long *src,
> + unsigned long long *res)
> +{
> + res[0] = dst[0] ^ src[0];
> +}
> +
> +static void
> +sse2_test (void)
> +{
> + int i;
> + long long r, ck;
> + int fail = 0;
> +
> + /* Run the MMX tests */
> + for (i = 0; i < MMX_num_ops; i += 2)
> + {
> + test_pxor (&MMXops[i], &MMXops[i + 1], &r);
> + compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
> + if (ck != r)
> + fail++;
> + }
> +
> + if (fail != 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx.c b/gcc/testsuite/gcc.target/i386/sse2-mmx.c
> index fb226a8e8f3..338cb9da289 100644
> --- a/gcc/testsuite/gcc.target/i386/sse2-mmx.c
> +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx.c
> @@ -4,7 +4,6 @@
>
> #include "sse2-check.h"
>
> -#include <mmintrin.h>
>
> #define N 4
>
> --
> 2.20.1
>
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 24/40] i386: Emulate MMX mmx_psadbw with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (22 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 39/40] i386: Add tests for MMX intrinsic emulations " H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 12:33 ` [PATCH 15/40] i386: Emulate MMX sse_cvtpi2ps " H.J. Lu
` (16 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX mmx_psadbw with SSE. Only SSE register source operand is
allowed.
PR target/89021
* config/i386/mmx.md (mmx_psadbw): Add SSE emulation.
---
gcc/config/i386/mmx.md | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index e1432edcd3d..0c08aebb071 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1782,14 +1782,19 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_psadbw"
- [(set (match_operand:V1DI 0 "register_operand" "=y")
- (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
+ [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv")
+ (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym,x,Yv")]
UNSPEC_PSADBW))]
- "TARGET_SSE || TARGET_3DNOW_A"
- "psadbw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxshft")
- (set_attr "mode" "DI")])
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
+ "@
+ psadbw\t{%2, %0|%0, %2}
+ psadbw\t{%2, %0|%0, %2}
+ vpsadbw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxshft,sseiadd,sseiadd")
+ (set_attr "mode" "DI,TI,TI")])
(define_insn_and_split "mmx_pmovmskb"
[(set (match_operand:SI 0 "register_operand" "=r,r")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 15/40] i386: Emulate MMX sse_cvtpi2ps with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (23 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 24/40] i386: Emulate MMX mmx_psadbw " H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 14:14 ` Uros Bizjak
2019-02-14 12:33 ` [PATCH 19/40] i386: Emulate MMX mmx_pmovmskb " H.J. Lu
` (15 subsequent siblings)
40 siblings, 1 reply; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX sse_cvtpi2ps with SSE2 cvtdq2ps, preserving upper 64 bits of
destination XMM register. Only SSE register source operand is allowed.
PR target/89021
* config/i386/sse.md (sse_cvtpi2ps): Renamed to ...
(*mmx_cvtpi2ps): This. Disabled for TARGET_MMX_WITH_SSE.
(sse_cvtpi2ps): New.
(mmx_cvtpi2ps_sse): Likewise.
---
gcc/config/i386/sse.md | 77 ++++++++++++++++++++++++++++++++++++++++--
1 file changed, 75 insertions(+), 2 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 083f9ef0f44..b1bab15af41 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4561,14 +4561,87 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_insn "sse_cvtpi2ps"
+(define_expand "sse_cvtpi2ps"
+ [(set (match_operand:V4SF 0 "register_operand")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand")))
+ (match_operand:V4SF 1 "register_operand")
+ (const_int 3)))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
+{
+ if (TARGET_MMX_WITH_SSE)
+ {
+ rtx op2 = force_reg (V2SImode, operands[2]);
+ emit_insn (gen_mmx_cvtpi2ps_sse (operands[0], operands[1], op2));
+ DONE;
+ }
+})
+
+(define_insn_and_split "mmx_cvtpi2ps_sse"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,Yv")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:V2SF (match_operand:V2SI 2 "register_operand" "x,Yv")))
+ (match_operand:V4SF 1 "register_operand" "0,Yv")
+ (const_int 3)))
+ (clobber (match_scratch:V4SF 3 "=x,Yv"))]
+ "TARGET_MMX_WITH_SSE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op2 = lowpart_subreg (V4SImode, operands[2],
+ GET_MODE (operands[2]));
+ /* Generate SSE2 cvtdq2ps. */
+ rtx insn = gen_floatv4siv4sf2 (operands[3], op2);
+ emit_insn (insn);
+
+ /* Merge operands[3] with operands[0]. */
+ rtx mask, op1;
+ if (TARGET_AVX)
+ {
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4, GEN_INT (0), GEN_INT (1),
+ GEN_INT (6), GEN_INT (7)));
+ op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]);
+ op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
+ insn = gen_rtx_SET (operands[0], op2);
+ }
+ else
+ {
+ /* NB: SSE can only concatenate OP0 and OP3 to OP0. */
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (4), GEN_INT (5)));
+ op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]);
+ op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
+ insn = gen_rtx_SET (operands[0], op2);
+ emit_insn (insn);
+
+ /* Swap bits 0:63 with bits 64:127. */
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (0), GEN_INT (1)));
+ rtx dest = gen_rtx_REG (V4SImode, REGNO (operands[0]));
+ op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+ insn = gen_rtx_SET (dest, op1);
+ }
+ emit_insn (insn);
+ DONE;
+}
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*mmx_cvtpi2ps"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
(vec_duplicate:V4SF
(float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
(match_operand:V4SF 1 "register_operand" "0")
(const_int 3)))]
- "TARGET_SSE"
+ "TARGET_SSE && !TARGET_MMX_WITH_SSE"
"cvtpi2ps\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [PATCH 15/40] i386: Emulate MMX sse_cvtpi2ps with SSE
2019-02-14 12:33 ` [PATCH 15/40] i386: Emulate MMX sse_cvtpi2ps " H.J. Lu
@ 2019-02-14 14:14 ` Uros Bizjak
0 siblings, 0 replies; 62+ messages in thread
From: Uros Bizjak @ 2019-02-14 14:14 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
On Thu, Feb 14, 2019 at 1:30 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Emulate MMX sse_cvtpi2ps with SSE2 cvtdq2ps, preserving upper 64 bits of
> destination XMM register. Only SSE register source operand is allowed.
>
> PR target/89021
> * config/i386/mmx.md (sse_cvtpi2ps): Renamed to ...
> (*mmx_cvtpi2ps): This. Disabled for TARGET_MMX_WITH_SSE.
> (sse_cvtpi2ps): New.
> (mmx_cvtpi2ps_sse): Likewise.
Now you can merge both instructions together using:
(clobber (match_scratch:V4SF 3 "=X,x,Yv"))
Please note "X" for the original case where scratch is not needed.
Uros.
> ---
> gcc/config/i386/sse.md | 77 ++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 75 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 083f9ef0f44..b1bab15af41 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -4561,14 +4561,87 @@
> ;;
> ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
>
> -(define_insn "sse_cvtpi2ps"
> +(define_expand "sse_cvtpi2ps"
> + [(set (match_operand:V4SF 0 "register_operand")
> + (vec_merge:V4SF
> + (vec_duplicate:V4SF
> + (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand")))
> + (match_operand:V4SF 1 "register_operand")
> + (const_int 3)))]
> + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
> +{
> + if (TARGET_MMX_WITH_SSE)
> + {
> + rtx op2 = force_reg (V2SImode, operands[2]);
> + emit_insn (gen_mmx_cvtpi2ps_sse (operands[0], operands[1], op2));
> + DONE;
> + }
> +})
> +
> +(define_insn_and_split "mmx_cvtpi2ps_sse"
> + [(set (match_operand:V4SF 0 "register_operand" "=x,Yv")
> + (vec_merge:V4SF
> + (vec_duplicate:V4SF
> + (float:V2SF (match_operand:V2SI 2 "register_operand" "x,Yv")))
> + (match_operand:V4SF 1 "register_operand" "0,Yv")
> + (const_int 3)))
> + (clobber (match_scratch:V4SF 3 "=x,Yv"))]
> + "TARGET_MMX_WITH_SSE"
> + "#"
> + "&& reload_completed"
> + [(const_int 0)]
> +{
> + rtx op2 = lowpart_subreg (V4SImode, operands[2],
> + GET_MODE (operands[2]));
> + /* Generate SSE2 cvtdq2ps. */
> + rtx insn = gen_floatv4siv4sf2 (operands[3], op2);
> + emit_insn (insn);
> +
> + /* Merge operands[3] with operands[0]. */
> + rtx mask, op1;
> + if (TARGET_AVX)
> + {
> + mask = gen_rtx_PARALLEL (VOIDmode,
> + gen_rtvec (4, GEN_INT (0), GEN_INT (1),
> + GEN_INT (6), GEN_INT (7)));
> + op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]);
> + op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
> + insn = gen_rtx_SET (operands[0], op2);
> + }
> + else
> + {
> + /* NB: SSE can only concatenate OP0 and OP3 to OP0. */
> + mask = gen_rtx_PARALLEL (VOIDmode,
> + gen_rtvec (4, GEN_INT (2), GEN_INT (3),
> + GEN_INT (4), GEN_INT (5)));
> + op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]);
> + op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
> + insn = gen_rtx_SET (operands[0], op2);
> + emit_insn (insn);
> +
> + /* Swap bits 0:63 with bits 64:127. */
> + mask = gen_rtx_PARALLEL (VOIDmode,
> + gen_rtvec (4, GEN_INT (2), GEN_INT (3),
> + GEN_INT (0), GEN_INT (1)));
> + rtx dest = gen_rtx_REG (V4SImode, REGNO (operands[0]));
> + op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
> + insn = gen_rtx_SET (dest, op1);
> + }
> + emit_insn (insn);
> + DONE;
> +}
> + [(set_attr "isa" "noavx,avx")
> + (set_attr "type" "ssecvt")
> + (set_attr "mode" "V4SF")])
> +
> +(define_insn "*mmx_cvtpi2ps"
> [(set (match_operand:V4SF 0 "register_operand" "=x")
> (vec_merge:V4SF
> (vec_duplicate:V4SF
> (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
> (match_operand:V4SF 1 "register_operand" "0")
> (const_int 3)))]
> - "TARGET_SSE"
> + "TARGET_SSE && !TARGET_MMX_WITH_SSE"
> "cvtpi2ps\t{%2, %0|%0, %2}"
> [(set_attr "type" "ssecvt")
> (set_attr "mode" "V4SF")])
> --
> 2.20.1
>
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 19/40] i386: Emulate MMX mmx_pmovmskb with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (24 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 15/40] i386: Emulate MMX sse_cvtpi2ps " H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 12:33 ` [PATCH 38/40] i386: Enable TM MMX intrinsics with SSE2 H.J. Lu
` (14 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX mmx_pmovmskb with SSE by zero-extending result of SSE pmovmskb
from QImode to SImode. Only SSE register source operand is allowed.
PR target/89021
* config/i386/mmx.md (mmx_pmovmskb): Changed to
define_insn_and_split to support SSE emulation.
---
gcc/config/i386/mmx.md | 30 +++++++++++++++++++++++-------
1 file changed, 23 insertions(+), 7 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index dcc1bd1becf..9ff0db9c2ed 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1774,14 +1774,30 @@
[(set_attr "type" "mmxshft")
(set_attr "mode" "DI")])
-(define_insn "mmx_pmovmskb"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")]
+(define_insn_and_split "mmx_pmovmskb"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(match_operand:V8QI 1 "register_operand" "y,x")]
UNSPEC_MOVMSK))]
- "TARGET_SSE || TARGET_3DNOW_A"
- "pmovmskb\t{%1, %0|%0, %1}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
+ "@
+ pmovmskb\t{%1, %0|%0, %1}
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(set (match_dup 0)
+ (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
+ (set (match_dup 0)
+ (zero_extend:SI (match_dup 2)))]
+{
+ /* Generate SSE pmovmskb and zero-extend from QImode to SImode. */
+ operands[1] = lowpart_subreg (V16QImode, operands[1],
+ GET_MODE (operands[1]));
+ operands[2] = lowpart_subreg (QImode, operands[0],
+ GET_MODE (operands[0]));
+}
+ [(set_attr "mmx_isa" "native,x64")
+ (set_attr "type" "mmxcvt,ssemov")
+ (set_attr "mode" "DI,TI")])
(define_expand "mmx_maskmovq"
[(set (match_operand:V8QI 0 "memory_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 38/40] i386: Enable TM MMX intrinsics with SSE2
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (25 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 19/40] i386: Emulate MMX mmx_pmovmskb " H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 12:33 ` [PATCH 36/40] i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE H.J. Lu
` (13 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
This patch enables TM MMX intrinsics with SSE2 when MMX is disabled.
PR target/89021
* config/i386/i386.c (bdesc_tm): Enable MMX intrinsics with
SSE2.
---
gcc/config/i386/i386.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 1d417e08734..20219983462 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -31075,13 +31075,13 @@ static const struct builtin_description bdesc_##kind[] = \
we're lazy. Add casts to make them fit. */
static const struct builtin_description bdesc_tm[] =
{
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+ { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+ { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+ { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+ { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+ { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+ { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+ { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
{ OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
{ OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
@@ -31099,7 +31099,7 @@ static const struct builtin_description bdesc_tm[] =
{ OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
{ OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
+ { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
{ OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
{ OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 36/40] i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (26 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 38/40] i386: Enable TM MMX intrinsics with SSE2 H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 12:33 ` [PATCH 32/40] i386: Emulate MMX ssse3_psign<mode>3 with SSE H.J. Lu
` (12 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
PR target/89021
* config/i386/i386.c (ix86_expand_vector_init_duplicate): Set
mmx_ok to true if TARGET_MMX_WITH_SSE is true.
(ix86_expand_vector_init_one_nonzero): Likewise.
(ix86_expand_vector_init_one_var): Likewise.
(ix86_expand_vector_init_general): Likewise.
(ix86_expand_vector_init): Likewise.
(ix86_expand_vector_set): Likewise.
(ix86_expand_vector_extract): Likewise.
* config/i386/mmx.md (*vec_dupv2sf): Changed to
define_insn_and_split to support SSE emulation.
(*vec_extractv2sf_0): Likewise.
(*vec_extractv2sf_1): Likewise.
(*vec_extractv2si_0): Likewise.
(*vec_extractv2si_1): Likewise.
(*vec_extractv2si_zext_mem): Likewise.
(vec_setv2sf): Also allow TARGET_MMX_WITH_SSE.
(vec_extractv2sf_1 splitter): Likewise.
(vec_extractv2sfsf): Likewise.
(vec_setv2si): Likewise.
(vec_extractv2si_1 splitter): Likewise.
(vec_extractv2sisi): Likewise.
(vec_setv4hi): Likewise.
(vec_extractv4hihi): Likewise.
(vec_setv8qi): Likewise.
(vec_extractv8qiqi): Likewise.
---
gcc/config/i386/i386.c | 8 +++++
gcc/config/i386/mmx.md | 69 +++++++++++++++++++++++++++---------------
2 files changed, 52 insertions(+), 25 deletions(-)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index dce4038685e..a9abbe8706b 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -42625,6 +42625,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
{
bool ok;
+ mmx_ok |= TARGET_MMX_WITH_SSE;
switch (mode)
{
case E_V2SImode:
@@ -42784,6 +42785,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
bool use_vector_set = false;
rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL;
+ mmx_ok |= TARGET_MMX_WITH_SSE;
switch (mode)
{
case E_V2DImode:
@@ -42977,6 +42979,7 @@ ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
+ mmx_ok |= TARGET_MMX_WITH_SSE;
switch (mode)
{
case E_V2DFmode:
@@ -43362,6 +43365,7 @@ ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
machine_mode quarter_mode = VOIDmode;
int n, i;
+ mmx_ok |= TARGET_MMX_WITH_SSE;
switch (mode)
{
case E_V2SFmode:
@@ -43561,6 +43565,8 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
int i;
rtx x;
+ mmx_ok |= TARGET_MMX_WITH_SSE;
+
/* Handle first initialization from vector elts. */
if (n_elts != XVECLEN (vals, 0))
{
@@ -43660,6 +43666,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
machine_mode mmode = VOIDmode;
rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
+ mmx_ok |= TARGET_MMX_WITH_SSE;
switch (mode)
{
case E_V2SFmode:
@@ -44015,6 +44022,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
bool use_vec_extr = false;
rtx tmp;
+ mmx_ok |= TARGET_MMX_WITH_SSE;
switch (mode)
{
case E_V2SImode:
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 81ee6250051..867d87ce644 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -555,14 +555,23 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
-(define_insn "*vec_dupv2sf"
- [(set (match_operand:V2SF 0 "register_operand" "=y")
+(define_insn_and_split "*vec_dupv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
(vec_duplicate:V2SF
- (match_operand:SF 1 "register_operand" "0")))]
- "TARGET_MMX"
- "punpckldq\t%0, %0"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ (match_operand:SF 1 "register_operand" "0,0,Yv")))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpckldq\t%0, %0
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(set (match_dup 0)
+ (vec_duplicate:V4SF (match_dup 1)))]
+ "operands[0] = lowpart_subreg (V4SFmode, operands[0],
+ GET_MODE (operands[0]));"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,ssemov,ssemov")
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "*mmx_concatv2sf"
[(set (match_operand:V2SF 0 "register_operand" "=y,y")
@@ -580,7 +589,7 @@
[(match_operand:V2SF 0 "register_operand")
(match_operand:SF 1 "register_operand")
(match_operand 2 "const_int_operand")]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_set (false, operands[0], operands[1],
INTVAL (operands[2]));
@@ -594,11 +603,13 @@
(vec_select:SF
(match_operand:V2SF 1 "nonimmediate_operand" " xm,x,ym,y,m,m")
(parallel [(const_int 0)])))]
- "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"#"
"&& reload_completed"
[(set (match_dup 0) (match_dup 1))]
- "operands[1] = gen_lowpart (SFmode, operands[1]);")
+ "operands[1] = gen_lowpart (SFmode, operands[1]);"
+ [(set_attr "mmx_isa" "*,*,native,native,*,*")])
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
@@ -607,7 +618,8 @@
(vec_select:SF
(match_operand:V2SF 1 "nonimmediate_operand" " 0,x,x,o,o,o,o")
(parallel [(const_int 1)])))]
- "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
punpckhdq\t%0, %0
%vmovshdup\t{%1, %0|%0, %1}
@@ -617,6 +629,7 @@
#
#"
[(set_attr "isa" "*,sse3,noavx,*,*,*,*")
+ (set_attr "mmx_isa" "native,*,*,native,*,*,*")
(set_attr "type" "mmxcvt,sse,sseshuf1,mmxmov,ssemov,fmov,imov")
(set (attr "length_immediate")
(if_then_else (eq_attr "alternative" "2")
@@ -634,7 +647,7 @@
(vec_select:SF
(match_operand:V2SF 1 "memory_operand")
(parallel [(const_int 1)])))]
- "TARGET_MMX && reload_completed"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && reload_completed"
[(set (match_dup 0) (match_dup 1))]
"operands[1] = adjust_address (operands[1], SFmode, 4);")
@@ -642,7 +655,7 @@
[(match_operand:SF 0 "register_operand")
(match_operand:V2SF 1 "register_operand")
(match_operand 2 "const_int_operand")]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_extract (false, operands[0], operands[1],
INTVAL (operands[2]));
@@ -1538,7 +1551,7 @@
[(match_operand:V2SI 0 "register_operand")
(match_operand:SI 1 "register_operand")
(match_operand 2 "const_int_operand")]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_set (false, operands[0], operands[1],
INTVAL (operands[2]));
@@ -1552,11 +1565,13 @@
(vec_select:SI
(match_operand:V2SI 1 "nonimmediate_operand" "xm,x,ym,y,m")
(parallel [(const_int 0)])))]
- "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"#"
"&& reload_completed"
[(set (match_dup 0) (match_dup 1))]
- "operands[1] = gen_lowpart (SImode, operands[1]);")
+ "operands[1] = gen_lowpart (SImode, operands[1]);"
+ [(set_attr "mmx_isa" "*,*,native,native,*")])
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
@@ -1565,7 +1580,8 @@
(vec_select:SI
(match_operand:V2SI 1 "nonimmediate_operand" " 0,x,x,o,o,o")
(parallel [(const_int 1)])))]
- "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
punpckhdq\t%0, %0
%vpshufd\t{$0xe5, %1, %0|%0, %1, 0xe5}
@@ -1574,6 +1590,7 @@
#
#"
[(set_attr "isa" "*,sse2,noavx,*,*,*")
+ (set_attr "mmx_isa" "native,*,*,native,*,*")
(set_attr "type" "mmxcvt,sseshuf1,sseshuf1,mmxmov,ssemov,imov")
(set (attr "length_immediate")
(if_then_else (eq_attr "alternative" "1,2")
@@ -1587,7 +1604,7 @@
(vec_select:SI
(match_operand:V2SI 1 "memory_operand")
(parallel [(const_int 1)])))]
- "TARGET_MMX && reload_completed"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && reload_completed"
[(set (match_dup 0) (match_dup 1))]
"operands[1] = adjust_address (operands[1], SImode, 4);")
@@ -1597,19 +1614,21 @@
(vec_select:SI
(match_operand:V2SI 1 "memory_operand" "o,o,o")
(parallel [(match_operand:SI 2 "const_0_to_1_operand")]))))]
- "TARGET_64BIT && TARGET_MMX"
+ "TARGET_64BIT"
"#"
"&& reload_completed"
[(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
{
operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
-})
+}
+ [(set_attr "isa" "*,sse2,*")
+ (set_attr "mmx_isa" "native,*,*")])
(define_expand "vec_extractv2sisi"
[(match_operand:SI 0 "register_operand")
(match_operand:V2SI 1 "register_operand")
(match_operand 2 "const_int_operand")]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_extract (false, operands[0], operands[1],
INTVAL (operands[2]));
@@ -1629,7 +1648,7 @@
[(match_operand:V4HI 0 "register_operand")
(match_operand:HI 1 "register_operand")
(match_operand 2 "const_int_operand")]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_set (false, operands[0], operands[1],
INTVAL (operands[2]));
@@ -1640,7 +1659,7 @@
[(match_operand:HI 0 "register_operand")
(match_operand:V4HI 1 "register_operand")
(match_operand 2 "const_int_operand")]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_extract (false, operands[0], operands[1],
INTVAL (operands[2]));
@@ -1660,7 +1679,7 @@
[(match_operand:V8QI 0 "register_operand")
(match_operand:QI 1 "register_operand")
(match_operand 2 "const_int_operand")]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_set (false, operands[0], operands[1],
INTVAL (operands[2]));
@@ -1671,7 +1690,7 @@
[(match_operand:QI 0 "register_operand")
(match_operand:V8QI 1 "register_operand")
(match_operand 2 "const_int_operand")]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_extract (false, operands[0], operands[1],
INTVAL (operands[2]));
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 32/40] i386: Emulate MMX ssse3_psign<mode>3 with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (27 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 36/40] i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 12:33 ` [PATCH 34/40] i386: Emulate MMX abs<mode>2 " H.J. Lu
` (11 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX ssse3_psign<mode>3 with SSE. Only SSE register source operand
is allowed.
PR target/89021
* config/i386/sse.md (ssse3_psign<mode>3): Add SSE emulation.
---
gcc/config/i386/sse.md | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a92505c54a1..f235fe36a2d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15804,17 +15804,21 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "ssse3_psign<mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
(unspec:MMXMODEI
- [(match_operand:MMXMODEI 1 "register_operand" "0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
+ [(match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,x,Yv")]
UNSPEC_PSIGN))]
- "TARGET_SSSE3"
- "psign<mmxvecsize>\t{%2, %0|%0, %2}";
- [(set_attr "type" "sselog1")
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+ "@
+ psign<mmxvecsize>\t{%2, %0|%0, %2}
+ psign<mmxvecsize>\t{%2, %0|%0, %2}
+ vpsign<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "<ssse3_avx2>_palignr<mode>_mask"
[(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 34/40] i386: Emulate MMX abs<mode>2 with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (28 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 32/40] i386: Emulate MMX ssse3_psign<mode>3 with SSE H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 12:33 ` [PATCH 11/40] i386: Emulate MMX mmx_eq/mmx_gt<mode>3 " H.J. Lu
` (10 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX abs<mode>2 with SSE. Only SSE register source operand is
allowed.
PR target/89021
* config/i386/sse.md (abs<mode>2): Add SSE emulation.
---
gcc/config/i386/sse.md | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 1a0549c66fb..91e46fcfba4 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15991,16 +15991,19 @@
})
(define_insn "abs<mode>2"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv")
(abs:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
- "TARGET_SSSE3"
- "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
- [(set_attr "type" "sselog1")
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym,Yv")))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+ "@
+ pabs<mmxvecsize>\t{%1, %0|%0, %1}
+ %vpabs<mmxvecsize>\t{%1, %0|%0, %1}"
+ [(set_attr "mmx_isa" "native,x64")
+ (set_attr "type" "sselog1")
(set_attr "prefix_rep" "0")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 11/40] i386: Emulate MMX mmx_eq/mmx_gt<mode>3 with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (29 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 34/40] i386: Emulate MMX abs<mode>2 " H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 12:33 ` [PATCH 33/40] i386: Emulate MMX ssse3_palignrdi " H.J. Lu
` (9 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX mmx_eq/mmx_gt<mode>3 with SSE. Only SSE register source
operand is allowed.
PR target/89021
* config/i386/mmx.md (mmx_eq<mode>3): Also allow
TARGET_MMX_WITH_SSE.
(*mmx_eq<mode>3): Also allow TARGET_MMX_WITH_SSE. Add SSE
support.
(mmx_gt<mode>3): Likewise.
---
gcc/config/i386/mmx.md | 39 ++++++++++++++++++++++++---------------
1 file changed, 24 insertions(+), 15 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 2a9972e79d9..132ce7af802 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1033,28 +1033,37 @@
(eq:MMXMODEI
(match_operand:MMXMODEI 1 "nonimmediate_operand")
(match_operand:MMXMODEI 2 "nonimmediate_operand")))]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
"ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
(define_insn "*mmx_eq<mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
(eq:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
- "TARGET_MMX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
- "pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxcmp")
- (set_attr "mode" "DI")])
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0,0,Yv")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,x,Yv")))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
+ "@
+ pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}
+ pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}
+ vpcmpeq<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcmp,ssecmp,ssecmp")
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_gt<mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
(gt:MMXMODEI
- (match_operand:MMXMODEI 1 "register_operand" "0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
- "TARGET_MMX"
- "pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxcmp")
- (set_attr "mode" "DI")])
+ (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,x,Yv")))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}
+ pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}
+ vpcmpgt<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcmp,ssecmp,ssecmp")
+ (set_attr "mode" "DI,TI,TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 33/40] i386: Emulate MMX ssse3_palignrdi with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (30 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 11/40] i386: Emulate MMX mmx_eq/mmx_gt<mode>3 " H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 12:33 ` [PATCH 26/40] i386: Emulate MMX umulv1siv1di3 with SSE2 H.J. Lu
` (8 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX version of palignrq with SSE version by concatenating two
64-bit MMX operands into a single 128-bit SSE operand, followed by
SSE psrldq. Only an SSE register source operand is allowed.
PR target/89021
* config/i386/sse.md (ssse3_palignrdi): Changed to
define_insn_and_split to support SSE emulation.
---
gcc/config/i386/sse.md | 58 ++++++++++++++++++++++++++++++++++--------
1 file changed, 48 insertions(+), 10 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index f235fe36a2d..1a0549c66fb 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15873,23 +15873,61 @@
(set_attr "prefix" "orig,vex,evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "ssse3_palignrdi"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI [(match_operand:DI 1 "register_operand" "0")
- (match_operand:DI 2 "nonimmediate_operand" "ym")
- (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+(define_insn_and_split "ssse3_palignrdi"
+ [(set (match_operand:DI 0 "register_operand" "=y,x,Yv")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv")
+ (match_operand:DI 2 "nonimmediate_operand" "ym,x,Yv")
+ (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
UNSPEC_PALIGNR))]
- "TARGET_SSSE3"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
{
- operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
- return "palignr\t{%3, %2, %0|%0, %2, %3}";
+ switch (which_alternative)
+ {
+ case 0:
+ operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+ return "palignr\t{%3, %2, %0|%0, %2, %3}";
+ case 1:
+ case 2:
+ return "#";
+ default:
+ gcc_unreachable ();
+ }
}
- [(set_attr "type" "sseishft")
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(set (match_dup 0)
+ (lshiftrt:V1TI (match_dup 0) (match_dup 3)))]
+{
+ /* Emulate MMX palignrdi with SSE psrldq. */
+ rtx op0 = lowpart_subreg (V2DImode, operands[0],
+ GET_MODE (operands[0]));
+ rtx insn;
+ if (TARGET_AVX)
+ insn = gen_vec_concatv2di (op0, operands[2], operands[1]);
+ else
+ {
+ /* NB: SSE can only concatenate OP0 and OP1 to OP0. */
+ insn = gen_vec_concatv2di (op0, operands[1], operands[2]);
+ emit_insn (insn);
+ /* Swap bits 0:63 with bits 64:127. */
+ rtx mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4, GEN_INT (2),
+ GEN_INT (3),
+ GEN_INT (0),
+ GEN_INT (1)));
+ rtx op1 = lowpart_subreg (V4SImode, op0, GET_MODE (op0));
+ rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask);
+ insn = gen_rtx_SET (op1, op2);
+ }
+ emit_insn (insn);
+ operands[0] = lowpart_subreg (V1TImode, op0, GET_MODE (op0));
+}
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "sseishft")
(set_attr "atom_unit" "sishuf")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
;; modes for abs instruction on pre AVX-512 targets.
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 26/40] i386: Emulate MMX umulv1siv1di3 with SSE2
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (31 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 33/40] i386: Emulate MMX ssse3_palignrdi " H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 12:33 ` [PATCH 28/40] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>dv2si3 with SSE H.J. Lu
` (7 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX umulv1siv1di3 with SSE2. Only SSE register source operand
is allowed.
PR target/89021
* config/i386/mmx.md (sse2_umulv1siv1di3): Add SSE emulation
support.
(*sse2_umulv1siv1di3): Add SSE2 emulation.
---
gcc/config/i386/mmx.md | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 274e895f51e..a618a620eb1 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -911,24 +911,30 @@
(vec_select:V1SI
(match_operand:V2SI 2 "nonimmediate_operand")
(parallel [(const_int 0)])))))]
- "TARGET_SSE2"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE2"
"ix86_fixup_binary_operands_no_copy (MULT, V2SImode, operands);")
(define_insn "*sse2_umulv1siv1di3"
- [(set (match_operand:V1DI 0 "register_operand" "=y")
+ [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv")
(mult:V1DI
(zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 1 "nonimmediate_operand" "%0")
+ (match_operand:V2SI 1 "nonimmediate_operand" "%0,0,Yv")
(parallel [(const_int 0)])))
(zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym,x,Yv")
(parallel [(const_int 0)])))))]
- "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2SImode, operands)"
- "pmuludq\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxmul")
- (set_attr "mode" "DI")])
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && TARGET_SSE2
+ && ix86_binary_operator_ok (MULT, V2SImode, operands)"
+ "@
+ pmuludq\t{%2, %0|%0, %2}
+ pmuludq\t{%2, %0|%0, %2}
+ vpmuludq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxmul,ssemul,ssemul")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_<code>v4hi3"
[(set (match_operand:V4HI 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 28/40] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>dv2si3 with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (32 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 26/40] i386: Emulate MMX umulv1siv1di3 with SSE2 H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 12:33 ` [PATCH 30/40] i386: Emulate MMX ssse3_pmulhrswv4hi3 " H.J. Lu
` (6 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX ssse3_ph<plusminus_mnemonic>dv2si3 with SSE by moving bits
64:95 to bits 32:63 in SSE register. Only SSE register source operand
is allowed.
PR target/89021
* config/i386/sse.md (ssse3_ph<plusminus_mnemonic>dv2si3):
Changed to define_insn_and_split to support SSE emulation.
---
gcc/config/i386/sse.md | 34 ++++++++++++++++++++++++++--------
1 file changed, 26 insertions(+), 8 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 97cbd250dd4..af6a305d63e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15381,26 +15381,44 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
-(define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "ssse3_ph<plusminus_mnemonic>dv2si3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
(vec_concat:V2SI
(plusminus:SI
(vec_select:SI
- (match_operand:V2SI 1 "register_operand" "0")
+ (match_operand:V2SI 1 "register_operand" "0,0,Yv")
(parallel [(const_int 0)]))
(vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
(plusminus:SI
(vec_select:SI
- (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym,x,Yv")
(parallel [(const_int 0)]))
(vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
- "TARGET_SSSE3"
- "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseiadd")
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+ "@
+ ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+{
+ /* Generate SSE version of the operation. */
+ rtx op0 = lowpart_subreg (V4SImode, operands[0],
+ GET_MODE (operands[0]));
+ rtx op1 = lowpart_subreg (V4SImode, operands[1],
+ GET_MODE (operands[1]));
+ rtx op2 = lowpart_subreg (V4SImode, operands[2],
+ GET_MODE (operands[2]));
+ emit_insn (gen_ssse3_ph<plusminus_mnemonic>dv4si3 (op0, op1, op2));
+ ix86_move_vector_high_sse_to_mmx (op0);
+ DONE;
+}
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "sseiadd")
(set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "avx2_pmaddubsw256"
[(set (match_operand:V16HI 0 "register_operand" "=x,v")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 30/40] i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (33 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 28/40] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>dv2si3 with SSE H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 12:33 ` [PATCH 27/40] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>wv4hi3 " H.J. Lu
` (5 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX ssse3_pmulhrswv4hi3 with SSE. Only SSE register source
operand is allowed.
PR target/89021
* config/i386/sse.md (*ssse3_pmulhrswv4hi3): Add SSE emulation.
---
gcc/config/i386/sse.md | 20 +++++++++++++-------
1 file changed, 13 insertions(+), 7 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a7d0889f3e1..cc7dbe79fa7 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15677,25 +15677,31 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "*ssse3_pmulhrswv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
(lshiftrt:V4SI
(plus:V4SI
(lshiftrt:V4SI
(mult:V4SI
(sign_extend:V4SI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yv"))
(sign_extend:V4SI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv")))
(const_int 14))
(match_operand:V4HI 3 "const1_operand"))
(const_int 1))))]
- "TARGET_SSSE3 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "pmulhrsw\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseimul")
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && TARGET_SSSE3
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ pmulhrsw\t{%2, %0|%0, %2}
+ pmulhrsw\t{%2, %0|%0, %2}
+ vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "sseimul")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
[(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 27/40] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>wv4hi3 with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (34 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 30/40] i386: Emulate MMX ssse3_pmulhrswv4hi3 " H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 12:33 ` [PATCH 29/40] i386: Emulate MMX ssse3_pmaddubsw " H.J. Lu
` (4 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX ssse3_ph<plusminus_mnemonic>wv4hi3 with SSE by moving bits
64:95 to bits 32:63 in SSE register. Only SSE register source operand
is allowed.
PR target/89021
* config/i386/sse.md (ssse3_ph<plusminus_mnemonic>wv4hi3):
Changed to define_insn_and_split to support SSE emulation.
---
gcc/config/i386/sse.md | 34 ++++++++++++++++++++++++++--------
1 file changed, 26 insertions(+), 8 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b1bab15af41..97cbd250dd4 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15257,13 +15257,13 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
-(define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "ssse3_ph<plusminus_mnemonic>wv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(vec_concat:V4HI
(vec_concat:V2HI
(ssse3_plusminus:HI
(vec_select:HI
- (match_operand:V4HI 1 "register_operand" "0")
+ (match_operand:V4HI 1 "register_operand" "0,0,Yv")
(parallel [(const_int 0)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
(ssse3_plusminus:HI
@@ -15272,19 +15272,37 @@
(vec_concat:V2HI
(ssse3_plusminus:HI
(vec_select:HI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv")
(parallel [(const_int 0)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
(ssse3_plusminus:HI
(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
- "TARGET_SSSE3"
- "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseiadd")
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+ "@
+ ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+{
+ /* Generate SSE version of the operation. */
+ rtx op0 = lowpart_subreg (V8HImode, operands[0],
+ GET_MODE (operands[0]));
+ rtx op1 = lowpart_subreg (V8HImode, operands[1],
+ GET_MODE (operands[1]));
+ rtx op2 = lowpart_subreg (V8HImode, operands[2],
+ GET_MODE (operands[2]));
+ emit_insn (gen_ssse3_ph<plusminus_mnemonic>wv8hi3 (op0, op1, op2));
+ ix86_move_vector_high_sse_to_mmx (op0);
+ DONE;
+}
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "sseiadd")
(set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
[(set (match_operand:V8SI 0 "register_operand" "=x")
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 29/40] i386: Emulate MMX ssse3_pmaddubsw with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (35 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 27/40] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>wv4hi3 " H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 12:33 ` [PATCH 01/40] i386: Allow MMX register modes in SSE registers H.J. Lu
` (3 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX ssse3_pmaddubsw with SSE. Only SSE register source operand
is allowed.
PR target/89021
* config/i386/sse.md (ssse3_pmaddubsw): Add SSE emulation.
---
gcc/config/i386/sse.md | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index af6a305d63e..a7d0889f3e1 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15569,17 +15569,17 @@
(set_attr "mode" "TI")])
(define_insn "ssse3_pmaddubsw"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(ss_plus:V4HI
(mult:V4HI
(zero_extend:V4HI
(vec_select:V4QI
- (match_operand:V8QI 1 "register_operand" "0")
+ (match_operand:V8QI 1 "register_operand" "0,0,Yv")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))
(sign_extend:V4HI
(vec_select:V4QI
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym,x,Yv")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)]))))
(mult:V4HI
@@ -15591,13 +15591,17 @@
(vec_select:V4QI (match_dup 2)
(parallel [(const_int 1) (const_int 3)
(const_int 5) (const_int 7)]))))))]
- "TARGET_SSSE3"
- "pmaddubsw\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseiadd")
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+ "@
+ pmaddubsw\t{%2, %0|%0, %2}
+ pmaddubsw\t{%2, %0|%0, %2}
+ vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "sseiadd")
(set_attr "atom_unit" "simul")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
(define_mode_iterator PMULHRSW
[V4HI V8HI (V16HI "TARGET_AVX2")])
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 01/40] i386: Allow MMX register modes in SSE registers
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (36 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 29/40] i386: Emulate MMX ssse3_pmaddubsw " H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 12:33 ` [PATCH 37/40] i386: Allow MMX intrinsic emulation with SSE H.J. Lu
` (2 subsequent siblings)
40 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
In 64-bit mode, SSE2 can be used to emulate MMX instructions without
3DNOW. We can use SSE2 to support MMX register modes.
PR target/89021
* config/i386/i386.c (ix86_set_reg_reg_cost): Add support for
TARGET_MMX_WITH_SSE with VALID_MMX_REG_MODE.
(ix86_vector_mode_supported_p): Likewise.
* config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
(TARGET_MMX_WITH_SSE_P): Likewise.
---
gcc/config/i386/i386.c | 5 +++--
gcc/config/i386/i386.h | 5 +++++
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 4efb6ae0e44..83d3117f46d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -40495,7 +40495,8 @@ ix86_set_reg_reg_cost (machine_mode mode)
|| (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
|| (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
|| (TARGET_SSE && VALID_SSE_REG_MODE (mode))
- || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
+ || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && VALID_MMX_REG_MODE (mode)))
units = GET_MODE_SIZE (mode);
}
@@ -44321,7 +44322,7 @@ ix86_vector_mode_supported_p (machine_mode mode)
return true;
if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
return true;
- if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
+ if ((TARGET_MMX ||TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode))
return true;
if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
return true;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 83b025e0cf5..db814d9ed17 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -201,6 +201,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_16BIT TARGET_CODE16
#define TARGET_16BIT_P(x) TARGET_CODE16_P(x)
+#define TARGET_MMX_WITH_SSE \
+ (TARGET_64BIT && TARGET_SSE2)
+#define TARGET_MMX_WITH_SSE_P(x) \
+ (TARGET_64BIT_P (x) && TARGET_SSE2_P (x))
+
#include "config/vxworks-dummy.h"
#include "config/i386/i386-opts.h"
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 37/40] i386: Allow MMX intrinsic emulation with SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (37 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 01/40] i386: Allow MMX register modes in SSE registers H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 20:07 ` Uros Bizjak
2019-02-15 12:04 ` Uros Bizjak
2019-02-14 12:33 ` [PATCH 25/40] i386: Emulate MMX movntq with SSE2 movntidi H.J. Lu
2019-02-14 18:18 ` [PATCH 41/40] Prevent allocation of MMX registers with TARGET_MMX_WITH_SSE Uros Bizjak
40 siblings, 2 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Allow MMX intrinsic emulation with SSE/SSE2/SSSE3. Don't enable MMX ISA
by default with TARGET_MMX_WITH_SSE.
For pr82483-1.c and pr82483-2.c, "-mssse3 -mno-mmx" compiles in 64-bit
mode since MMX intrinsics can be emulated with SSE.
gcc/
PR target/89021
* config/i386/i386-builtin.def: Enable MMX intrinsics with
SSE/SSE2/SSSE3.
* config/i386/i386.c (ix86_option_override_internal): Don't
enable MMX ISA with TARGET_MMX_WITH_SSE by default.
(ix86_init_mmx_sse_builtins): Enable MMX intrinsics with
SSE/SSE2/SSSE3.
(ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX
intrinsics with TARGET_MMX_WITH_SSE.
* config/i386/mmintrin.h: Don't require MMX in 64-bit mode.
gcc/testsuite/
PR target/89021
* gcc.target/i386/pr82483-1.c: Error only on ia32.
* gcc.target/i386/pr82483-2.c: Likewise.
---
gcc/config/i386/i386-builtin.def | 126 +++++++++++-----------
gcc/config/i386/i386.c | 46 ++++++--
gcc/config/i386/mmintrin.h | 10 +-
gcc/testsuite/gcc.target/i386/pr82483-1.c | 2 +-
gcc/testsuite/gcc.target/i386/pr82483-2.c | 2 +-
5 files changed, 110 insertions(+), 76 deletions(-)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 88005f4687f..10a9d631f29 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -100,7 +100,7 @@ BDESC (0, 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKN
BDESC (0, 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID)
/* MMX */
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
/* 3DNow! */
BDESC (OPTION_MASK_ISA_3DNOW, 0, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
@@ -442,68 +442,68 @@ BDESC (0, 0, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNO
BDESC (0, 0, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT)
/* MMX */
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
/* 3DNow! */
BDESC (OPTION_MASK_ISA_3DNOW, 0, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a9abbe8706b..1d417e08734 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -4165,12 +4165,15 @@ ix86_option_override_internal (bool main_args_p,
opts->x_target_flags
|= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
- /* Enable by default the SSE and MMX builtins. Do allow the user to
- explicitly disable any of these. In particular, disabling SSE and
- MMX for kernel code is extremely useful. */
+ /* Enable the SSE and MMX builtins by default. Don't enable MMX
+ ISA with TARGET_MMX_WITH_SSE by default. Do allow the user to
+ explicitly disable any of these. In particular, disabling SSE
+ and MMX for kernel code is extremely useful. */
if (!ix86_arch_specified)
opts->x_ix86_isa_flags
- |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
+ |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
+ | (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags)
+ ? 0 : OPTION_MASK_ISA_MMX)
| TARGET_SUBTARGET64_ISA_DEFAULT)
& ~opts->x_ix86_isa_flags_explicit);
@@ -4216,8 +4219,10 @@ ix86_option_override_internal (bool main_args_p,
if (!TARGET_80387_P (opts->x_target_flags))
opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
- /* Turn on MMX builtins for -msse. */
- if (TARGET_SSE_P (opts->x_ix86_isa_flags))
+ /* Turn on MMX builtins for -msse. Don't enable MMX ISA with
+ TARGET_MMX_WITH_SSE. */
+ if (TARGET_SSE_P (opts->x_ix86_isa_flags)
+ && !TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags))
opts->x_ix86_isa_flags
|= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
@@ -31769,14 +31774,17 @@ ix86_init_mmx_sse_builtins (void)
VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
/* MMX access to the vec_init patterns. */
- def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v2si",
+ def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
+ "__builtin_ia32_vec_init_v2si",
V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
- def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v4hi",
+ def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
+ "__builtin_ia32_vec_init_v4hi",
V4HI_FTYPE_HI_HI_HI_HI,
IX86_BUILTIN_VEC_INIT_V4HI);
- def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v8qi",
+ def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
+ "__builtin_ia32_vec_init_v8qi",
V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
IX86_BUILTIN_VEC_INIT_V8QI);
@@ -31798,7 +31806,8 @@ ix86_init_mmx_sse_builtins (void)
"__builtin_ia32_vec_ext_v4hi",
HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
- def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_ext_v2si",
+ def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
+ "__builtin_ia32_vec_ext_v2si",
SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v16qi",
@@ -36931,6 +36940,23 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
== (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4))
&& (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0)
isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4);
+ /* Use SSE/SSE2/SSSE3 to emulate MMX intrinsics in 64-bit mode when
+ MMX is disabled. */
+ if (TARGET_MMX_WITH_SSE)
+ {
+ if (((bisa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX))
+ == (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX))
+ && (isa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX)) != 0)
+ isa |= (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX);
+ if (((bisa & (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX))
+ == (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX))
+ && (isa & (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX)) != 0)
+ isa |= (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX);
+ if (((bisa & (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX))
+ == (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX))
+ && (isa & (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX)) != 0)
+ isa |= (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX);
+ }
if ((bisa & isa) != bisa || (bisa2 & isa2) != bisa2)
{
char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL,
diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h
index 238b3df3121..7b613658111 100644
--- a/gcc/config/i386/mmintrin.h
+++ b/gcc/config/i386/mmintrin.h
@@ -30,7 +30,7 @@
#if defined __x86_64__ && !defined __SSE__ || !defined __MMX__
#pragma GCC push_options
#ifdef __x86_64__
-#pragma GCC target("sse,mmx")
+#pragma GCC target("sse2")
#else
#pragma GCC target("mmx")
#endif
@@ -315,7 +315,11 @@ _m_paddd (__m64 __m1, __m64 __m2)
/* Add the 64-bit values in M1 to the 64-bit values in M2. */
#ifndef __SSE2__
#pragma GCC push_options
+#ifdef __x86_64__
+#pragma GCC target("sse2")
+#else
#pragma GCC target("sse2,mmx")
+#endif
#define __DISABLE_SSE2__
#endif /* __SSE2__ */
@@ -427,7 +431,11 @@ _m_psubd (__m64 __m1, __m64 __m2)
/* Add the 64-bit values in M1 to the 64-bit values in M2. */
#ifndef __SSE2__
#pragma GCC push_options
+#ifdef __x86_64__
+#pragma GCC target("sse2")
+#else
#pragma GCC target("sse2,mmx")
+#endif
#define __DISABLE_SSE2__
#endif /* __SSE2__ */
diff --git a/gcc/testsuite/gcc.target/i386/pr82483-1.c b/gcc/testsuite/gcc.target/i386/pr82483-1.c
index 59a59dc8dfe..b2028d8dc5e 100644
--- a/gcc/testsuite/gcc.target/i386/pr82483-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr82483-1.c
@@ -1,7 +1,7 @@
/* PR target/82483 */
/* { dg-do compile } */
/* { dg-options "-mssse3 -mno-mmx -Wno-psabi" } */
-/* { dg-error "needs isa option" "" { target *-*-* } 0 } */
+/* { dg-error "needs isa option" "" { target ia32 } 0 } */
#include <x86intrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/pr82483-2.c b/gcc/testsuite/gcc.target/i386/pr82483-2.c
index 305ddbd6c64..c92de405cb3 100644
--- a/gcc/testsuite/gcc.target/i386/pr82483-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr82483-2.c
@@ -1,7 +1,7 @@
/* PR target/82483 */
/* { dg-do compile } */
/* { dg-options "-mssse3 -mno-mmx -Wno-psabi" } */
-/* { dg-error "needs isa option" "" { target *-*-* } 0 } */
+/* { dg-error "needs isa option" "" { target ia32 } 0 } */
#include <x86intrin.h>
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [PATCH 37/40] i386: Allow MMX intrinsic emulation with SSE
2019-02-14 12:33 ` [PATCH 37/40] i386: Allow MMX intrinsic emulation with SSE H.J. Lu
@ 2019-02-14 20:07 ` Uros Bizjak
2019-02-14 20:50 ` H.J. Lu
2019-02-15 12:04 ` Uros Bizjak
1 sibling, 1 reply; 62+ messages in thread
From: Uros Bizjak @ 2019-02-14 20:07 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
On Thu, Feb 14, 2019 at 1:33 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Allow MMX intrinsic emulation with SSE/SSE2/SSSE3. Don't enable MMX ISA
> by default with TARGET_MMX_WITH_SSE.
>
> For pr82483-1.c and pr82483-2.c, "-mssse3 -mno-mmx" compiles in 64-bit
> mode since MMX intrinsics can be emulated with SSE.
>
> gcc/
>
> PR target/89021
> * config/i386/i386-builtin.def: Enable MMX intrinsics with
> SSE/SSE2/SSSE3.
> * config/i386/i386.c (ix86_option_override_internal): Don't
> enable MMX ISA with TARGET_MMX_WITH_SSE by default.
> (ix86_init_mmx_sse_builtins): Enable MMX intrinsics with
> SSE/SSE2/SSSE3.
> (ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX
> intrinsics with TARGET_MMX_WITH_SSE.
> * config/i386/mmintrin.h: Don't require MMX in 64-bit mode.
>
> gcc/testsuite/
>
> PR target/89021
> * gcc.target/i386/pr82483-1.c: Error only on ia32.
> * gcc.target/i386/pr82483-2.c: Likewise.
> ---
> gcc/config/i386/i386-builtin.def | 126 +++++++++++-----------
> gcc/config/i386/i386.c | 46 ++++++--
> gcc/config/i386/mmintrin.h | 10 +-
> gcc/testsuite/gcc.target/i386/pr82483-1.c | 2 +-
> gcc/testsuite/gcc.target/i386/pr82483-2.c | 2 +-
> 5 files changed, 110 insertions(+), 76 deletions(-)
>
> diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> index 88005f4687f..10a9d631f29 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -100,7 +100,7 @@ BDESC (0, 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKN
> BDESC (0, 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID)
>
> /* MMX */
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
>
> /* 3DNow! */
> BDESC (OPTION_MASK_ISA_3DNOW, 0, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
> @@ -442,68 +442,68 @@ BDESC (0, 0, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNO
> BDESC (0, 0, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT)
>
> /* MMX */
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
>
> /* 3DNow! */
> BDESC (OPTION_MASK_ISA_3DNOW, 0, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF)
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index a9abbe8706b..1d417e08734 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -4165,12 +4165,15 @@ ix86_option_override_internal (bool main_args_p,
> opts->x_target_flags
> |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
>
> - /* Enable by default the SSE and MMX builtins. Do allow the user to
> - explicitly disable any of these. In particular, disabling SSE and
> - MMX for kernel code is extremely useful. */
> + /* Enable the SSE and MMX builtins by default. Don't enable MMX
> + ISA with TARGET_MMX_WITH_SSE by default. Do allow the user to
> + explicitly disable any of these. In particular, disabling SSE
> + and MMX for kernel code is extremely useful. */
> if (!ix86_arch_specified)
> opts->x_ix86_isa_flags
> - |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
> + |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
> + | (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags)
> + ? 0 : OPTION_MASK_ISA_MMX)
> | TARGET_SUBTARGET64_ISA_DEFAULT)
> & ~opts->x_ix86_isa_flags_explicit);
Please split the above into two statements: the first sets SSE and
MMX by default, and the second ORs in the subtarget ISA defaults:
opts->x_ix86_isa_flags
|= TARGET_SUBTARGET64_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit
> @@ -4216,8 +4219,10 @@ ix86_option_override_internal (bool main_args_p,
> if (!TARGET_80387_P (opts->x_target_flags))
> opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
>
> - /* Turn on MMX builtins for -msse. */
> - if (TARGET_SSE_P (opts->x_ix86_isa_flags))
> + /* Turn on MMX builtins for -msse. Don't enable MMX ISA with
> + TARGET_MMX_WITH_SSE. */
> + if (TARGET_SSE_P (opts->x_ix86_isa_flags)
> + && !TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags))
> opts->x_ix86_isa_flags
> |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
>
> @@ -31769,14 +31774,17 @@ ix86_init_mmx_sse_builtins (void)
> VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
>
> /* MMX access to the vec_init patterns. */
> - def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v2si",
> + def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> + "__builtin_ia32_vec_init_v2si",
> V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
>
> - def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v4hi",
> + def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> + "__builtin_ia32_vec_init_v4hi",
> V4HI_FTYPE_HI_HI_HI_HI,
> IX86_BUILTIN_VEC_INIT_V4HI);
>
> - def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v8qi",
> + def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> + "__builtin_ia32_vec_init_v8qi",
> V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
> IX86_BUILTIN_VEC_INIT_V8QI);
>
> @@ -31798,7 +31806,8 @@ ix86_init_mmx_sse_builtins (void)
> "__builtin_ia32_vec_ext_v4hi",
> HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
>
> - def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_ext_v2si",
> + def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> + "__builtin_ia32_vec_ext_v2si",
> SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
>
> def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v16qi",
> @@ -36931,6 +36940,23 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
> == (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4))
> && (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0)
> isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4);
> + /* Use SSE/SSE2/SSSE3 to emulate MMX intrinsics in 64-bit mode when
> + MMX is disabled. */
> + if (TARGET_MMX_WITH_SSE)
> + {
> + if (((bisa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX))
> + == (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX))
> + && (isa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX)) != 0)
> + isa |= (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX);
> + if (((bisa & (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX))
> + == (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX))
> + && (isa & (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX)) != 0)
> + isa |= (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX);
> + if (((bisa & (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX))
> + == (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX))
> + && (isa & (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX)) != 0)
> + isa |= (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX);
> + }
> if ((bisa & isa) != bisa || (bisa2 & isa2) != bisa2)
> {
> char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL,
> diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h
> index 238b3df3121..7b613658111 100644
> --- a/gcc/config/i386/mmintrin.h
> +++ b/gcc/config/i386/mmintrin.h
> @@ -30,7 +30,7 @@
> #if defined __x86_64__ && !defined __SSE__ || !defined __MMX__
> #pragma GCC push_options
> #ifdef __x86_64__
> -#pragma GCC target("sse,mmx")
> +#pragma GCC target("sse2")
You will need to use __MMX_WITH_SSE__ here, probably something like:
#ifdef __MMX_WITH_SSE__
#pragma GCC target("sse2")
#elif defined __x86_64__
#pragma GCC target("sse,mmx")
#else
#pragma GCC target("mmx")
#endif
> #else
> #pragma GCC target("mmx")
> #endif
> @@ -315,7 +315,11 @@ _m_paddd (__m64 __m1, __m64 __m2)
> /* Add the 64-bit values in M1 to the 64-bit values in M2. */
> #ifndef __SSE2__
> #pragma GCC push_options
> +#ifdef __x86_64__
#ifdef __MMX_WITH_SSE__
> +#pragma GCC target("sse2")
> +#else
> #pragma GCC target("sse2,mmx")
> +#endif
> #define __DISABLE_SSE2__
> #endif /* __SSE2__ */
>
> @@ -427,7 +431,11 @@ _m_psubd (__m64 __m1, __m64 __m2)
> /* Add the 64-bit values in M1 to the 64-bit values in M2. */
> #ifndef __SSE2__
> #pragma GCC push_options
> +#ifdef __x86_64__
#ifdef __MMX_WITH_SSE__
> +#pragma GCC target("sse2")
> +#else
> #pragma GCC target("sse2,mmx")
> +#endif
> #define __DISABLE_SSE2__
> #endif /* __SSE2__ */
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr82483-1.c b/gcc/testsuite/gcc.target/i386/pr82483-1.c
> index 59a59dc8dfe..b2028d8dc5e 100644
> --- a/gcc/testsuite/gcc.target/i386/pr82483-1.c
> +++ b/gcc/testsuite/gcc.target/i386/pr82483-1.c
> @@ -1,7 +1,7 @@
> /* PR target/82483 */
> /* { dg-do compile } */
> /* { dg-options "-mssse3 -mno-mmx -Wno-psabi" } */
> -/* { dg-error "needs isa option" "" { target *-*-* } 0 } */
> +/* { dg-error "needs isa option" "" { target ia32 } 0 } */
>
> #include <x86intrin.h>
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr82483-2.c b/gcc/testsuite/gcc.target/i386/pr82483-2.c
> index 305ddbd6c64..c92de405cb3 100644
> --- a/gcc/testsuite/gcc.target/i386/pr82483-2.c
> +++ b/gcc/testsuite/gcc.target/i386/pr82483-2.c
> @@ -1,7 +1,7 @@
> /* PR target/82483 */
> /* { dg-do compile } */
> /* { dg-options "-mssse3 -mno-mmx -Wno-psabi" } */
> -/* { dg-error "needs isa option" "" { target *-*-* } 0 } */
> +/* { dg-error "needs isa option" "" { target ia32 } 0 } */
>
> #include <x86intrin.h>
>
> --
> 2.20.1
>
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [PATCH 37/40] i386: Allow MMX intrinsic emulation with SSE
2019-02-14 20:07 ` Uros Bizjak
@ 2019-02-14 20:50 ` H.J. Lu
2019-02-14 20:54 ` Uros Bizjak
0 siblings, 1 reply; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 20:50 UTC (permalink / raw)
To: Uros Bizjak; +Cc: gcc-patches
On Thu, Feb 14, 2019 at 12:07 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Thu, Feb 14, 2019 at 1:33 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > Allow MMX intrinsic emulation with SSE/SSE2/SSSE3. Don't enable MMX ISA
> > by default with TARGET_MMX_WITH_SSE.
> >
> > For pr82483-1.c and pr82483-2.c, "-mssse3 -mno-mmx" compiles in 64-bit
> > mode since MMX intrinsics can be emulated with SSE.
> >
> > gcc/
> >
> > PR target/89021
> > * config/i386/i386-builtin.def: Enable MMX intrinsics with
> > SSE/SSE2/SSSE3.
> > * config/i386/i386.c (ix86_option_override_internal): Don't
> > enable MMX ISA with TARGET_MMX_WITH_SSE by default.
> > (ix86_init_mmx_sse_builtins): Enable MMX intrinsics with
> > SSE/SSE2/SSSE3.
> > (ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX
> > intrinsics with TARGET_MMX_WITH_SSE.
> > * config/i386/mmintrin.h: Don't require MMX in 64-bit mode.
> >
>
> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > index a9abbe8706b..1d417e08734 100644
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -4165,12 +4165,15 @@ ix86_option_override_internal (bool main_args_p,
> > opts->x_target_flags
> > |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
> >
> > - /* Enable by default the SSE and MMX builtins. Do allow the user to
> > - explicitly disable any of these. In particular, disabling SSE and
> > - MMX for kernel code is extremely useful. */
> > + /* Enable the SSE and MMX builtins by default. Don't enable MMX
> > + ISA with TARGET_MMX_WITH_SSE by default. Do allow the user to
> > + explicitly disable any of these. In particular, disabling SSE
> > + and MMX for kernel code is extremely useful. */
> > if (!ix86_arch_specified)
> > opts->x_ix86_isa_flags
> > - |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
> > + |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
> > + | (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags)
> > + ? 0 : OPTION_MASK_ISA_MMX)
> > | TARGET_SUBTARGET64_ISA_DEFAULT)
> > & ~opts->x_ix86_isa_flags_explicit);
>
> Please split the above into two clauses, the first that sets SSE and
> MMX by default, and the second to or with
>
> opts->x_ix86_isa_flags
> |= TARGET_SUBTARGET64_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit
>
Like this?
/* Enable the SSE and MMX builtins by default. Don't enable MMX
ISA with TARGET_MMX_WITH_SSE by default. Do allow the user to
explicitly disable any of these. In particular, disabling SSE
and MMX for kernel code is extremely useful. */
if (!ix86_arch_specified)
{
opts->x_ix86_isa_flags
|= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
| (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags)
? 0 : OPTION_MASK_ISA_MMX))
& ~opts->x_ix86_isa_flags_explicit);
opts->x_ix86_isa_flags
|= (TARGET_SUBTARGET64_ISA_DEFAULT
& ~opts->x_ix86_isa_flags_explicit);
}
> > diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h
> > index 238b3df3121..7b613658111 100644
> > --- a/gcc/config/i386/mmintrin.h
> > +++ b/gcc/config/i386/mmintrin.h
> > @@ -30,7 +30,7 @@
> > #if defined __x86_64__ && !defined __SSE__ || !defined __MMX__
> > #pragma GCC push_options
> > #ifdef __x86_64__
> > -#pragma GCC target("sse,mmx")
> > +#pragma GCC target("sse2")
>
> You will need to involve __MMX_WITH_SSE__ here, probably to something like:
>
> #ifdef __MMX_WITH_SSE__
> #pragma GCC target("sse2")
> #elif defined __x86_64__
> #pragma GCC target("sse,mmx")
> #else
> #pragma GCC target("mmx")
> #endif
>
> > #else
> > #pragma GCC target("mmx")
> > #endif
> > @@ -315,7 +315,11 @@ _m_paddd (__m64 __m1, __m64 __m2)
> > /* Add the 64-bit values in M1 to the 64-bit values in M2. */
> > #ifndef __SSE2__
> > #pragma GCC push_options
> > +#ifdef __x86_64__
>
> #ifdef __MMX_WITH_SSE__
>
> > +#pragma GCC target("sse2")
> > +#else
> > #pragma GCC target("sse2,mmx")
> > +#endif
> > #define __DISABLE_SSE2__
> > #endif /* __SSE2__ */
> >
> > @@ -427,7 +431,11 @@ _m_psubd (__m64 __m1, __m64 __m2)
> > /* Add the 64-bit values in M1 to the 64-bit values in M2. */
> > #ifndef __SSE2__
> > #pragma GCC push_options
> > +#ifdef __x86_64__
>
> #ifdef __MMX_WITH_SSE__
>
> > +#pragma GCC target("sse2")
> > +#else
> > #pragma GCC target("sse2,mmx")
> > +#endif
> > #define __DISABLE_SSE2__
> > #endif /* __SSE2__ */
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/pr82483-1.c b/gcc/testsuite/gcc.target/i386/pr82483-1.c
I will do
diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h
index 238b3df3121..c4b2e0c7b25 100644
--- a/gcc/config/i386/mmintrin.h
+++ b/gcc/config/i386/mmintrin.h
@@ -29,7 +29,9 @@
#if defined __x86_64__ && !defined __SSE__ || !defined __MMX__
#pragma GCC push_options
-#ifdef __x86_64__
+#ifdef __MMX_WITH_SSE__
+#pragma GCC target("sse2")
+#elif defined __x86_64__
#pragma GCC target("sse,mmx")
#else
#pragma GCC target("mmx")
@@ -315,7 +317,11 @@ _m_paddd (__m64 __m1, __m64 __m2)
/* Add the 64-bit values in M1 to the 64-bit values in M2. */
#ifndef __SSE2__
#pragma GCC push_options
+#ifdef __MMX_WITH_SSE__
+#pragma GCC target("sse2")
+#else
#pragma GCC target("sse2,mmx")
+#endif
#define __DISABLE_SSE2__
#endif /* __SSE2__ */
@@ -427,7 +433,11 @@ _m_psubd (__m64 __m1, __m64 __m2)
/* Add the 64-bit values in M1 to the 64-bit values in M2. */
#ifndef __SSE2__
#pragma GCC push_options
+#ifdef __MMX_WITH_SSE__
+#pragma GCC target("sse2")
+#else
#pragma GCC target("sse2,mmx")
+#endif
#define __DISABLE_SSE2__
#endif
Thanks.
--
H.J.
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [PATCH 37/40] i386: Allow MMX intrinsic emulation with SSE
2019-02-14 20:50 ` H.J. Lu
@ 2019-02-14 20:54 ` Uros Bizjak
2019-02-14 21:02 ` H.J. Lu
0 siblings, 1 reply; 62+ messages in thread
From: Uros Bizjak @ 2019-02-14 20:54 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
On Thu, Feb 14, 2019 at 9:50 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Thu, Feb 14, 2019 at 12:07 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Thu, Feb 14, 2019 at 1:33 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > Allow MMX intrinsic emulation with SSE/SSE2/SSSE3. Don't enable MMX ISA
> > > by default with TARGET_MMX_WITH_SSE.
> > >
> > > For pr82483-1.c and pr82483-2.c, "-mssse3 -mno-mmx" compiles in 64-bit
> > > mode since MMX intrinsics can be emulated with SSE.
> > >
> > > gcc/
> > >
> > > PR target/89021
> > > * config/i386/i386-builtin.def: Enable MMX intrinsics with
> > > SSE/SSE2/SSSE3.
> > > * config/i386/i386.c (ix86_option_override_internal): Don't
> > > enable MMX ISA with TARGET_MMX_WITH_SSE by default.
> > > (ix86_init_mmx_sse_builtins): Enable MMX intrinsics with
> > > SSE/SSE2/SSSE3.
> > > (ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX
> > > intrinsics with TARGET_MMX_WITH_SSE.
> > > * config/i386/mmintrin.h: Don't require MMX in 64-bit mode.
> > >
>
> >
> > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > > index a9abbe8706b..1d417e08734 100644
> > > --- a/gcc/config/i386/i386.c
> > > +++ b/gcc/config/i386/i386.c
> > > @@ -4165,12 +4165,15 @@ ix86_option_override_internal (bool main_args_p,
> > > opts->x_target_flags
> > > |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
> > >
> > > - /* Enable by default the SSE and MMX builtins. Do allow the user to
> > > - explicitly disable any of these. In particular, disabling SSE and
> > > - MMX for kernel code is extremely useful. */
> > > + /* Enable the SSE and MMX builtins by default. Don't enable MMX
> > > + ISA with TARGET_MMX_WITH_SSE by default. Do allow the user to
> > > + explicitly disable any of these. In particular, disabling SSE
> > > + and MMX for kernel code is extremely useful. */
> > > if (!ix86_arch_specified)
> > > opts->x_ix86_isa_flags
> > > - |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
> > > + |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
> > > + | (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags)
> > > + ? 0 : OPTION_MASK_ISA_MMX)
> > > | TARGET_SUBTARGET64_ISA_DEFAULT)
> > > & ~opts->x_ix86_isa_flags_explicit);
> >
> > Please split the above into two clauses, the first that sets SSE and
> > MMX by default, and the second to or with
> >
> > opts->x_ix86_isa_flags
> > |= TARGET_SUBTARGET64_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit
> >
>
> Like this?
Yes, but also split the comment.
Thanks,
Uros.
> /* Enable the SSE and MMX builtins by default. Don't enable MMX
> ISA with TARGET_MMX_WITH_SSE by default. Do allow the user to
> explicitly disable any of these. In particular, disabling SSE
> and MMX for kernel code is extremely useful. */
> if (!ix86_arch_specified)
> {
> opts->x_ix86_isa_flags
> |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
> | (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags)
> ? 0 : OPTION_MASK_ISA_MMX))
> & ~opts->x_ix86_isa_flags_explicit);
> opts->x_ix86_isa_flags
> |= (TARGET_SUBTARGET64_ISA_DEFAULT
> & ~opts->x_ix86_isa_flags_explicit);
> }
>
>
> > > diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h
> > > index 238b3df3121..7b613658111 100644
> > > --- a/gcc/config/i386/mmintrin.h
> > > +++ b/gcc/config/i386/mmintrin.h
> > > @@ -30,7 +30,7 @@
> > > #if defined __x86_64__ && !defined __SSE__ || !defined __MMX__
> > > #pragma GCC push_options
> > > #ifdef __x86_64__
> > > -#pragma GCC target("sse,mmx")
> > > +#pragma GCC target("sse2")
> >
> > You will need to involve __MMX_WITH_SSE__ here, probably to something like:
> >
> > #ifdef __MMX_WITH_SSE__
> > #pragma GCC target("sse2")
> > #elif defined __x86_64__
> > #pragma GCC target("sse,mmx")
> > #else
> > #pragma GCC target("mmx")
> > #endif
> >
> > > #else
> > > #pragma GCC target("mmx")
> > > #endif
> > > @@ -315,7 +315,11 @@ _m_paddd (__m64 __m1, __m64 __m2)
> > > /* Add the 64-bit values in M1 to the 64-bit values in M2. */
> > > #ifndef __SSE2__
> > > #pragma GCC push_options
> > > +#ifdef __x86_64__
> >
> > #ifdef __MMX_WITH_SSE__
> >
> > > +#pragma GCC target("sse2")
> > > +#else
> > > #pragma GCC target("sse2,mmx")
> > > +#endif
> > > #define __DISABLE_SSE2__
> > > #endif /* __SSE2__ */
> > >
> > > @@ -427,7 +431,11 @@ _m_psubd (__m64 __m1, __m64 __m2)
> > > /* Add the 64-bit values in M1 to the 64-bit values in M2. */
> > > #ifndef __SSE2__
> > > #pragma GCC push_options
> > > +#ifdef __x86_64__
> >
> > #ifdef __MMX_WITH_SSE__
> >
> > > +#pragma GCC target("sse2")
> > > +#else
> > > #pragma GCC target("sse2,mmx")
> > > +#endif
> > > #define __DISABLE_SSE2__
> > > #endif /* __SSE2__ */
> > >
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr82483-1.c b/gcc/testsuite/gcc.target/i386/pr82483-1.c
>
> I will do
>
> diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h
> index 238b3df3121..c4b2e0c7b25 100644
> --- a/gcc/config/i386/mmintrin.h
> +++ b/gcc/config/i386/mmintrin.h
> @@ -29,7 +29,9 @@
>
> #if defined __x86_64__ && !defined __SSE__ || !defined __MMX__
> #pragma GCC push_options
> -#ifdef __x86_64__
> +#ifdef __MMX_WITH_SSE__
> +#pragma GCC target("sse2")
> +#elif defined __x86_64__
> #pragma GCC target("sse,mmx")
> #else
> #pragma GCC target("mmx")
> @@ -315,7 +317,11 @@ _m_paddd (__m64 __m1, __m64 __m2)
> /* Add the 64-bit values in M1 to the 64-bit values in M2. */
> #ifndef __SSE2__
> #pragma GCC push_options
> +#ifdef __MMX_WITH_SSE__
> +#pragma GCC target("sse2")
> +#else
> #pragma GCC target("sse2,mmx")
> +#endif
> #define __DISABLE_SSE2__
> #endif /* __SSE2__ */
>
> @@ -427,7 +433,11 @@ _m_psubd (__m64 __m1, __m64 __m2)
> /* Add the 64-bit values in M1 to the 64-bit values in M2. */
> #ifndef __SSE2__
> #pragma GCC push_options
> +#ifdef __MMX_WITH_SSE__
> +#pragma GCC target("sse2")
> +#else
> #pragma GCC target("sse2,mmx")
> +#endif
> #define __DISABLE_SSE2__
> #endif
>
> Thanks.
>
> --
> H.J.
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [PATCH 37/40] i386: Allow MMX intrinsic emulation with SSE
2019-02-14 20:54 ` Uros Bizjak
@ 2019-02-14 21:02 ` H.J. Lu
2019-02-14 22:57 ` Uros Bizjak
0 siblings, 1 reply; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 21:02 UTC (permalink / raw)
To: Uros Bizjak; +Cc: gcc-patches
On Thu, Feb 14, 2019 at 12:54 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Thu, Feb 14, 2019 at 9:50 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Thu, Feb 14, 2019 at 12:07 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> > >
> > > On Thu, Feb 14, 2019 at 1:33 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > Allow MMX intrinsic emulation with SSE/SSE2/SSSE3. Don't enable MMX ISA
> > > > by default with TARGET_MMX_WITH_SSE.
> > > >
> > > > For pr82483-1.c and pr82483-2.c, "-mssse3 -mno-mmx" compiles in 64-bit
> > > > mode since MMX intrinsics can be emulated with SSE.
> > > >
> > > > gcc/
> > > >
> > > > PR target/89021
> > > > * config/i386/i386-builtin.def: Enable MMX intrinsics with
> > > > SSE/SSE2/SSSE3.
> > > > * config/i386/i386.c (ix86_option_override_internal): Don't
> > > > enable MMX ISA with TARGET_MMX_WITH_SSE by default.
> > > > (ix86_init_mmx_sse_builtins): Enable MMX intrinsics with
> > > > SSE/SSE2/SSSE3.
> > > > (ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX
> > > > intrinsics with TARGET_MMX_WITH_SSE.
> > > > * config/i386/mmintrin.h: Don't require MMX in 64-bit mode.
> > > >
> >
> > >
> > > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > > > index a9abbe8706b..1d417e08734 100644
> > > > --- a/gcc/config/i386/i386.c
> > > > +++ b/gcc/config/i386/i386.c
> > > > @@ -4165,12 +4165,15 @@ ix86_option_override_internal (bool main_args_p,
> > > > opts->x_target_flags
> > > > |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
> > > >
> > > > - /* Enable by default the SSE and MMX builtins. Do allow the user to
> > > > - explicitly disable any of these. In particular, disabling SSE and
> > > > - MMX for kernel code is extremely useful. */
> > > > + /* Enable the SSE and MMX builtins by default. Don't enable MMX
> > > > + ISA with TARGET_MMX_WITH_SSE by default. Do allow the user to
> > > > + explicitly disable any of these. In particular, disabling SSE
> > > > + and MMX for kernel code is extremely useful. */
> > > > if (!ix86_arch_specified)
> > > > opts->x_ix86_isa_flags
> > > > - |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
> > > > + |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
> > > > + | (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags)
> > > > + ? 0 : OPTION_MASK_ISA_MMX)
> > > > | TARGET_SUBTARGET64_ISA_DEFAULT)
> > > > & ~opts->x_ix86_isa_flags_explicit);
> > >
> > > Please split the above into two clauses, the first that sets SSE and
> > > MMX by default, and the second to or with
> > >
> > > opts->x_ix86_isa_flags
> > > |= TARGET_SUBTARGET64_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit
> > >
> >
> > Like this?
>
> Yes, but also split the comment.
I will go with
/* Enable by default the SSE and MMX builtins. Do allow the user to
explicitly disable any of these. In particular, disabling SSE and
MMX for kernel code is extremely useful. */
if (!ix86_arch_specified)
{
/* Don't enable MMX ISA with TARGET_MMX_WITH_SSE. */
opts->x_ix86_isa_flags
|= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
| (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags)
? 0 : OPTION_MASK_ISA_MMX))
& ~opts->x_ix86_isa_flags_explicit);
opts->x_ix86_isa_flags
|= (TARGET_SUBTARGET64_ISA_DEFAULT
& ~opts->x_ix86_isa_flags_explicit);
}
--
H.J.
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [PATCH 37/40] i386: Allow MMX intrinsic emulation with SSE
2019-02-14 21:02 ` H.J. Lu
@ 2019-02-14 22:57 ` Uros Bizjak
2019-02-14 23:13 ` H.J. Lu
0 siblings, 1 reply; 62+ messages in thread
From: Uros Bizjak @ 2019-02-14 22:57 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 3729 bytes --]
On Thu, Feb 14, 2019 at 10:02 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > > gcc/
> > > > >
> > > > > PR target/89021
> > > > > * config/i386/i386-builtin.def: Enable MMX intrinsics with
> > > > > SSE/SSE2/SSSE3.
> > > > > * config/i386/i386.c (ix86_option_override_internal): Don't
> > > > > enable MMX ISA with TARGET_MMX_WITH_SSE by default.
> > > > > (ix86_init_mmx_sse_builtins): Enable MMX intrinsics with
> > > > > SSE/SSE2/SSSE3.
> > > > > (ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX
> > > > > intrinsics with TARGET_MMX_WITH_SSE.
> > > > > * config/i386/mmintrin.h: Don't require MMX in 64-bit mode.
> > > > >
> > >
> > > >
> > > > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > > > > index a9abbe8706b..1d417e08734 100644
> > > > > --- a/gcc/config/i386/i386.c
> > > > > +++ b/gcc/config/i386/i386.c
> > > > > @@ -4165,12 +4165,15 @@ ix86_option_override_internal (bool main_args_p,
> > > > > opts->x_target_flags
> > > > > |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
> > > > >
> > > > > - /* Enable by default the SSE and MMX builtins. Do allow the user to
> > > > > - explicitly disable any of these. In particular, disabling SSE and
> > > > > - MMX for kernel code is extremely useful. */
> > > > > + /* Enable the SSE and MMX builtins by default. Don't enable MMX
> > > > > + ISA with TARGET_MMX_WITH_SSE by default. Do allow the user to
> > > > > + explicitly disable any of these. In particular, disabling SSE
> > > > > + and MMX for kernel code is extremely useful. */
> > > > > if (!ix86_arch_specified)
> > > > > opts->x_ix86_isa_flags
> > > > > - |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
> > > > > + |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
> > > > > + | (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags)
> > > > > + ? 0 : OPTION_MASK_ISA_MMX)
> > > > > | TARGET_SUBTARGET64_ISA_DEFAULT)
> > > > > & ~opts->x_ix86_isa_flags_explicit);
> > > >
> > > > Please split the above into two clauses, the first that sets SSE and
> > > > MMX by default, and the second to or with
> > > >
> > > > opts->x_ix86_isa_flags
> > > > |= TARGET_SUBTARGET64_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit
> > > >
> > >
> > > Like this?
> >
> > Yes, but also split the comment.
>
> I will go with
>
> /* Enable by default the SSE and MMX builtins. Do allow the user to
> explicitly disable any of these. In particular, disabling SSE and
> MMX for kernel code is extremely useful. */
> if (!ix86_arch_specified)
> {
> /* Don't enable MMX ISA with TARGET_MMX_WITH_SSE. */
> opts->x_ix86_isa_flags
> |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
> | (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags)
> ? 0 : OPTION_MASK_ISA_MMX))
> & ~opts->x_ix86_isa_flags_explicit);
> opts->x_ix86_isa_flags
> |= (TARGET_SUBTARGET64_ISA_DEFAULT
> & ~opts->x_ix86_isa_flags_explicit);
> }
I'll commit the following patch that finally defines
TARGET_SUBTARGET64_ISA_DEFAULT. You could then simply clear the MMX
bit from x_ix86_isa_flags, like:
if (!ix86_arch_specified)
opts->x_ix86_isa_flags
|= TARGET_SUBTARGET64_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
/* Don't enable MMX ISA with TARGET_MMX_WITH_SSE. */
if (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags))
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MMX;
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 1537 bytes --]
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 268907)
+++ config/i386/i386.c (working copy)
@@ -4165,14 +4165,9 @@ ix86_option_override_internal (bool main_args_p,
opts->x_target_flags
|= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
- /* Enable by default the SSE and MMX builtins. Do allow the user to
- explicitly disable any of these. In particular, disabling SSE and
- MMX for kernel code is extremely useful. */
if (!ix86_arch_specified)
- opts->x_ix86_isa_flags
- |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
- | TARGET_SUBTARGET64_ISA_DEFAULT)
- & ~opts->x_ix86_isa_flags_explicit);
+ opts->x_ix86_isa_flags
+ |= TARGET_SUBTARGET64_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
if (TARGET_RTD_P (opts->x_target_flags))
warning (0,
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h (revision 268907)
+++ config/i386/i386.h (working copy)
@@ -633,7 +633,9 @@ extern tree x86_mfence;
/* Extra bits to force on w/ 64-bit mode. */
#define TARGET_SUBTARGET64_DEFAULT 0
-#define TARGET_SUBTARGET64_ISA_DEFAULT 0
+/* Enable MMX, SSE and SSE2 by default. */
+#define TARGET_SUBTARGET64_ISA_DEFAULT \
+ (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2)
/* Replace MACH-O, ifdefs by in-line tests, where possible.
(a) Macros defined in config/i386/darwin.h */
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [PATCH 37/40] i386: Allow MMX intrinsic emulation with SSE
2019-02-14 22:57 ` Uros Bizjak
@ 2019-02-14 23:13 ` H.J. Lu
2019-02-14 23:14 ` H.J. Lu
0 siblings, 1 reply; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 23:13 UTC (permalink / raw)
To: Uros Bizjak; +Cc: gcc-patches
On Thu, Feb 14, 2019 at 2:57 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Thu, Feb 14, 2019 at 10:02 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> > > > > > gcc/
> > > > > >
> > > > > > PR target/89021
> > > > > > * config/i386/i386-builtin.def: Enable MMX intrinsics with
> > > > > > SSE/SSE2/SSSE3.
> > > > > > * config/i386/i386.c (ix86_option_override_internal): Don't
> > > > > > enable MMX ISA with TARGET_MMX_WITH_SSE by default.
> > > > > > (ix86_init_mmx_sse_builtins): Enable MMX intrinsics with
> > > > > > SSE/SSE2/SSSE3.
> > > > > > (ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX
> > > > > > intrinsics with TARGET_MMX_WITH_SSE.
> > > > > > * config/i386/mmintrin.h: Don't require MMX in 64-bit mode.
> > > > > >
> > > >
> > > > >
> > > > > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > > > > > index a9abbe8706b..1d417e08734 100644
> > > > > > --- a/gcc/config/i386/i386.c
> > > > > > +++ b/gcc/config/i386/i386.c
> > > > > > @@ -4165,12 +4165,15 @@ ix86_option_override_internal (bool main_args_p,
> > > > > > opts->x_target_flags
> > > > > > |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
> > > > > >
> > > > > > - /* Enable by default the SSE and MMX builtins. Do allow the user to
> > > > > > - explicitly disable any of these. In particular, disabling SSE and
> > > > > > - MMX for kernel code is extremely useful. */
> > > > > > + /* Enable the SSE and MMX builtins by default. Don't enable MMX
> > > > > > + ISA with TARGET_MMX_WITH_SSE by default. Do allow the user to
> > > > > > + explicitly disable any of these. In particular, disabling SSE
> > > > > > + and MMX for kernel code is extremely useful. */
> > > > > > if (!ix86_arch_specified)
> > > > > > opts->x_ix86_isa_flags
> > > > > > - |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
> > > > > > + |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
> > > > > > + | (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags)
> > > > > > + ? 0 : OPTION_MASK_ISA_MMX)
> > > > > > | TARGET_SUBTARGET64_ISA_DEFAULT)
> > > > > > & ~opts->x_ix86_isa_flags_explicit);
> > > > >
> > > > > Please split the above into two clauses, the first that sets SSE and
> > > > > MMX by default, and the second to or with
> > > > >
> > > > > opts->x_ix86_isa_flags
> > > > > |= TARGET_SUBTARGET64_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit
> > > > >
> > > >
> > > > Like this?
> > >
> > > Yes, but also split the comment.
> >
> > I will go with
> >
> > /* Enable by default the SSE and MMX builtins. Do allow the user to
> > explicitly disable any of these. In particular, disabling SSE and
> > MMX for kernel code is extremely useful. */
> > if (!ix86_arch_specified)
> > {
> > /* Don't enable MMX ISA with TARGET_MMX_WITH_SSE. */
> > opts->x_ix86_isa_flags
> > |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
> > | (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags)
> > ? 0 : OPTION_MASK_ISA_MMX))
> > & ~opts->x_ix86_isa_flags_explicit);
> > opts->x_ix86_isa_flags
> > |= (TARGET_SUBTARGET64_ISA_DEFAULT
> > & ~opts->x_ix86_isa_flags_explicit);
> > }
>
> I'll commit the following patch that finally defines
> TARGET_SUBTARGET64_ISA_DEFAULT. You could then simply clear the MMX
> bit from x_ix86_isa_flags, like:
>
> if (!ix86_arch_specified)
> opts->x_ix86_isa_flags
> |= TARGET_SUBTARGET64_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
>
> /* Don't enable MMX ISA with TARGET_MMX_WITH_SSE. */
> if (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags))
> opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MMX;
I think it should be:
/* Don't enable MMX ISA with TARGET_MMX_WITH_SSE. */
if (!(opts->x_ix86_isa_flags & OPTION_MASK_ISA_MMX)
&& TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags))
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MMX;
Thanks.
--
H.J.
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [PATCH 37/40] i386: Allow MMX intrinsic emulation with SSE
2019-02-14 23:13 ` H.J. Lu
@ 2019-02-14 23:14 ` H.J. Lu
2019-02-14 23:21 ` Uros Bizjak
0 siblings, 1 reply; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 23:14 UTC (permalink / raw)
To: Uros Bizjak; +Cc: gcc-patches
On Thu, Feb 14, 2019 at 3:12 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Thu, Feb 14, 2019 at 2:57 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Thu, Feb 14, 2019 at 10:02 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > > > > > > gcc/
> > > > > > >
> > > > > > > PR target/89021
> > > > > > > * config/i386/i386-builtin.def: Enable MMX intrinsics with
> > > > > > > SSE/SSE2/SSSE3.
> > > > > > > * config/i386/i386.c (ix86_option_override_internal): Don't
> > > > > > > enable MMX ISA with TARGET_MMX_WITH_SSE by default.
> > > > > > > (ix86_init_mmx_sse_builtins): Enable MMX intrinsics with
> > > > > > > SSE/SSE2/SSSE3.
> > > > > > > (ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX
> > > > > > > intrinsics with TARGET_MMX_WITH_SSE.
> > > > > > > * config/i386/mmintrin.h: Don't require MMX in 64-bit mode.
> > > > > > >
> > > > >
> > > > > >
> > > > > > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > > > > > > index a9abbe8706b..1d417e08734 100644
> > > > > > > --- a/gcc/config/i386/i386.c
> > > > > > > +++ b/gcc/config/i386/i386.c
> > > > > > > @@ -4165,12 +4165,15 @@ ix86_option_override_internal (bool main_args_p,
> > > > > > > opts->x_target_flags
> > > > > > > |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
> > > > > > >
> > > > > > > - /* Enable by default the SSE and MMX builtins. Do allow the user to
> > > > > > > - explicitly disable any of these. In particular, disabling SSE and
> > > > > > > - MMX for kernel code is extremely useful. */
> > > > > > > + /* Enable the SSE and MMX builtins by default. Don't enable MMX
> > > > > > > + ISA with TARGET_MMX_WITH_SSE by default. Do allow the user to
> > > > > > > + explicitly disable any of these. In particular, disabling SSE
> > > > > > > + and MMX for kernel code is extremely useful. */
> > > > > > > if (!ix86_arch_specified)
> > > > > > > opts->x_ix86_isa_flags
> > > > > > > - |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
> > > > > > > + |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
> > > > > > > + | (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags)
> > > > > > > + ? 0 : OPTION_MASK_ISA_MMX)
> > > > > > > | TARGET_SUBTARGET64_ISA_DEFAULT)
> > > > > > > & ~opts->x_ix86_isa_flags_explicit);
> > > > > >
> > > > > > Please split the above into two clauses, the first that sets SSE and
> > > > > > MMX by default, and the second to or with
> > > > > >
> > > > > > opts->x_ix86_isa_flags
> > > > > > |= TARGET_SUBTARGET64_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit
> > > > > >
> > > > >
> > > > > Like this?
> > > >
> > > > Yes, but also split the comment.
> > >
> > > I will go with
> > >
> > > /* Enable by default the SSE and MMX builtins. Do allow the user to
> > > explicitly disable any of these. In particular, disabling SSE and
> > > MMX for kernel code is extremely useful. */
> > > if (!ix86_arch_specified)
> > > {
> > > /* Don't enable MMX ISA with TARGET_MMX_WITH_SSE. */
> > > opts->x_ix86_isa_flags
> > > |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
> > > | (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags)
> > > ? 0 : OPTION_MASK_ISA_MMX))
> > > & ~opts->x_ix86_isa_flags_explicit);
> > > opts->x_ix86_isa_flags
> > > |= (TARGET_SUBTARGET64_ISA_DEFAULT
> > > & ~opts->x_ix86_isa_flags_explicit);
> > > }
> >
> > I'll commit the following patch that finally defines
> > TARGET_SUBTARGET64_ISA_DEFAULT. You could then simply clear the MMX
> > bit from x_ix86_isa_flags, like:
> >
> > if (!ix86_arch_specified)
> > opts->x_ix86_isa_flags
> > |= TARGET_SUBTARGET64_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
> >
> > /* Don't enable MMX ISA with TARGET_MMX_WITH_SSE. */
> > if (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags))
> > opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MMX;
>
> I think it should be:
>
> /* Don't enable MMX ISA with TARGET_MMX_WITH_SSE. */
> if (!(opts->x_ix86_isa_flags & OPTION_MASK_ISA_MMX)
I meant opts->x_ix86_isa_flags_explicit.
> && TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags))
> opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MMX;
>
> Thanks.
>
> --
> H.J.
--
H.J.
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [PATCH 37/40] i386: Allow MMX intrinsic emulation with SSE
2019-02-14 23:14 ` H.J. Lu
@ 2019-02-14 23:21 ` Uros Bizjak
2019-02-14 23:24 ` H.J. Lu
0 siblings, 1 reply; 62+ messages in thread
From: Uros Bizjak @ 2019-02-14 23:21 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
On Fri, Feb 15, 2019 at 12:14 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > > > > > gcc/
> > > > > > > >
> > > > > > > > PR target/89021
> > > > > > > > * config/i386/i386-builtin.def: Enable MMX intrinsics with
> > > > > > > > SSE/SSE2/SSSE3.
> > > > > > > > * config/i386/i386.c (ix86_option_override_internal): Don't
> > > > > > > > enable MMX ISA with TARGET_MMX_WITH_SSE by default.
> > > > > > > > (ix86_init_mmx_sse_builtins): Enable MMX intrinsics with
> > > > > > > > SSE/SSE2/SSSE3.
> > > > > > > > (ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX
> > > > > > > > intrinsics with TARGET_MMX_WITH_SSE.
> > > > > > > > * config/i386/mmintrin.h: Don't require MMX in 64-bit mode.
> > > > > > > >
> > > > > >
> > > > > > >
> > > > > > > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > > > > > > > index a9abbe8706b..1d417e08734 100644
> > > > > > > > --- a/gcc/config/i386/i386.c
> > > > > > > > +++ b/gcc/config/i386/i386.c
> > > > > > > > @@ -4165,12 +4165,15 @@ ix86_option_override_internal (bool main_args_p,
> > > > > > > > opts->x_target_flags
> > > > > > > > |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
> > > > > > > >
> > > > > > > > - /* Enable by default the SSE and MMX builtins. Do allow the user to
> > > > > > > > - explicitly disable any of these. In particular, disabling SSE and
> > > > > > > > - MMX for kernel code is extremely useful. */
> > > > > > > > + /* Enable the SSE and MMX builtins by default. Don't enable MMX
> > > > > > > > + ISA with TARGET_MMX_WITH_SSE by default. Do allow the user to
> > > > > > > > + explicitly disable any of these. In particular, disabling SSE
> > > > > > > > + and MMX for kernel code is extremely useful. */
> > > > > > > > if (!ix86_arch_specified)
> > > > > > > > opts->x_ix86_isa_flags
> > > > > > > > - |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
> > > > > > > > + |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
> > > > > > > > + | (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags)
> > > > > > > > + ? 0 : OPTION_MASK_ISA_MMX)
> > > > > > > > | TARGET_SUBTARGET64_ISA_DEFAULT)
> > > > > > > > & ~opts->x_ix86_isa_flags_explicit);
> > > > > > >
> > > > > > > Please split the above into two clauses, the first that sets SSE and
> > > > > > > MMX by default, and the second to or with
> > > > > > >
> > > > > > > opts->x_ix86_isa_flags
> > > > > > > |= TARGET_SUBTARGET64_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit
> > > > > > >
> > > > > >
> > > > > > Like this?
> > > > >
> > > > > Yes, but also split the comment.
> > > >
> > > > I will go with
> > > >
> > > > /* Enable by default the SSE and MMX builtins. Do allow the user to
> > > > explicitly disable any of these. In particular, disabling SSE and
> > > > MMX for kernel code is extremely useful. */
> > > > if (!ix86_arch_specified)
> > > > {
> > > > /* Don't enable MMX ISA with TARGET_MMX_WITH_SSE. */
> > > > opts->x_ix86_isa_flags
> > > > |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
> > > > | (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags)
> > > > ? 0 : OPTION_MASK_ISA_MMX))
> > > > & ~opts->x_ix86_isa_flags_explicit);
> > > > opts->x_ix86_isa_flags
> > > > |= (TARGET_SUBTARGET64_ISA_DEFAULT
> > > > & ~opts->x_ix86_isa_flags_explicit);
> > > > }
> > >
> > > I'll commit the following patch that finally defines
> > > TARGET_SUBTARGET64_ISA_DEFAULT. You could then simply clear the MMX
> > > bit from x_ix86_isa_flags, like:
> > >
> > > if (!ix86_arch_specified)
> > > opts->x_ix86_isa_flags
> > > |= TARGET_SUBTARGET64_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
> > >
> > > /* Don't enable MMX ISA with TARGET_MMX_WITH_SSE. */
> > > if (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags))
> > > opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MMX;
> >
> > I think it should be:
> >
> > /* Don't enable MMX ISA with TARGET_MMX_WITH_SSE. */
> > if (!(opts->x_ix86_isa_flags & OPTION_MASK_ISA_MMX)
> I meant opts->x_ix86_isa_flags_explicit.
> > && TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags))
> > opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MMX;
Well ... I didn't test this part. OTOH, maybe this part is not needed,
MMX disabling can go *after*
/* Turn on MMX builtins for -msse. */
if (TARGET_SSE_P (opts->x_ix86_isa_flags))
opts->x_ix86_isa_flags
|= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
Uros.
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [PATCH 37/40] i386: Allow MMX intrinsic emulation with SSE
2019-02-14 23:21 ` Uros Bizjak
@ 2019-02-14 23:24 ` H.J. Lu
0 siblings, 0 replies; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 23:24 UTC (permalink / raw)
To: Uros Bizjak; +Cc: gcc-patches
On Thu, Feb 14, 2019 at 3:21 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Fri, Feb 15, 2019 at 12:14 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> > > > > > > > > gcc/
> > > > > > > > >
> > > > > > > > > PR target/89021
> > > > > > > > > * config/i386/i386-builtin.def: Enable MMX intrinsics with
> > > > > > > > > SSE/SSE2/SSSE3.
> > > > > > > > > * config/i386/i386.c (ix86_option_override_internal): Don't
> > > > > > > > > enable MMX ISA with TARGET_MMX_WITH_SSE by default.
> > > > > > > > > (ix86_init_mmx_sse_builtins): Enable MMX intrinsics with
> > > > > > > > > SSE/SSE2/SSSE3.
> > > > > > > > > (ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX
> > > > > > > > > intrinsics with TARGET_MMX_WITH_SSE.
> > > > > > > > > * config/i386/mmintrin.h: Don't require MMX in 64-bit mode.
> > > > > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > > > > > > > > index a9abbe8706b..1d417e08734 100644
> > > > > > > > > --- a/gcc/config/i386/i386.c
> > > > > > > > > +++ b/gcc/config/i386/i386.c
> > > > > > > > > @@ -4165,12 +4165,15 @@ ix86_option_override_internal (bool main_args_p,
> > > > > > > > > opts->x_target_flags
> > > > > > > > > |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
> > > > > > > > >
> > > > > > > > > - /* Enable by default the SSE and MMX builtins. Do allow the user to
> > > > > > > > > - explicitly disable any of these. In particular, disabling SSE and
> > > > > > > > > - MMX for kernel code is extremely useful. */
> > > > > > > > > + /* Enable the SSE and MMX builtins by default. Don't enable MMX
> > > > > > > > > + ISA with TARGET_MMX_WITH_SSE by default. Do allow the user to
> > > > > > > > > + explicitly disable any of these. In particular, disabling SSE
> > > > > > > > > + and MMX for kernel code is extremely useful. */
> > > > > > > > > if (!ix86_arch_specified)
> > > > > > > > > opts->x_ix86_isa_flags
> > > > > > > > > - |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
> > > > > > > > > + |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
> > > > > > > > > + | (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags)
> > > > > > > > > + ? 0 : OPTION_MASK_ISA_MMX)
> > > > > > > > > | TARGET_SUBTARGET64_ISA_DEFAULT)
> > > > > > > > > & ~opts->x_ix86_isa_flags_explicit);
> > > > > > > >
> > > > > > > > Please split the above into two clauses, the first that sets SSE and
> > > > > > > > MMX by default, and the second to or with
> > > > > > > >
> > > > > > > > opts->x_ix86_isa_flags
> > > > > > > > |= TARGET_SUBTARGET64_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit
> > > > > > > >
> > > > > > >
> > > > > > > Like this?
> > > > > >
> > > > > > Yes, but also split the comment.
> > > > >
> > > > > I will go with
> > > > >
> > > > > /* Enable by default the SSE and MMX builtins. Do allow the user to
> > > > > explicitly disable any of these. In particular, disabling SSE and
> > > > > MMX for kernel code is extremely useful. */
> > > > > if (!ix86_arch_specified)
> > > > > {
> > > > > /* Don't enable MMX ISA with TARGET_MMX_WITH_SSE. */
> > > > > opts->x_ix86_isa_flags
> > > > > |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
> > > > > | (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags)
> > > > > ? 0 : OPTION_MASK_ISA_MMX))
> > > > > & ~opts->x_ix86_isa_flags_explicit);
> > > > > opts->x_ix86_isa_flags
> > > > > |= (TARGET_SUBTARGET64_ISA_DEFAULT
> > > > > & ~opts->x_ix86_isa_flags_explicit);
> > > > > }
> > > >
> > > > I'll commit the following patch that finally defines
> > > > TARGET_SUBTARGET64_ISA_DEFAULT. You could then simply clear the MMX
> > > > bit from x_ix86_isa_flags, like:
> > > >
> > > > if (!ix86_arch_specified)
> > > > opts->x_ix86_isa_flags
> > > > |= TARGET_SUBTARGET64_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
> > > >
> > > > /* Don't enable MMX ISA with TARGET_MMX_WITH_SSE. */
> > > > if (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags))
> > > > opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MMX;
> > >
> > > I think it should be:
> > >
> > > /* Don't enable MMX ISA with TARGET_MMX_WITH_SSE. */
> > > if (!(opts->x_ix86_isa_flags & OPTION_MASK_ISA_MMX)
> > I meant opts->x_ix86_isa_flags_explicit.
> > > && TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags))
> > > opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MMX;
>
> Well ... I didn't test this part. OTOH, maybe this part is not needed,
> MMX disabling can go *after*
>
> /* Turn on MMX builtins for -msse. */
> if (TARGET_SSE_P (opts->x_ix86_isa_flags))
> opts->x_ix86_isa_flags
> |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
>
It works only if TARGET_SUBTARGET64_ISA_DEFAULT doesn't
include OPTION_MASK_ISA_MMX.
--
H.J.
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [PATCH 37/40] i386: Allow MMX intrinsic emulation with SSE
2019-02-14 12:33 ` [PATCH 37/40] i386: Allow MMX intrinsic emulation with SSE H.J. Lu
2019-02-14 20:07 ` Uros Bizjak
@ 2019-02-15 12:04 ` Uros Bizjak
1 sibling, 0 replies; 62+ messages in thread
From: Uros Bizjak @ 2019-02-15 12:04 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
On Thu, Feb 14, 2019 at 1:33 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Allow MMX intrinsic emulation with SSE/SSE2/SSSE3. Don't enable MMX ISA
> by default with TARGET_MMX_WITH_SSE.
>
> For pr82483-1.c and pr82483-2.c, "-mssse3 -mno-mmx" compiles in 64-bit
> mode since MMX intrinsics can be emulated with SSE.
>
> gcc/
>
> PR target/89021
> * config/i386/i386-builtin.def: Enable MMX intrinsics with
> SSE/SSE2/SSSE3.
> * config/i386/i386.c (ix86_option_override_internal): Don't
> enable MMX ISA with TARGET_MMX_WITH_SSE by default.
> (ix86_init_mmx_sse_builtins): Enable MMX intrinsics with
> SSE/SSE2/SSSE3.
> (ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX
> intrinsics with TARGET_MMX_WITH_SSE.
> * config/i386/mmintrin.h: Don't require MMX in 64-bit mode.
>
> gcc/testsuite/
>
> PR target/89021
> * gcc.target/i386/pr82483-1.c: Error only on ia32.
> * gcc.target/i386/pr82483-2.c: Likewise.
> ---
> gcc/config/i386/i386-builtin.def | 126 +++++++++++-----------
> gcc/config/i386/i386.c | 46 ++++++--
> gcc/config/i386/mmintrin.h | 10 +-
> gcc/testsuite/gcc.target/i386/pr82483-1.c | 2 +-
> gcc/testsuite/gcc.target/i386/pr82483-2.c | 2 +-
> 5 files changed, 110 insertions(+), 76 deletions(-)
>
> diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> index 88005f4687f..10a9d631f29 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -100,7 +100,7 @@ BDESC (0, 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKN
> BDESC (0, 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID)
>
> /* MMX */
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
>
> /* 3DNow! */
> BDESC (OPTION_MASK_ISA_3DNOW, 0, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
> @@ -442,68 +442,68 @@ BDESC (0, 0, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNO
> BDESC (0, 0, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT)
>
> /* MMX */
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
>
> /* 3DNow! */
> BDESC (OPTION_MASK_ISA_3DNOW, 0, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF)
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index a9abbe8706b..1d417e08734 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -4165,12 +4165,15 @@ ix86_option_override_internal (bool main_args_p,
> opts->x_target_flags
> |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
>
> - /* Enable by default the SSE and MMX builtins. Do allow the user to
> - explicitly disable any of these. In particular, disabling SSE and
> - MMX for kernel code is extremely useful. */
> + /* Enable the SSE and MMX builtins by default. Don't enable MMX
> + ISA with TARGET_MMX_WITH_SSE by default. Do allow the user to
> + explicitly disable any of these. In particular, disabling SSE
> + and MMX for kernel code is extremely useful. */
> if (!ix86_arch_specified)
> opts->x_ix86_isa_flags
> - |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
> + |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
> + | (TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags)
> + ? 0 : OPTION_MASK_ISA_MMX)
> | TARGET_SUBTARGET64_ISA_DEFAULT)
> & ~opts->x_ix86_isa_flags_explicit);
It looks to me that the above change is not needed at all if
__MMX_WITH_SSE__ is used in intrinsics headers. We have to be agnostic
to the TARGET_MMX setting; the SSE2 alternatives in the instructions are
selected when TARGET_MMX_WITH_SSE is enabled.
> @@ -4216,8 +4219,10 @@ ix86_option_override_internal (bool main_args_p,
> if (!TARGET_80387_P (opts->x_target_flags))
> opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
>
> - /* Turn on MMX builtins for -msse. */
> - if (TARGET_SSE_P (opts->x_ix86_isa_flags))
> + /* Turn on MMX builtins for -msse. Don't enable MMX ISA with
> + TARGET_MMX_WITH_SSE. */
> + if (TARGET_SSE_P (opts->x_ix86_isa_flags)
> + && !TARGET_MMX_WITH_SSE_P (opts->x_ix86_isa_flags))
> opts->x_ix86_isa_flags
> |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
The above change is also not needed.
Uros.
>
> @@ -31769,14 +31774,17 @@ ix86_init_mmx_sse_builtins (void)
> VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
>
> /* MMX access to the vec_init patterns. */
> - def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v2si",
> + def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> + "__builtin_ia32_vec_init_v2si",
> V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
>
> - def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v4hi",
> + def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> + "__builtin_ia32_vec_init_v4hi",
> V4HI_FTYPE_HI_HI_HI_HI,
> IX86_BUILTIN_VEC_INIT_V4HI);
>
> - def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v8qi",
> + def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> + "__builtin_ia32_vec_init_v8qi",
> V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
> IX86_BUILTIN_VEC_INIT_V8QI);
>
> @@ -31798,7 +31806,8 @@ ix86_init_mmx_sse_builtins (void)
> "__builtin_ia32_vec_ext_v4hi",
> HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
>
> - def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_ext_v2si",
> + def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> + "__builtin_ia32_vec_ext_v2si",
> SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
>
> def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v16qi",
> @@ -36931,6 +36940,23 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
> == (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4))
> && (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0)
> isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4);
> + /* Use SSE/SSE2/SSSE3 to emulate MMX intrinsics in 64-bit mode when
> + MMX is disabled. */
> + if (TARGET_MMX_WITH_SSE)
> + {
> + if (((bisa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX))
> + == (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX))
> + && (isa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX)) != 0)
> + isa |= (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX);
> + if (((bisa & (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX))
> + == (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX))
> + && (isa & (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX)) != 0)
> + isa |= (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX);
> + if (((bisa & (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX))
> + == (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX))
> + && (isa & (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX)) != 0)
> + isa |= (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX);
> + }
> if ((bisa & isa) != bisa || (bisa2 & isa2) != bisa2)
> {
> char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL,
> diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h
> index 238b3df3121..7b613658111 100644
> --- a/gcc/config/i386/mmintrin.h
> +++ b/gcc/config/i386/mmintrin.h
> @@ -30,7 +30,7 @@
> #if defined __x86_64__ && !defined __SSE__ || !defined __MMX__
> #pragma GCC push_options
> #ifdef __x86_64__
> -#pragma GCC target("sse,mmx")
> +#pragma GCC target("sse2")
> #else
> #pragma GCC target("mmx")
> #endif
> @@ -315,7 +315,11 @@ _m_paddd (__m64 __m1, __m64 __m2)
> /* Add the 64-bit values in M1 to the 64-bit values in M2. */
> #ifndef __SSE2__
> #pragma GCC push_options
> +#ifdef __x86_64__
> +#pragma GCC target("sse2")
> +#else
> #pragma GCC target("sse2,mmx")
> +#endif
> #define __DISABLE_SSE2__
> #endif /* __SSE2__ */
>
> @@ -427,7 +431,11 @@ _m_psubd (__m64 __m1, __m64 __m2)
> /* Add the 64-bit values in M1 to the 64-bit values in M2. */
> #ifndef __SSE2__
> #pragma GCC push_options
> +#ifdef __x86_64__
> +#pragma GCC target("sse2")
> +#else
> #pragma GCC target("sse2,mmx")
> +#endif
> #define __DISABLE_SSE2__
> #endif /* __SSE2__ */
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr82483-1.c b/gcc/testsuite/gcc.target/i386/pr82483-1.c
> index 59a59dc8dfe..b2028d8dc5e 100644
> --- a/gcc/testsuite/gcc.target/i386/pr82483-1.c
> +++ b/gcc/testsuite/gcc.target/i386/pr82483-1.c
> @@ -1,7 +1,7 @@
> /* PR target/82483 */
> /* { dg-do compile } */
> /* { dg-options "-mssse3 -mno-mmx -Wno-psabi" } */
> -/* { dg-error "needs isa option" "" { target *-*-* } 0 } */
> +/* { dg-error "needs isa option" "" { target ia32 } 0 } */
>
> #include <x86intrin.h>
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr82483-2.c b/gcc/testsuite/gcc.target/i386/pr82483-2.c
> index 305ddbd6c64..c92de405cb3 100644
> --- a/gcc/testsuite/gcc.target/i386/pr82483-2.c
> +++ b/gcc/testsuite/gcc.target/i386/pr82483-2.c
> @@ -1,7 +1,7 @@
> /* PR target/82483 */
> /* { dg-do compile } */
> /* { dg-options "-mssse3 -mno-mmx -Wno-psabi" } */
> -/* { dg-error "needs isa option" "" { target *-*-* } 0 } */
> +/* { dg-error "needs isa option" "" { target ia32 } 0 } */
>
> #include <x86intrin.h>
>
> --
> 2.20.1
>
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 25/40] i386: Emulate MMX movntq with SSE2 movntidi
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (38 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 37/40] i386: Allow MMX intrinsic emulation with SSE H.J. Lu
@ 2019-02-14 12:33 ` H.J. Lu
2019-02-14 14:17 ` Uros Bizjak
2019-02-14 18:18 ` [PATCH 41/40] Prevent allocation of MMX registers with TARGET_MMX_WITH_SSE Uros Bizjak
40 siblings, 1 reply; 62+ messages in thread
From: H.J. Lu @ 2019-02-14 12:33 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX movntq with SSE2 movntidi. Only SSE register source operand
is allowed.
PR target/89021
* config/i386/mmx.md (sse_movntq): Add SSE2 emulation.
---
gcc/config/i386/mmx.md | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 0c08aebb071..274e895f51e 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -214,12 +214,16 @@
})
(define_insn "sse_movntq"
- [(set (match_operand:DI 0 "memory_operand" "=m")
- (unspec:DI [(match_operand:DI 1 "register_operand" "y")]
+ [(set (match_operand:DI 0 "memory_operand" "=m,m")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "y,r")]
UNSPEC_MOVNTQ))]
- "TARGET_SSE || TARGET_3DNOW_A"
- "movntq\t{%1, %0|%0, %1}"
- [(set_attr "type" "mmxmov")
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
+ "@
+ movntq\t{%1, %0|%0, %1}
+ movnti\t{%1, %0|%0, %1}"
+ [(set_attr "mmx_isa" "native,x64")
+ (set_attr "type" "mmxmov,ssemov")
(set_attr "mode" "DI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
--
2.20.1
^ permalink raw reply [flat|nested] 62+ messages in thread
* Re: [PATCH 25/40] i386: Emulate MMX movntq with SSE2 movntidi
2019-02-14 12:33 ` [PATCH 25/40] i386: Emulate MMX movntq with SSE2 movntidi H.J. Lu
@ 2019-02-14 14:17 ` Uros Bizjak
0 siblings, 0 replies; 62+ messages in thread
From: Uros Bizjak @ 2019-02-14 14:17 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
On Thu, Feb 14, 2019 at 1:30 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Emulate MMX movntq with SSE2 movntidi. Only SSE register source operand
> is allowed.
There is no SSE register source operand. This should probably read "Only
register source operand is allowed."
Uros.
>
> PR target/89021
> * config/i386/mmx.md (sse_movntq): Add SSE2 emulation.
> ---
> gcc/config/i386/mmx.md | 14 +++++++++-----
> 1 file changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 0c08aebb071..274e895f51e 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -214,12 +214,16 @@
> })
>
> (define_insn "sse_movntq"
> - [(set (match_operand:DI 0 "memory_operand" "=m")
> - (unspec:DI [(match_operand:DI 1 "register_operand" "y")]
> + [(set (match_operand:DI 0 "memory_operand" "=m,m")
> + (unspec:DI [(match_operand:DI 1 "register_operand" "y,r")]
> UNSPEC_MOVNTQ))]
> - "TARGET_SSE || TARGET_3DNOW_A"
> - "movntq\t{%1, %0|%0, %1}"
> - [(set_attr "type" "mmxmov")
> + "(TARGET_MMX || TARGET_MMX_WITH_SSE)
> + && (TARGET_SSE || TARGET_3DNOW_A)"
> + "@
> + movntq\t{%1, %0|%0, %1}
> + movnti\t{%1, %0|%0, %1}"
> + [(set_attr "mmx_isa" "native,x64")
> + (set_attr "type" "mmxmov,ssemov")
> (set_attr "mode" "DI")])
>
> ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> --
> 2.20.1
>
^ permalink raw reply [flat|nested] 62+ messages in thread
* [PATCH 41/40] Prevent allocation of MMX registers with TARGET_MMX_WITH_SSE
2019-02-14 12:31 [PATCH 00/40] V5: Emulate MMX intrinsics with SSE H.J. Lu
` (39 preceding siblings ...)
2019-02-14 12:33 ` [PATCH 25/40] i386: Emulate MMX movntq with SSE2 movntidi H.J. Lu
@ 2019-02-14 18:18 ` Uros Bizjak
40 siblings, 0 replies; 62+ messages in thread
From: Uros Bizjak @ 2019-02-14 18:18 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 527 bytes --]
You will also need the following (untested) patch that prevents
allocation of MMX registers with TARGET_MMX_WITH_SSE in several insn
patterns.
2019-02-14 Uroš Bizjak <ubizjak@gmail.com>
PR target/89021
* config/i386/i386.md (*zero_extendsidi2): Add mmx_isa attribute.
* config/i386/sse.md (*vec_concatv2sf_sse4_1): Ditto.
(*vec_concatv2sf_sse): Ditto.
(*vec_concatv2si_sse4_1): Ditto.
(*vec_concatv2si): Ditto.
(*vec_concatv4si_0): Ditto.
(*vec_concatv2di_0): Ditto.
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 2531 bytes --]
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 268854)
+++ config/i386/i386.md (working copy)
@@ -3855,6 +3855,10 @@
(const_string "avx512bw")
]
(const_string "*")))
+ (set (attr "mmx_isa")
+ (if_then_else (eq_attr "alternative" "5,6")
+ (const_string "native")
+ (const_string "*")))
(set (attr "type")
(cond [(eq_attr "alternative" "0,1,2,4")
(const_string "multi")
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md (revision 268855)
+++ config/i386/sse.md (working copy)
@@ -7241,6 +7241,10 @@
(const_string "mmxmov")
]
(const_string "sselog")))
+ (set (attr "mmx_isa")
+ (if_then_else (eq_attr "alternative" "7,8")
+ (const_string "native")
+ (const_string "*")))
(set (attr "prefix_data16")
(if_then_else (eq_attr "alternative" "3,4")
(const_string "1")
@@ -7276,7 +7280,8 @@
movss\t{%1, %0|%0, %1}
punpckldq\t{%2, %0|%0, %2}
movd\t{%1, %0|%0, %1}"
- [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
+ [(set_attr "mmx_isa" "*,*,native,native")
+ (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
(set_attr "mode" "V4SF,SF,DI,DI")])
(define_insn "*vec_concatv4sf"
@@ -14549,6 +14554,10 @@
punpckldq\t{%2, %0|%0, %2}
movd\t{%1, %0|%0, %1}"
[(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
+ (set (attr "mmx_isa")
+ (if_then_else (eq_attr "alternative" "8,9")
+ (const_string "native")
+ (const_string "*")))
(set (attr "type")
(cond [(eq_attr "alternative" "7")
(const_string "ssemov")
@@ -14586,6 +14595,7 @@
punpckldq\t{%2, %0|%0, %2}
movd\t{%1, %0|%0, %1}"
[(set_attr "isa" "sse2,sse2,*,*,*,*")
+ (set_attr "mmx_isa" "*,*,*,*,native,native")
(set_attr "type" "sselog,ssemov,sselog,ssemov,mmxcvt,mmxmov")
(set_attr "mode" "TI,TI,V4SF,SF,DI,DI")])
@@ -14615,7 +14625,8 @@
"@
%vmovq\t{%1, %0|%0, %1}
movq2dq\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
+ [(set_attr "mmx_isa" "*,native")
+ (set_attr "type" "ssemov")
(set_attr "prefix" "maybe_vex,orig")
(set_attr "mode" "TI")])
@@ -14690,6 +14701,7 @@
%vmovq\t{%1, %0|%0, %1}
movq2dq\t{%1, %0|%0, %1}"
[(set_attr "isa" "x64,*,*")
+ (set_attr "mmx_isa" "*,*,native")
(set_attr "type" "ssemov")
(set_attr "prefix_rex" "1,*,*")
(set_attr "prefix" "maybe_vex,maybe_vex,orig")
^ permalink raw reply [flat|nested] 62+ messages in thread