public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 08/43] i386: Emulate MMX ashr<mode>3/<shift_insn><mode>3 with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
@ 2019-02-09 13:23 ` H.J. Lu
  2019-02-09 13:24 ` [PATCH 17/43] i386: Emulate MMX mmx_pinsrw " H.J. Lu
                   ` (42 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:23 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX ashr<mode>3/<shift_insn><mode>3 with SSE.  Only SSE register
source operand is allowed.

	PR target/89021
	* config/i386/mmx.md (mmx_ashr<mode>3): Disallow with
	TARGET_MMX_WITH_SSE.
	(mmx_<shift_insn><mode>3): Likewise.
	(ashr<mode>3): New.
	(<shift_insn><mode>3): Likewise.
---
 gcc/config/i386/mmx.md | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index ec1c7402603..69c66e968b5 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -995,7 +995,7 @@
         (ashiftrt:MMXMODE24
 	  (match_operand:MMXMODE24 1 "register_operand" "0")
 	  (match_operand:DI 2 "nonmemory_operand" "yN")))]
-  "TARGET_MMX"
+  "TARGET_MMX && !TARGET_MMX_WITH_SSE"
   "psra<mmxvecsize>\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxshft")
    (set (attr "length_immediate")
@@ -1009,7 +1009,7 @@
         (any_lshift:MMXMODE248
 	  (match_operand:MMXMODE248 1 "register_operand" "0")
 	  (match_operand:DI 2 "nonmemory_operand" "yN")))]
-  "TARGET_MMX"
+  "TARGET_MMX && !TARGET_MMX_WITH_SSE"
   "p<vshift><mmxvecsize>\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxshft")
    (set (attr "length_immediate")
@@ -1018,6 +1018,40 @@
        (const_string "0")))
    (set_attr "mode" "DI")])
 
+(define_insn "ashr<mode>3"
+  [(set (match_operand:MMXMODE24 0 "register_operand" "=Yx,Yy")
+        (ashiftrt:MMXMODE24
+	  (match_operand:MMXMODE24 1 "register_operand" "0,Yy")
+	  (match_operand:DI 2 "nonmemory_operand" "YxN,YyN")))]
+  "TARGET_MMX_WITH_SSE"
+  "@
+   psra<mmxvecsize>\t{%2, %0|%0, %2}
+   vpsra<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseishft,sseishft")
+   (set (attr "length_immediate")
+     (if_then_else (match_operand 2 "const_int_operand")
+       (const_string "1")
+       (const_string "0")))
+   (set_attr "mode" "TI")])
+
+(define_insn "<shift_insn><mode>3"
+  [(set (match_operand:MMXMODE248 0 "register_operand" "=Yx,Yy")
+        (any_lshift:MMXMODE248
+	  (match_operand:MMXMODE248 1 "register_operand" "0,Yy")
+	  (match_operand:DI 2 "nonmemory_operand" "YxN,YyN")))]
+  "TARGET_MMX_WITH_SSE"
+  "@
+   p<vshift><mmxvecsize>\t{%2, %0|%0, %2}
+   vp<vshift><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseishft,sseishft")
+   (set (attr "length_immediate")
+     (if_then_else (match_operand 2 "const_int_operand")
+       (const_string "1")
+       (const_string "0")))
+   (set_attr "mode" "TI")])
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel integral comparisons
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 07/43] i386: Emulate MMX mmx_pmaddwd with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (2 preceding siblings ...)
  2019-02-09 13:24 ` [PATCH 03/43] i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX H.J. Lu
@ 2019-02-09 13:24 ` H.J. Lu
  2019-02-09 13:24 ` [PATCH 09/43] i386: Emulate MMX <any_logic><mode>3 " H.J. Lu
                   ` (39 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX pmaddwd with SSE.  Only SSE register source operand is
allowed.

	PR target/89021
	* config/i386/mmx.md (mmx_pmaddwd): Also allow TARGET_MMX_WITH_SSE.
	(*mmx_pmaddwd): Also allow TARGET_MMX_WITH_SSE.  Add SSE support.
---
 gcc/config/i386/mmx.md | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 65883a68531..ec1c7402603 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -855,20 +855,20 @@
 	    (sign_extend:V2SI
 	      (vec_select:V2HI (match_dup 2)
 		(parallel [(const_int 1) (const_int 3)]))))))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
 (define_insn "*mmx_pmaddwd"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+  [(set (match_operand:V2SI 0 "register_operand" "=y,Yx,Yy")
         (plus:V2SI
 	  (mult:V2SI
 	    (sign_extend:V2SI
 	      (vec_select:V2HI
-		(match_operand:V4HI 1 "nonimmediate_operand" "%0")
+		(match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yy")
 		(parallel [(const_int 0) (const_int 2)])))
 	    (sign_extend:V2SI
 	      (vec_select:V2HI
-		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
+		(match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy")
 		(parallel [(const_int 0) (const_int 2)]))))
 	  (mult:V2SI
 	    (sign_extend:V2SI
@@ -877,10 +877,15 @@
 	    (sign_extend:V2SI
 	      (vec_select:V2HI (match_dup 2)
 		(parallel [(const_int 1) (const_int 3)]))))))]
-  "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmaddwd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+  "@
+   pmaddwd\t{%2, %0|%0, %2}
+   pmaddwd\t{%2, %0|%0, %2}
+   vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_pmulhrwv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 11/43] i386: Emulate MMX mmx_eq/mmx_gt<mode>3 with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (9 preceding siblings ...)
  2019-02-09 13:24 ` [PATCH 14/43] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi " H.J. Lu
@ 2019-02-09 13:24 ` H.J. Lu
  2019-02-09 13:24 ` [PATCH 12/43] i386: Emulate MMX vec_dupv2si " H.J. Lu
                   ` (32 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX mmx_eq/mmx_gt<mode>3 with SSE.  Only SSE register source
operand is allowed.

	PR target/89021
	* config/i386/mmx.md (mmx_eq<mode>3): Also allow
	TARGET_MMX_WITH_SSE.
	(*mmx_eq<mode>3): Also allow TARGET_MMX_WITH_SSE.  Add SSE
	support.
	(mmx_gt<mode>3): Likewise.
---
 gcc/config/i386/mmx.md | 39 ++++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 1e235bfcde4..73fdef3ba1e 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1063,28 +1063,37 @@
         (eq:MMXMODEI
 	  (match_operand:MMXMODEI 1 "nonimmediate_operand")
 	  (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
 
 (define_insn "*mmx_eq<mode>3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yx,Yy")
         (eq:MMXMODEI
-	  (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
-	  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
-  "pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
+	  (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0,0,Yy")
+	  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,Yx,Yy")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
+  "@
+   pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}
+   pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}
+   vpcmpeq<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcmp,ssecmp,ssecmp")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_gt<mode>3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yx,Yy")
         (gt:MMXMODEI
-	  (match_operand:MMXMODEI 1 "register_operand" "0")
-	  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
+	  (match_operand:MMXMODEI 1 "register_operand" "0,0,Yy")
+	  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,Yx,Yy")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}
+   pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}
+   vpcmpgt<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcmp,ssecmp,ssecmp")
+   (set_attr "mode" "DI,TI,TI")])
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 00/43] V2: Emulate MMX intrinsics with SSE
@ 2019-02-09 13:24 H.J. Lu
  2019-02-09 13:23 ` [PATCH 08/43] i386: Emulate MMX ashr<mode>3/<shift_insn><mode>3 " H.J. Lu
                   ` (43 more replies)
  0 siblings, 44 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

On x86-64, since __m64 is returned and passed in XMM registers, we can
emulate MMX intrinsics with SSE instructions. To support it, we added

 #define TARGET_MMX_WITH_SSE \
  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)

SSE emulation is disabled for 3DNOW since 3DNOW patterns haven't been
updated with SSE emulation.

;; Define instruction set of MMX instructions
(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx" (const_string "base"))

         (eq_attr "mmx_isa" "native")
           (symbol_ref "!TARGET_MMX_WITH_SSE")
         (eq_attr "mmx_isa" "x64")
           (symbol_ref "TARGET_MMX_WITH_SSE")
         (eq_attr "mmx_isa" "x64_avx")
           (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
         (eq_attr "mmx_isa" "x64_noavx")
           (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")

(define_register_constraint "Yx" "TARGET_MMX_WITH_SSE ? SSE_REGS : NO_REGS"
 "@internal Any SSE register if MMX is disabled in 64-bit mode.")

(define_register_constraint "Yy"
 "TARGET_MMX_WITH_SSE ? (TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS) : NO_REGS"
 "@internal Any EVEX encodable SSE register for AVX512VL target, otherwise any SSE register if MMX is disabled in 64-bit mode.")

We added SSE emulation to MMX patterns and disabled MMX alternatives with
TARGET_MMX_WITH_SSE.

Most MMX instructions have equivalent SSE versions, and the results of some
SSE versions need to be reshuffled into the right order for MMX.  There are
a couple of tricky cases:

1. MMX maskmovq and SSE2 maskmovdqu aren't equivalent.  We emulate MMX
maskmovq with SSE2 maskmovdqu by zeroing out the upper 64 bits of the
mask operand, and we handle unmapped bits 64:127 at the memory address by
adjusting the source and mask operands together with the memory address.

2. MMX movntq is emulated with SSE2 DImode movnti, which is available
in 64-bit mode.

3. MMX pshufb takes a 3-bit index while SSE pshufb takes a 4-bit index.
SSE emulation must clear bit 3 (the fourth index bit) in each byte of the
shuffle control mask.

4. To emulate MMX cvtpi2ps with SSE2 cvtdq2ps, we must properly preserve
the upper 64 bits of the destination XMM register.

Tests are also added to check each SSE emulation of MMX intrinsics.

With SSE emulation in 64-bit mode, the 8-byte vectorizer is enabled with SSE2.

There are no regressions on i686 and x86-64.  For x86-64, GCC is also
tested with

--with-arch=native --with-cpu=native

on AVX2 and AVX512F machines.

H.J. Lu (43):
  i386: Allow 64-bit vector modes in SSE registers
  i386: Emulate MMX packsswb/packssdw/packuswb with SSE2
  i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
  i386: Emulate MMX plusminus/sat_plusminus with SSE
  i386: Emulate MMX mulv4hi3 with SSE
  i386: Emulate MMX smulv4hi3_highpart with SSE
  i386: Emulate MMX mmx_pmaddwd with SSE
  i386: Emulate MMX ashr<mode>3/<shift_insn><mode>3 with SSE
  i386: Emulate MMX <any_logic><mode>3 with SSE
  i386: Emulate MMX mmx_andnot<mode>3 with SSE
  i386: Emulate MMX mmx_eq/mmx_gt<mode>3 with SSE
  i386: Emulate MMX vec_dupv2si with SSE
  i386: Emulate MMX pshufw with SSE
  i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE
  i386: Emulate MMX sse_cvtpi2ps with SSE
  i386: Emulate MMX mmx_pextrw with SSE
  i386: Emulate MMX mmx_pinsrw with SSE
  i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE
  i386: Emulate MMX mmx_pmovmskb with SSE
  i386: Emulate MMX mmx_umulv4hi3_highpart with SSE
  i386: Emulate MMX maskmovq with SSE2 maskmovdqu
  i386: Emulate MMX mmx_uavgv8qi3 with SSE
  i386: Emulate MMX mmx_uavgv4hi3 with SSE
  i386: Emulate MMX mmx_psadbw with SSE
  i386: Emulate MMX movntq with SSE2 movntidi
  i386: Emulate MMX umulv1siv1di3 with SSE2
  i386: Emulate MMX ssse3_ph<plusminus_mnemonic>wv4hi3 with SSE
  i386: Emulate MMX ssse3_ph<plusminus_mnemonic>dv2si3 with SSE
  i386: Emulate MMX ssse3_pmaddubsw with SSE
  i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE
  i386: Emulate MMX pshufb with SSE version
  i386: Emulate MMX ssse3_psign<mode>3 with SSE
  i386: Emulate MMX ssse3_palignrdi with SSE
  i386: Emulate MMX abs<mode>2 with SSE
  i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE
  i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE
  i386: Allow MMX intrinsic emulation with SSE
  i386: Add tests for MMX intrinsic emulations with SSE
  i386: Also enable SSSE3 __m64 tests in 64-bit mode
  i386: Enable 8-byte vectorizer for TARGET_MMX_WITH_SSE
  i386: Implement V2SF add/sub/mul with SSE
  i386: Implement V2SF <-> V2SI conversions with SSE
  i386: Implement V2SF comparisons with SSE

 gcc/config/i386/constraints.md                |  10 +
 gcc/config/i386/i386-builtin.def              | 126 +--
 gcc/config/i386/i386-protos.h                 |   4 +
 gcc/config/i386/i386.c                        | 186 +++-
 gcc/config/i386/i386.h                        |  20 +-
 gcc/config/i386/i386.md                       |  15 +-
 gcc/config/i386/mmintrin.h                    |  10 +-
 gcc/config/i386/mmx.md                        | 909 +++++++++++++-----
 gcc/config/i386/sse.md                        | 440 +++++++--
 gcc/config/i386/xmmintrin.h                   |  61 ++
 gcc/testsuite/gcc.dg/tree-ssa/pr84512.c       |   2 +-
 gcc/testsuite/gcc.target/i386/mmx-vals.h      |  77 ++
 gcc/testsuite/gcc.target/i386/pr82483-1.c     |   2 +-
 gcc/testsuite/gcc.target/i386/pr82483-2.c     |   2 +-
 gcc/testsuite/gcc.target/i386/pr89028-1.c     |  10 +
 gcc/testsuite/gcc.target/i386/pr89028-10.c    |  39 +
 gcc/testsuite/gcc.target/i386/pr89028-11.c    |  39 +
 gcc/testsuite/gcc.target/i386/pr89028-12.c    |  39 +
 gcc/testsuite/gcc.target/i386/pr89028-13.c    |  39 +
 gcc/testsuite/gcc.target/i386/pr89028-2.c     |  11 +
 gcc/testsuite/gcc.target/i386/pr89028-3.c     |  14 +
 gcc/testsuite/gcc.target/i386/pr89028-4.c     |  14 +
 gcc/testsuite/gcc.target/i386/pr89028-5.c     |  11 +
 gcc/testsuite/gcc.target/i386/pr89028-6.c     |  14 +
 gcc/testsuite/gcc.target/i386/pr89028-7.c     |  14 +
 gcc/testsuite/gcc.target/i386/pr89028-8.c     |  12 +
 gcc/testsuite/gcc.target/i386/pr89028-9.c     |  12 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-10.c   |  42 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-11.c   |  39 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-12.c   |  41 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-13.c   |  40 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-14.c   |  30 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-15.c   |  35 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-16.c   |  39 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-17.c   |  50 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-18.c   |  13 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-19.c   |  11 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-2.c    |  12 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-20.c   |  11 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-21.c   |  13 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-3.c    |  12 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-4.c    |   4 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-5.c    |  12 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-6.c    |  12 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-7.c    |  12 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-8.c    |   4 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-9.c    |  79 ++
 .../gcc.target/i386/sse2-mmx-cvtpi2ps.c       |  42 +
 .../gcc.target/i386/sse2-mmx-cvtps2pi.c       |  35 +
 .../gcc.target/i386/sse2-mmx-cvttps2pi.c      |  35 +
 .../gcc.target/i386/sse2-mmx-maskmovq.c       |  98 ++
 .../gcc.target/i386/sse2-mmx-packssdw.c       |  51 +
 .../gcc.target/i386/sse2-mmx-packsswb.c       |  51 +
 .../gcc.target/i386/sse2-mmx-packuswb.c       |  51 +
 .../gcc.target/i386/sse2-mmx-paddb.c          |  47 +
 .../gcc.target/i386/sse2-mmx-paddd.c          |  47 +
 .../gcc.target/i386/sse2-mmx-paddq.c          |  42 +
 .../gcc.target/i386/sse2-mmx-paddsb.c         |  47 +
 .../gcc.target/i386/sse2-mmx-paddsw.c         |  47 +
 .../gcc.target/i386/sse2-mmx-paddusb.c        |  47 +
 .../gcc.target/i386/sse2-mmx-paddusw.c        |  47 +
 .../gcc.target/i386/sse2-mmx-paddw.c          |  47 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c |  43 +
 .../gcc.target/i386/sse2-mmx-pandn.c          |  43 +
 .../gcc.target/i386/sse2-mmx-pavgb.c          |  51 +
 .../gcc.target/i386/sse2-mmx-pavgw.c          |  51 +
 .../gcc.target/i386/sse2-mmx-pcmpeqb.c        |  47 +
 .../gcc.target/i386/sse2-mmx-pcmpeqd.c        |  47 +
 .../gcc.target/i386/sse2-mmx-pcmpeqw.c        |  47 +
 .../gcc.target/i386/sse2-mmx-pcmpgtb.c        |  47 +
 .../gcc.target/i386/sse2-mmx-pcmpgtd.c        |  47 +
 .../gcc.target/i386/sse2-mmx-pcmpgtw.c        |  47 +
 .../gcc.target/i386/sse2-mmx-pextrw.c         |  58 ++
 .../gcc.target/i386/sse2-mmx-pinsrw.c         |  60 ++
 .../gcc.target/i386/sse2-mmx-pmaddwd.c        |  46 +
 .../gcc.target/i386/sse2-mmx-pmaxsw.c         |  47 +
 .../gcc.target/i386/sse2-mmx-pmaxub.c         |  47 +
 .../gcc.target/i386/sse2-mmx-pminsw.c         |  47 +
 .../gcc.target/i386/sse2-mmx-pminub.c         |  47 +
 .../gcc.target/i386/sse2-mmx-pmovmskb.c       |  45 +
 .../gcc.target/i386/sse2-mmx-pmulhuw.c        |  50 +
 .../gcc.target/i386/sse2-mmx-pmulhw.c         |  52 +
 .../gcc.target/i386/sse2-mmx-pmullw.c         |  51 +
 .../gcc.target/i386/sse2-mmx-pmuludq.c        |  46 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-por.c  |  43 +
 .../gcc.target/i386/sse2-mmx-psadbw.c         |  57 ++
 .../gcc.target/i386/sse2-mmx-pshufw.c         | 247 +++++
 .../gcc.target/i386/sse2-mmx-pslld.c          |  51 +
 .../gcc.target/i386/sse2-mmx-pslldi.c         | 152 +++
 .../gcc.target/i386/sse2-mmx-psllq.c          |  46 +
 .../gcc.target/i386/sse2-mmx-psllqi.c         | 244 +++++
 .../gcc.target/i386/sse2-mmx-psllw.c          |  51 +
 .../gcc.target/i386/sse2-mmx-psllwi.c         | 104 ++
 .../gcc.target/i386/sse2-mmx-psrad.c          |  51 +
 .../gcc.target/i386/sse2-mmx-psradi.c         | 152 +++
 .../gcc.target/i386/sse2-mmx-psraw.c          |  51 +
 .../gcc.target/i386/sse2-mmx-psrawi.c         | 104 ++
 .../gcc.target/i386/sse2-mmx-psrld.c          |  51 +
 .../gcc.target/i386/sse2-mmx-psrldi.c         | 152 +++
 .../gcc.target/i386/sse2-mmx-psrlq.c          |  46 +
 .../gcc.target/i386/sse2-mmx-psrlqi.c         | 244 +++++
 .../gcc.target/i386/sse2-mmx-psrlw.c          |  51 +
 .../gcc.target/i386/sse2-mmx-psrlwi.c         | 104 ++
 .../gcc.target/i386/sse2-mmx-psubb.c          |  47 +
 .../gcc.target/i386/sse2-mmx-psubd.c          |  47 +
 .../gcc.target/i386/sse2-mmx-psubq.c          |  42 +
 .../gcc.target/i386/sse2-mmx-psubusb.c        |  47 +
 .../gcc.target/i386/sse2-mmx-psubusw.c        |  47 +
 .../gcc.target/i386/sse2-mmx-psubw.c          |  47 +
 .../gcc.target/i386/sse2-mmx-punpckhbw.c      |  52 +
 .../gcc.target/i386/sse2-mmx-punpckhdq.c      |  46 +
 .../gcc.target/i386/sse2-mmx-punpckhwd.c      |  48 +
 .../gcc.target/i386/sse2-mmx-punpcklbw.c      |  52 +
 .../gcc.target/i386/sse2-mmx-punpckldq.c      |  46 +
 .../gcc.target/i386/sse2-mmx-punpcklwd.c      |  48 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c |  43 +
 gcc/testsuite/gcc.target/i386/sse2-mmx.c      |   1 -
 gcc/testsuite/gcc.target/i386/ssse3-pabsb.c   |   4 +-
 gcc/testsuite/gcc.target/i386/ssse3-pabsd.c   |   4 +-
 gcc/testsuite/gcc.target/i386/ssse3-pabsw.c   |   4 +-
 gcc/testsuite/gcc.target/i386/ssse3-palignr.c |   6 +-
 gcc/testsuite/gcc.target/i386/ssse3-phaddd.c  |   4 +-
 gcc/testsuite/gcc.target/i386/ssse3-phaddsw.c |   4 +-
 gcc/testsuite/gcc.target/i386/ssse3-phaddw.c  |   4 +-
 gcc/testsuite/gcc.target/i386/ssse3-phsubd.c  |   4 +-
 gcc/testsuite/gcc.target/i386/ssse3-phsubsw.c |   4 +-
 gcc/testsuite/gcc.target/i386/ssse3-phsubw.c  |   4 +-
 .../gcc.target/i386/ssse3-pmaddubsw.c         |   4 +-
 .../gcc.target/i386/ssse3-pmulhrsw.c          |   4 +-
 gcc/testsuite/gcc.target/i386/ssse3-pshufb.c  |   6 +-
 gcc/testsuite/gcc.target/i386/ssse3-psignb.c  |   4 +-
 gcc/testsuite/gcc.target/i386/ssse3-psignd.c  |   4 +-
 gcc/testsuite/gcc.target/i386/ssse3-psignw.c  |   4 +-
 133 files changed, 6675 insertions(+), 450 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/mmx-vals.h
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-13.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-9.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-13.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-14.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-15.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-16.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-17.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-18.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-19.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-20.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-21.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-9.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-cvtpi2ps.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-cvtps2pi.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-cvttps2pi.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-maskmovq.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-packssdw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-packsswb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-packuswb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddd.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddq.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddsb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddsw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddusb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddusw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pandn.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pavgb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pavgw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqd.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtd.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmaddwd.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxsw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxub.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pminsw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pminub.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmovmskb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhuw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmullw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmuludq.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-por.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psadbw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pshufw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pslld.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pslldi.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllq.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllqi.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllwi.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrad.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psradi.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psraw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrawi.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrld.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrldi.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlq.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlqi.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlwi.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubd.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubq.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubusb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubusw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhbw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhdq.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhwd.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklbw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckldq.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklwd.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c

-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 06/43] i386: Emulate MMX smulv4hi3_highpart with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (7 preceding siblings ...)
  2019-02-09 13:24 ` [PATCH 15/43] i386: Emulate MMX sse_cvtpi2ps with SSE H.J. Lu
@ 2019-02-09 13:24 ` H.J. Lu
  2019-02-09 13:24 ` [PATCH 14/43] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi " H.J. Lu
                   ` (34 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX smulv4hi3_highpart with SSE.  Only SSE register source operand
is allowed.

	PR target/89021
	* config/i386/mmx.md (mmx_smulv4hi3_highpart): Also allow
	TARGET_MMX_WITH_SSE.
	(*mmx_smulv4hi3_highpart): Also allow TARGET_MMX_WITH_SSE. Add
	SSE support.
---
 gcc/config/i386/mmx.md | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 2712a86ea3c..65883a68531 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -784,23 +784,28 @@
 	      (sign_extend:V4SI
 		(match_operand:V4HI 2 "nonimmediate_operand")))
 	    (const_int 16))))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
 (define_insn "*mmx_smulv4hi3_highpart"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
 	(truncate:V4HI
 	  (lshiftrt:V4SI
 	    (mult:V4SI
 	      (sign_extend:V4SI
-		(match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+		(match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yy"))
 	      (sign_extend:V4SI
-		(match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+		(match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy")))
 	    (const_int 16))))]
-  "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmulhw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+  "@
+   pmulhw\t{%2, %0|%0, %2}
+   pmulhw\t{%2, %0|%0, %2}
+   vpmulhw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_umulv4hi3_highpart"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 15/43] i386: Emulate MMX sse_cvtpi2ps with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (6 preceding siblings ...)
  2019-02-09 13:24 ` [PATCH 02/43] i386: Emulate MMX packsswb/packssdw/packuswb with SSE2 H.J. Lu
@ 2019-02-09 13:24 ` H.J. Lu
  2019-02-09 13:24 ` [PATCH 06/43] i386: Emulate MMX smulv4hi3_highpart " H.J. Lu
                   ` (35 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX sse_cvtpi2ps with SSE2 cvtdq2ps, preserving upper 64 bits of
destination XMM register.  Only SSE register source operand is allowed.

	PR target/89021
	* config/i386/sse.md (UNSPEC_CVTPI2PS): New.
	(sse_cvtpi2ps): Renamed to ...
	(*mmx_cvtpi2ps): This.  Disabled for TARGET_MMX_WITH_SSE.
	(sse_cvtpi2ps): New.
	(mmx_cvtpi2ps_sse): Likewise.
---
 gcc/config/i386/sse.md | 83 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 81 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 4321c5c46db..4503d393dc9 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -18,6 +18,9 @@
 ;; <http://www.gnu.org/licenses/>.
 
 (define_c_enum "unspec" [
+  ;; MMX with SSE
+  UNSPEC_CVTPI2PS
+
   ;; SSE
   UNSPEC_MOVNT
 
@@ -4655,14 +4658,90 @@
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_insn "sse_cvtpi2ps"
+(define_expand "sse_cvtpi2ps"
+  [(set (match_operand:V4SF 0 "register_operand")
+	(vec_merge:V4SF
+	  (vec_duplicate:V4SF
+	    (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand")))
+	  (match_operand:V4SF 1 "register_operand")
+	  (const_int 3)))]
+  "TARGET_SSE"
+{
+  if (TARGET_MMX_WITH_SSE)
+    {
+      rtx op2 = force_reg (V2SImode, operands[2]);
+      rtx op3 = gen_reg_rtx (V4SFmode);
+      rtx op4 = gen_reg_rtx (V4SFmode);
+      rtx insn = gen_mmx_cvtpi2ps_sse (operands[0], operands[1], op2,
+				       op3, op4);
+      emit_insn (insn);
+      DONE;
+    }
+})
+
+(define_insn_and_split "mmx_cvtpi2ps_sse"
+  [(set (match_operand:V4SF 0 "register_operand" "=Yx,Yy")
+	(unspec:V4SF [(match_operand:V2SI 2 "register_operand" "Yx,Yy")
+		      (match_operand:V4SF 1 "register_operand" "0,Yy")]
+		     UNSPEC_CVTPI2PS))
+   (set (match_operand:V4SF 3 "register_operand" "=Yx,Yy")
+	(unspec:V4SF [(match_operand:V4SF 4 "register_operand" "3,3")]
+		     UNSPEC_CVTPI2PS))]
+  "TARGET_MMX_WITH_SSE"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
+  /* Generate SSE2 cvtdq2ps.  */
+  rtx insn = gen_floatv4siv4sf2 (operands[3], op2);
+  emit_insn (insn);
+
+  /* Merge operands[3] with operands[0].  */
+  rtx mask, op1;
+  if (TARGET_AVX)
+    {
+      mask = gen_rtx_PARALLEL (VOIDmode,
+			       gen_rtvec (4, GEN_INT (0), GEN_INT (1),
+					  GEN_INT (6), GEN_INT (7)));
+      op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]);
+      op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
+      insn = gen_rtx_SET (operands[0], op2);
+    }
+  else
+    {
+      /* NB: SSE can only concatenate OP0 and OP3 to OP0.  */
+      mask = gen_rtx_PARALLEL (VOIDmode,
+			       gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+					  GEN_INT (4), GEN_INT (5)));
+      op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]);
+      op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
+      insn = gen_rtx_SET (operands[0], op2);
+      emit_insn (insn);
+
+      /* Swap bits 0:63 with bits 64:127.  */
+      mask = gen_rtx_PARALLEL (VOIDmode,
+			       gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+					  GEN_INT (0), GEN_INT (1)));
+      rtx dest = gen_rtx_REG (V4SImode, REGNO (operands[0]));
+      op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+      insn = gen_rtx_SET (dest, op1);
+    }
+  emit_insn (insn);
+  DONE;
+}
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "ssecvt")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "*mmx_cvtpi2ps"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
 	(vec_merge:V4SF
 	  (vec_duplicate:V4SF
 	    (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
 	  (match_operand:V4SF 1 "register_operand" "0")
 	  (const_int 3)))]
-  "TARGET_SSE"
+  "TARGET_SSE && !TARGET_MMX_WITH_SSE"
   "cvtpi2ps\t{%2, %0|%0, %2}"
   [(set_attr "type" "ssecvt")
    (set_attr "mode" "V4SF")])
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 02/43] i386: Emulate MMX packsswb/packssdw/packuswb with SSE2
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (5 preceding siblings ...)
  2019-02-09 13:24 ` [PATCH 04/43] i386: Emulate MMX plusminus/sat_plusminus " H.J. Lu
@ 2019-02-09 13:24 ` H.J. Lu
  2019-02-09 13:24 ` [PATCH 15/43] i386: Emulate MMX sse_cvtpi2ps with SSE H.J. Lu
                   ` (36 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX packsswb/packssdw/packuswb with SSE packsswb/packssdw/packuswb
plus moving bits 64:95 to bits 32:63 in SSE register.  Only SSE register
source operand is allowed.

2019-02-08  H.J. Lu  <hongjiu.lu@intel.com>
	    Uros Bizjak  <ubizjak@gmail.com>

	PR target/89021
	* config/i386/constraints.md (Yx): Any SSE register if MMX is
	disabled in 64-bit mode.
	(Yy): Any EVEX encodable SSE register for AVX512VL target,
	otherwise any SSE register if MMX is disabled in 64-bit mode.
	* config/i386/i386-protos.h (ix86_move_vector_high_sse_to_mmx):
	New prototype.
	(ix86_split_mmx_pack): Likewise.
	* config/i386/i386.c (ix86_move_vector_high_sse_to_mmx): New
	function.
	(ix86_split_mmx_pack): Likewise.
	* config/i386/i386.md (mmx_isa): New.
	(enabled): Also check mmx_isa.
	* config/i386/mmx.md (any_s_truncate): New code iterator.
	(s_trunsuffix): New code attr.
	(mmx_packsswb): Removed.
	(mmx_packssdw): Likewise.
	(mmx_packuswb): Likewise.
	(mmx_pack<s_trunsuffix>swb): New define_insn_and_split to emulate
	MMX packsswb/packuswb with SSE2.
	(mmx_packssdw): Likewise.
---
 gcc/config/i386/constraints.md | 10 +++++
 gcc/config/i386/i386-protos.h  |  3 ++
 gcc/config/i386/i386.c         | 54 +++++++++++++++++++++++++++
 gcc/config/i386/i386.md        | 12 ++++++
 gcc/config/i386/mmx.md         | 67 +++++++++++++++++++---------------
 5 files changed, 116 insertions(+), 30 deletions(-)

diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 33921aea267..6e9244ad77f 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -110,6 +110,9 @@
 ;;  v	any EVEX encodable SSE register for AVX512VL target,
 ;;	otherwise any SSE register
 ;;  h	EVEX encodable SSE register with number factor of four
+;;  x	SSE register if MMX is disabled in 64-bit mode
+;;  y	any EVEX encodable SSE register for AVX512VL target, otherwise
+;;      any SSE register if MMX is disabled in 64-bit mode
 
 (define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
  "First SSE register (@code{%xmm0}).")
@@ -146,6 +149,13 @@
  "TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
  "@internal For AVX512VL, any EVEX encodable SSE register (@code{%xmm0-%xmm31}), otherwise any SSE register.")
 
+(define_register_constraint "Yx" "TARGET_MMX_WITH_SSE ? SSE_REGS : NO_REGS"
+ "@internal Any SSE register if MMX is disabled in 64-bit mode.")
+
+(define_register_constraint "Yy"
+ "TARGET_MMX_WITH_SSE ? (TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS) : NO_REGS"
+ "@internal Any EVEX encodable SSE register for AVX512VL target, otherwise any SSE register if MMX is disabled in 64-bit mode.")
+
 ;; We use the B prefix to denote any number of internal operands:
 ;;  f  FLAGS_REG
 ;;  g  GOT memory operand.
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 2d600173917..bb96a420a85 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -200,6 +200,9 @@ extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, rtx, rtx);
 
 extern rtx ix86_split_stack_guard (void);
 
+extern void ix86_move_vector_high_sse_to_mmx (rtx);
+extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
 #endif	/* TREE_CODE  */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 12bc7926f86..cab35bb2242 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19955,6 +19955,60 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
     gcc_unreachable ();
 }
 
+/* Move bits 64:95 to bits 32:63.  */
+
+void
+ix86_move_vector_high_sse_to_mmx (rtx op)
+{
+  rtx mask = gen_rtx_PARALLEL (VOIDmode,
+			       gen_rtvec (4, GEN_INT (0), GEN_INT (2),
+					  GEN_INT (0), GEN_INT (0)));
+  rtx dest = gen_rtx_REG (V4SImode, REGNO (op));
+  op = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+  rtx insn = gen_rtx_SET (dest, op);
+  emit_insn (insn);
+}
+
+/* Split MMX pack with signed/unsigned saturation with SSE/SSE2.  */
+
+void
+ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+
+  machine_mode dmode = GET_MODE (op0);
+  machine_mode smode = GET_MODE (op1);
+  machine_mode inner_dmode = GET_MODE_INNER (dmode);
+  machine_mode inner_smode = GET_MODE_INNER (smode);
+
+  /* Get the corresponding SSE mode for destination.  */
+  int nunits = 16 / GET_MODE_SIZE (inner_dmode);
+  machine_mode sse_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+					    nunits).require ();
+  machine_mode sse_half_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+						 nunits / 2).require ();
+
+  /* Get the corresponding SSE mode for source.  */
+  nunits = 16 / GET_MODE_SIZE (inner_smode);
+  machine_mode sse_smode = mode_for_vector (GET_MODE_INNER (smode),
+					    nunits).require ();
+
+  /* Generate SSE pack with signed/unsigned saturation.  */
+  rtx dest = gen_rtx_REG (sse_dmode, REGNO (op0));
+  op1 = gen_rtx_REG (sse_smode, REGNO (op1));
+  op2 = gen_rtx_REG (sse_smode, REGNO (op2));
+
+  op1 = gen_rtx_fmt_e (code, sse_half_dmode, op1);
+  op2 = gen_rtx_fmt_e (code, sse_half_dmode, op2);
+  rtx insn = gen_rtx_SET (dest, gen_rtx_VEC_CONCAT (sse_dmode,
+						    op1, op2));
+  emit_insn (insn);
+
+  ix86_move_vector_high_sse_to_mmx (op0);
+}
+
 /* Helper function of ix86_fixup_binary_operands to canonicalize
    operand order.  Returns true if the operands should be swapped.  */
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4a32144a71a..72685107fc0 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -792,6 +792,9 @@
 		    avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
   (const_string "base"))
 
+;; Define instruction set of MMX instructions
+(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx" (const_string "base"))
+
 (define_attr "enabled" ""
   (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
 	 (eq_attr "isa" "x64_sse2")
@@ -830,6 +833,15 @@
 	 (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
 	 (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
 	 (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
+
+	 (eq_attr "mmx_isa" "native")
+	   (symbol_ref "!TARGET_MMX_WITH_SSE")
+	 (eq_attr "mmx_isa" "x64")
+	   (symbol_ref "TARGET_MMX_WITH_SSE")
+	 (eq_attr "mmx_isa" "x64_avx")
+	   (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
+	 (eq_attr "mmx_isa" "x64_noavx")
+	   (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
 	]
 	(const_int 1)))
 
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index c1e0f2c411e..5c28d935e82 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -58,6 +58,11 @@
 ;; Mapping from integer vector mode to mnemonic suffix
 (define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (V1DI "q")])
 
+;; Used in signed and unsigned truncations with saturation.
+(define_code_iterator any_s_truncate [ss_truncate us_truncate])
+;; Instruction suffix for truncations with saturation.
+(define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")])
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Move patterns
@@ -1046,41 +1051,43 @@
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_insn "mmx_packsswb"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_pack<s_trunsuffix>swb"
+  [(set (match_operand:V8QI 0 "register_operand" "=y,Yx,Yy")
 	(vec_concat:V8QI
-	  (ss_truncate:V4QI
-	    (match_operand:V4HI 1 "register_operand" "0"))
-	  (ss_truncate:V4QI
-	    (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
-  "TARGET_MMX"
-  "packsswb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
+	  (any_s_truncate:V4QI
+	    (match_operand:V4HI 1 "register_operand" "0,0,Yy"))
+	  (any_s_truncate:V4QI
+	    (match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy"))))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   pack<s_trunsuffix>swb\t{%2, %0|%0, %2}
+   #
+   #"
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+  "ix86_split_mmx_pack (operands, <any_s_truncate:CODE>);"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_packssdw"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_packssdw"
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
 	(vec_concat:V4HI
 	  (ss_truncate:V2HI
-	    (match_operand:V2SI 1 "register_operand" "0"))
+	    (match_operand:V2SI 1 "register_operand" "0,0,Yy"))
 	  (ss_truncate:V2HI
-	    (match_operand:V2SI 2 "nonimmediate_operand" "ym"))))]
-  "TARGET_MMX"
-  "packssdw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-(define_insn "mmx_packuswb"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-	(vec_concat:V8QI
-	  (us_truncate:V4QI
-	    (match_operand:V4HI 1 "register_operand" "0"))
-	  (us_truncate:V4QI
-	    (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
-  "TARGET_MMX"
-  "packuswb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
+	    (match_operand:V2SI 2 "nonimmediate_operand" "ym,Yx,Yy"))))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   packssdw\t{%2, %0|%0, %2}
+   #
+   #"
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+  "ix86_split_mmx_pack (operands, SS_TRUNCATE);"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_punpckhbw"
   [(set (match_operand:V8QI 0 "register_operand" "=y")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 09/43] i386: Emulate MMX <any_logic><mode>3 with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (3 preceding siblings ...)
  2019-02-09 13:24 ` [PATCH 07/43] i386: Emulate MMX mmx_pmaddwd with SSE H.J. Lu
@ 2019-02-09 13:24 ` H.J. Lu
  2019-02-10 10:27   ` Uros Bizjak
  2019-02-09 13:24 ` [PATCH 04/43] i386: Emulate MMX plusminus/sat_plusminus " H.J. Lu
                   ` (38 subsequent siblings)
  43 siblings, 1 reply; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX <any_logic><mode>3 with SSE.  Only SSE register source
operand is allowed.

	PR target/89021
	* config/i386/mmx.md (any_logic:<code><mode>3): New.
	(any_logic:*mmx_<code><mode>3): Also allow TARGET_MMX_WITH_SSE.
	Add SSE support.
---
 gcc/config/i386/mmx.md | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 69c66e968b5..fae2e43af24 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1110,15 +1110,28 @@
   "TARGET_MMX"
   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
 
+(define_expand "<code><mode>3"
+  [(set (match_operand:MMXMODEI 0 "register_operand")
+	(any_logic:MMXMODEI
+	  (match_operand:MMXMODEI 1 "nonimmediate_operand")
+	  (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
+  "TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
 (define_insn "*mmx_<code><mode>3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yx,Yy")
         (any_logic:MMXMODEI
-	  (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
-	  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
-  "p<logic>\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+	  (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0,0,Yy")
+	  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,Yx,Yy")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+  "@
+   p<logic>\t{%2, %0|%0, %2}
+   p<logic>\t{%2, %0|%0, %2}
+   vp<logic>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 12/43] i386: Emulate MMX vec_dupv2si with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (10 preceding siblings ...)
  2019-02-09 13:24 ` [PATCH 11/43] i386: Emulate MMX mmx_eq/mmx_gt<mode>3 " H.J. Lu
@ 2019-02-09 13:24 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 34/43] i386: Emulate MMX abs<mode>2 " H.J. Lu
                   ` (31 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX vec_dupv2si with SSE.  Only SSE register source operand is
allowed.

	PR target/89021
	* config/i386/mmx.md (*vec_dupv2si): Changed to
	define_insn_and_split and also allow TARGET_MMX_WITH_SSE to
	support SSE emulation.
	* config/i386/sse.md (*vec_dupv4si): Renamed to ...
	(vec_dupv4si): This.
---
 gcc/config/i386/mmx.md | 27 ++++++++++++++++++++-------
 gcc/config/i386/sse.md |  2 +-
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 73fdef3ba1e..e31c3f5c366 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1420,14 +1420,27 @@
    (set_attr "length_immediate" "1")
    (set_attr "mode" "DI")])
 
-(define_insn "*vec_dupv2si"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "*vec_dupv2si"
+  [(set (match_operand:V2SI 0 "register_operand" "=y,Yx,Yy")
 	(vec_duplicate:V2SI
-	  (match_operand:SI 1 "register_operand" "0")))]
-  "TARGET_MMX"
-  "punpckldq\t%0, %0"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+	  (match_operand:SI 1 "register_operand" "0,0,Yy")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckldq\t%0, %0
+   #
+   #"
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+{
+  /* Emulate MMX vec_dupv2si with SSE vec_dupv4si.  */
+  rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
+  rtx insn = gen_vec_dupv4si (op0, operands[1]);
+  emit_insn (insn);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,ssemov,ssemov")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "*mmx_concatv2si"
   [(set (match_operand:V2SI 0 "register_operand"     "=y,y")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5dc0930ac1f..7d2c0367911 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -18976,7 +18976,7 @@
    (set_attr "prefix" "maybe_evex,maybe_evex,orig")
    (set_attr "mode" "V4SF")])
 
-(define_insn "*vec_dupv4si"
+(define_insn "vec_dupv4si"
   [(set (match_operand:V4SI 0 "register_operand"     "=v,v,x")
 	(vec_duplicate:V4SI
 	  (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 04/43] i386: Emulate MMX plusminus/sat_plusminus with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (4 preceding siblings ...)
  2019-02-09 13:24 ` [PATCH 09/43] i386: Emulate MMX <any_logic><mode>3 " H.J. Lu
@ 2019-02-09 13:24 ` H.J. Lu
  2019-02-09 14:19   ` Uros Bizjak
  2019-02-09 13:24 ` [PATCH 02/43] i386: Emulate MMX packsswb/packssdw/packuswb with SSE2 H.J. Lu
                   ` (37 subsequent siblings)
  43 siblings, 1 reply; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX plusminus/sat_plusminus with SSE.  Only SSE register source
operand is allowed.

2019-02-08  H.J. Lu  <hongjiu.lu@intel.com>
	    Uros Bizjak  <ubizjak@gmail.com>

	PR target/89021
	* config/i386/mmx.md (MMXMODEI8): Require TARGET_SSE2 for V1DI.
	(<plusminus_insn><mode>3): New.
	(*mmx_<plusminus_insn><mode>3): Also allow TARGET_MMX_WITH_SSE.
	Add SSE support.
	(*mmx_<plusminus_insn><mode>3): Likewise.
	(mmx_<plusminus_insn><mode>3): Also allow TARGET_MMX_WITH_SSE.
---
 gcc/config/i386/mmx.md | 51 +++++++++++++++++++++++++++++-------------
 1 file changed, 35 insertions(+), 16 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 1d5ed83e7b2..01a71aa128b 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -45,7 +45,7 @@
 
 ;; 8 byte integral modes handled by MMX (and by extension, SSE)
 (define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
-(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI])
+(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
 
 ;; All 8-byte vector modes handled by MMX
 (define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
@@ -698,34 +698,53 @@
   "TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode)"
   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
 
+(define_expand "<plusminus_insn><mode>3"
+  [(set (match_operand:MMXMODEI 0 "register_operand")
+	(plusminus:MMXMODEI
+	  (match_operand:MMXMODEI 1 "nonimmediate_operand")
+	  (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
+  "TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
 (define_insn "*mmx_<plusminus_insn><mode>3"
-  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,Yx,Yy")
         (plusminus:MMXMODEI8
-	  (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0")
-	  (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
-  "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode))
+	  (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0,0,Yy")
+	  (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym,Yx,Yy")))]
+  "(TARGET_MMX
+    || TARGET_MMX_WITH_SSE
+    || (TARGET_SSE2 && <MODE>mode == V1DImode))
    && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
-  "p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+  "@
+   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+   vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseadd,sseadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_<plusminus_insn><mode>3"
   [(set (match_operand:MMXMODE12 0 "register_operand")
 	(sat_plusminus:MMXMODE12
 	  (match_operand:MMXMODE12 1 "nonimmediate_operand")
 	  (match_operand:MMXMODE12 2 "nonimmediate_operand")))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
 
 (define_insn "*mmx_<plusminus_insn><mode>3"
-  [(set (match_operand:MMXMODE12 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODE12 0 "register_operand" "=y,Yx,Yy")
         (sat_plusminus:MMXMODE12
-	  (match_operand:MMXMODE12 1 "nonimmediate_operand" "<comm>0")
-	  (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
-  "p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+	  (match_operand:MMXMODE12 1 "nonimmediate_operand" "<comm>0,0,Yy")
+	  (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym,Yx,Yy")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+  "@
+   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+   vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseadd,sseadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_mulv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 14/43] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (8 preceding siblings ...)
  2019-02-09 13:24 ` [PATCH 06/43] i386: Emulate MMX smulv4hi3_highpart " H.J. Lu
@ 2019-02-09 13:24 ` H.J. Lu
  2019-02-09 14:28   ` Uros Bizjak
  2019-02-09 13:24 ` [PATCH 11/43] i386: Emulate MMX mmx_eq/mmx_gt<mode>3 " H.J. Lu
                   ` (33 subsequent siblings)
  43 siblings, 1 reply; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE.

	PR target/89021
	* config/i386/sse.md (sse_cvtps2pi): Add SSE emulation.
	(sse_cvttps2pi): Likewise.
---
 gcc/config/i386/sse.md | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 7d2c0367911..4321c5c46db 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4668,26 +4668,32 @@
    (set_attr "mode" "V4SF")])
 
 (define_insn "sse_cvtps2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+  [(set (match_operand:V2SI 0 "register_operand" "=y,Yy")
 	(vec_select:V2SI
-	  (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+	  (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm,YyBm")]
 		       UNSPEC_FIX_NOTRUNC)
 	  (parallel [(const_int 0) (const_int 1)])))]
   "TARGET_SSE"
-  "cvtps2pi\t{%1, %0|%0, %q1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "unit" "mmx")
+  "@
+   cvtps2pi\t{%1, %0|%0, %q1}
+   %vcvtps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "ssecvt")
+   (set_attr "unit" "mmx,*")
    (set_attr "mode" "DI")])
 
 (define_insn "sse_cvttps2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+  [(set (match_operand:V2SI 0 "register_operand" "=y,Yy")
 	(vec_select:V2SI
-	  (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+	  (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm,YyBm"))
 	  (parallel [(const_int 0) (const_int 1)])))]
   "TARGET_SSE"
-  "cvttps2pi\t{%1, %0|%0, %q1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "unit" "mmx")
+  "@
+   cvttps2pi\t{%1, %0|%0, %q1}
+   %vcvttps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "ssecvt")
+   (set_attr "unit" "mmx,*")
    (set_attr "prefix_rep" "0")
    (set_attr "mode" "SF")])
 
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 03/43] i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
  2019-02-09 13:23 ` [PATCH 08/43] i386: Emulate MMX ashr<mode>3/<shift_insn><mode>3 " H.J. Lu
  2019-02-09 13:24 ` [PATCH 17/43] i386: Emulate MMX mmx_pinsrw " H.J. Lu
@ 2019-02-09 13:24 ` H.J. Lu
  2019-02-09 13:24 ` [PATCH 07/43] i386: Emulate MMX mmx_pmaddwd with SSE H.J. Lu
                   ` (40 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX.  For MMX punpckhXX,
move bits 64:127 to bits 0:63 in SSE register.  Only SSE register source
operand is allowed.

	PR target/89021
	* config/i386/i386-protos.h (ix86_split_mmx_punpck): New
	prototype.
	* config/i386/i386.c (ix86_split_mmx_punpck): New function.
	* config/i386/mmx.md (mmx_punpckhbw): Changed to
	define_insn_and_split to support SSE emulation.
	(mmx_punpcklbw): Likewise.
	(mmx_punpckhwd): Likewise.
	(mmx_punpcklwd): Likewise.
	(mmx_punpckhdq): Likewise.
	(mmx_punpckldq): Likewise.
---
 gcc/config/i386/i386-protos.h |   1 +
 gcc/config/i386/i386.c        |  77 +++++++++++++++++++
 gcc/config/i386/mmx.md        | 138 ++++++++++++++++++++++------------
 3 files changed, 168 insertions(+), 48 deletions(-)

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index bb96a420a85..dc7fc38d8e4 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -202,6 +202,7 @@ extern rtx ix86_split_stack_guard (void);
 
 extern void ix86_move_vector_high_sse_to_mmx (rtx);
 extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+extern void ix86_split_mmx_punpck (rtx[], bool);
 
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index cab35bb2242..6e67ac346dd 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -20009,6 +20009,83 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
   ix86_move_vector_high_sse_to_mmx (op0);
 }
 
+/* Split MMX punpcklXX/punpckhXX with SSE punpcklXX.  */
+
+void
+ix86_split_mmx_punpck (rtx operands[], bool high_p)
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+  machine_mode mode = GET_MODE (op0);
+  rtx mask;
+  /* The corresponding SSE mode.  */
+  machine_mode sse_mode, double_sse_mode;
+
+  switch (mode)
+    {
+    case E_V8QImode:
+      sse_mode = V16QImode;
+      double_sse_mode = V32QImode;
+      mask = gen_rtx_PARALLEL (VOIDmode,
+			       gen_rtvec (16,
+					  GEN_INT (0), GEN_INT (16),
+					  GEN_INT (1), GEN_INT (17),
+					  GEN_INT (2), GEN_INT (18),
+					  GEN_INT (3), GEN_INT (19),
+					  GEN_INT (4), GEN_INT (20),
+					  GEN_INT (5), GEN_INT (21),
+					  GEN_INT (6), GEN_INT (22),
+					  GEN_INT (7), GEN_INT (23)));
+      break;
+
+    case E_V4HImode:
+      sse_mode = V8HImode;
+      double_sse_mode = V16HImode;
+      mask = gen_rtx_PARALLEL (VOIDmode,
+			       gen_rtvec (8,
+					  GEN_INT (0), GEN_INT (8),
+					  GEN_INT (1), GEN_INT (9),
+					  GEN_INT (2), GEN_INT (10),
+					  GEN_INT (3), GEN_INT (11)));
+      break;
+
+    case E_V2SImode:
+      sse_mode = V4SImode;
+      double_sse_mode = V8SImode;
+      mask = gen_rtx_PARALLEL (VOIDmode,
+			       gen_rtvec (4,
+					  GEN_INT (0), GEN_INT (4),
+					  GEN_INT (1), GEN_INT (5)));
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  /* Generate SSE punpcklXX.  */
+  rtx dest = gen_rtx_REG (sse_mode, REGNO (op0));
+  op1 = gen_rtx_REG (sse_mode, REGNO (op1));
+  op2 = gen_rtx_REG (sse_mode, REGNO (op2));
+
+  op1 = gen_rtx_VEC_CONCAT (double_sse_mode, op1, op2);
+  op2 = gen_rtx_VEC_SELECT (sse_mode, op1, mask);
+  rtx insn = gen_rtx_SET (dest, op2);
+  emit_insn (insn);
+
+  if (high_p)
+    {
+      /* Move bits 64:127 to bits 0:63.  */
+      mask = gen_rtx_PARALLEL (VOIDmode,
+			       gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+					  GEN_INT (0), GEN_INT (0)));
+      dest = gen_rtx_REG (V4SImode, REGNO (dest));
+      op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+      insn = gen_rtx_SET (dest, op1);
+      emit_insn (insn);
+    }
+}
+
 /* Helper function of ix86_fixup_binary_operands to canonicalize
    operand order.  Returns true if the operands should be swapped.  */
 
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 5c28d935e82..1d5ed83e7b2 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1089,87 +1089,129 @@
    (set_attr "type" "mmxshft,sselog,sselog")
    (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpckhbw"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhbw"
+  [(set (match_operand:V8QI 0 "register_operand" "=y,Yx,Yy")
 	(vec_select:V8QI
 	  (vec_concat:V16QI
-	    (match_operand:V8QI 1 "register_operand" "0")
-	    (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+	    (match_operand:V8QI 1 "register_operand" "0,0,Yy")
+	    (match_operand:V8QI 2 "nonimmediate_operand" "ym,Yx,Yy"))
           (parallel [(const_int 4) (const_int 12)
                      (const_int 5) (const_int 13)
                      (const_int 6) (const_int 14)
                      (const_int 7) (const_int 15)])))]
-  "TARGET_MMX"
-  "punpckhbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckhbw\t{%2, %0|%0, %2}
+   #
+   #"
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+  "ix86_split_mmx_punpck (operands, true);"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpcklbw"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpcklbw"
+  [(set (match_operand:V8QI 0 "register_operand" "=y,Yx,Yy")
 	(vec_select:V8QI
 	  (vec_concat:V16QI
-	    (match_operand:V8QI 1 "register_operand" "0")
-	    (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+	    (match_operand:V8QI 1 "register_operand" "0,0,Yy")
+	    (match_operand:V8QI 2 "nonimmediate_operand" "ym,Yx,Yy"))
           (parallel [(const_int 0) (const_int 8)
                      (const_int 1) (const_int 9)
                      (const_int 2) (const_int 10)
                      (const_int 3) (const_int 11)])))]
-  "TARGET_MMX"
-  "punpcklbw\t{%2, %0|%0, %k2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpcklbw\t{%2, %0|%0, %k2}
+   #
+   #"
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+  "ix86_split_mmx_punpck (operands, false);"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpckhwd"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhwd"
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
 	(vec_select:V4HI
 	  (vec_concat:V8HI
-	    (match_operand:V4HI 1 "register_operand" "0")
-	    (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+	    (match_operand:V4HI 1 "register_operand" "0,0,Yy")
+	    (match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy"))
           (parallel [(const_int 2) (const_int 6)
                      (const_int 3) (const_int 7)])))]
-  "TARGET_MMX"
-  "punpckhwd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckhwd\t{%2, %0|%0, %2}
+   #
+   #"
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+  "ix86_split_mmx_punpck (operands, true);"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpcklwd"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpcklwd"
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
 	(vec_select:V4HI
 	  (vec_concat:V8HI
-	    (match_operand:V4HI 1 "register_operand" "0")
-	    (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+	    (match_operand:V4HI 1 "register_operand" "0,0,Yy")
+	    (match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy"))
           (parallel [(const_int 0) (const_int 4)
                      (const_int 1) (const_int 5)])))]
-  "TARGET_MMX"
-  "punpcklwd\t{%2, %0|%0, %k2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpcklwd\t{%2, %0|%0, %k2}
+   #
+   #"
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+  "ix86_split_mmx_punpck (operands, false);"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpckhdq"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhdq"
+  [(set (match_operand:V2SI 0 "register_operand" "=y,Yx,Yy")
 	(vec_select:V2SI
 	  (vec_concat:V4SI
-	    (match_operand:V2SI 1 "register_operand" "0")
-	    (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
+	    (match_operand:V2SI 1 "register_operand" "0,0,Yy")
+	    (match_operand:V2SI 2 "nonimmediate_operand" "ym,Yx,Yy"))
 	  (parallel [(const_int 1)
 		     (const_int 3)])))]
-  "TARGET_MMX"
-  "punpckhdq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckhdq\t{%2, %0|%0, %2}
+   #
+   #"
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+  "ix86_split_mmx_punpck (operands, true);"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpckldq"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckldq"
+  [(set (match_operand:V2SI 0 "register_operand" "=y,Yx,Yy")
 	(vec_select:V2SI
 	  (vec_concat:V4SI
-	    (match_operand:V2SI 1 "register_operand" "0")
-	    (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
+	    (match_operand:V2SI 1 "register_operand" "0,0,Yy")
+	    (match_operand:V2SI 2 "nonimmediate_operand" "ym,Yx,Yy"))
 	  (parallel [(const_int 0)
 		     (const_int 2)])))]
-  "TARGET_MMX"
-  "punpckldq\t{%2, %0|%0, %k2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckldq\t{%2, %0|%0, %k2}
+   #
+   #"
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+  "ix86_split_mmx_punpck (operands, false);"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_pinsrw"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 17/43] i386: Emulate MMX mmx_pinsrw with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
  2019-02-09 13:23 ` [PATCH 08/43] i386: Emulate MMX ashr<mode>3/<shift_insn><mode>3 " H.J. Lu
@ 2019-02-09 13:24 ` H.J. Lu
  2019-02-09 13:24 ` [PATCH 03/43] i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX H.J. Lu
                   ` (41 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX mmx_pinsrw with SSE.  Only SSE register source operand is
allowed.

	PR target/89021
	* config/i386/mmx.md (mmx_pinsrw): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index fced8fd4a10..ad33e587352 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1328,25 +1328,36 @@
 })
 
 (define_insn "*mmx_pinsrw"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
         (vec_merge:V4HI
           (vec_duplicate:V4HI
-            (match_operand:HI 2 "nonimmediate_operand" "rm"))
-	  (match_operand:V4HI 1 "register_operand" "0")
+            (match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm"))
+	  (match_operand:V4HI 1 "register_operand" "0,0,Yy")
           (match_operand:SI 3 "const_int_operand")))]
   "(TARGET_SSE || TARGET_3DNOW_A)
    && ((unsigned) exact_log2 (INTVAL (operands[3]))
        < GET_MODE_NUNITS (V4HImode))"
 {
   operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
-  if (MEM_P (operands[2]))
-    return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+  if (TARGET_MMX_WITH_SSE && TARGET_AVX)
+    {
+      if (MEM_P (operands[2]))
+	return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+      else
+	return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
+    }
   else
-    return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+    {
+      if (MEM_P (operands[2]))
+	return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+      else
+	return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+    }
 }
-  [(set_attr "type" "mmxcvt")
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
    (set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_pextrw"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 43/43] i386: Implement V2SF comparisons with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (12 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 34/43] i386: Emulate MMX abs<mode>2 " H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 23/43] i386: Emulate MMX mmx_uavgv4hi3 " H.J. Lu
                   ` (29 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

In 64-bit mode, implement V2SF comparisons with SSE.  Only SSE register
source operand is allowed.

gcc/

	PR target/89028
	* config/i386/sse.md (V_128_64): New mode iterator.
	(VF_128_64): Likewise.
	(sseintvecmode): Add V2SF.
	(sseintvecmodelower): Likewise.
	(*sse_maskcmpv2sf3_comm): New.
	(*sse_maskcmpv2sf3): Likewise.
	(vcond<V_128:mode><VF_128:mode>): Renamed to ...
	(vcond<V_128_64:mode><VF_128_64:mode>): This.

gcc/testsuite/

	PR target/89028
	* gcc.target/i386/pr89028-10.c: New test.
	* gcc.target/i386/pr89028-11.c: Likewise.
	* gcc.target/i386/pr89028-12.c: Likewise.
	* gcc.target/i386/pr89028-13.c: Likewise.
---
 gcc/config/i386/sse.md                     | 61 ++++++++++++++++++----
 gcc/testsuite/gcc.target/i386/pr89028-10.c | 39 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr89028-11.c | 39 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr89028-12.c | 39 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr89028-13.c | 39 ++++++++++++++
 5 files changed, 208 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-13.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index fe63239f53f..90097b5aa83 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -246,6 +246,12 @@
 (define_mode_iterator V_128
   [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
 
+;; All 128bit and 64bit vector modes
+(define_mode_iterator V_128_64
+  [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")
+   (V8QI "TARGET_MMX_WITH_SSE") (V4HI "TARGET_MMX_WITH_SSE")
+   (V2SI "TARGET_MMX_WITH_SSE") (V2SF "TARGET_MMX_WITH_SSE")])
+
 ;; All 256bit vector modes
 (define_mode_iterator V_256
   [V32QI V16HI V8SI V4DI V8SF V4DF])
@@ -302,6 +308,10 @@
 (define_mode_iterator VF_128
   [V4SF (V2DF "TARGET_SSE2")])
 
+;; All 128bit and 64bit vector float modes
+(define_mode_iterator VF_128_64
+  [V4SF (V2DF "TARGET_SSE2") (V2SF "TARGET_MMX_WITH_SSE")])
+
 ;; All 256bit vector float modes
 (define_mode_iterator VF_256
   [V8SF V4DF])
@@ -734,6 +744,7 @@
   [(V16SF "V16SI") (V8DF  "V8DI")
    (V8SF  "V8SI")  (V4DF  "V4DI")
    (V4SF  "V4SI")  (V2DF  "V2DI")
+   (V2SF  "V2SI")
    (V16SI "V16SI") (V8DI  "V8DI")
    (V8SI  "V8SI")  (V4DI  "V4DI")
    (V4SI  "V4SI")  (V2DI  "V2DI")
@@ -749,6 +760,7 @@
   [(V16SF "v16si") (V8DF "v8di")
    (V8SF "v8si") (V4DF "v4di")
    (V4SF "v4si") (V2DF "v2di")
+   (V2SF "v2si")
    (V8SI "v8si") (V4DI "v4di")
    (V4SI "v4si") (V2DI "v2di")
    (V16HI "v16hi") (V8HI "v8hi")
@@ -2766,6 +2778,37 @@
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<ssescalarmode>")])
 
+(define_insn "*sse_maskcmpv2sf3_comm"
+  [(set (match_operand:V2SF 0 "register_operand" "=Yx,Yx")
+	(match_operator:V2SF 3 "sse_comparison_operator"
+	  [(match_operand:V2SF 1 "register_operand" "%0,Yx")
+	   (match_operand:V2SF 2 "register_operand" "Yx,Yx")]))]
+  "TARGET_MMX_WITH_SSE
+   && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
+  "@
+   cmp%D3ps\t{%2, %0|%0, %2}
+   vcmp%D3ps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "SF")])
+
+(define_insn "*sse_maskcmpv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=Yx,Yx")
+	(match_operator:V2SF 3 "sse_comparison_operator"
+	  [(match_operand:V2SF 1 "register_operand" "0,Yx")
+	   (match_operand:V2SF 2 "register_operand" "Yx,Yx")]))]
+  "TARGET_MMX_WITH_SSE"
+  "@
+   cmp%D3ps\t{%2, %0|%0, %2}
+   vcmp%D3ps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "SF")])
+
 (define_mode_attr cmp_imm_predicate
   [(V16SF "const_0_to_31_operand")  (V8DF "const_0_to_31_operand")
    (V16SI "const_0_to_7_operand")   (V8DI "const_0_to_7_operand")
@@ -3089,17 +3132,17 @@
   DONE;
 })
 
-(define_expand "vcond<V_128:mode><VF_128:mode>"
-  [(set (match_operand:V_128 0 "register_operand")
-	(if_then_else:V_128
+(define_expand "vcond<V_128_64:mode><VF_128_64:mode>"
+  [(set (match_operand:V_128_64 0 "register_operand")
+	(if_then_else:V_128_64
 	  (match_operator 3 ""
-	    [(match_operand:VF_128 4 "vector_operand")
-	     (match_operand:VF_128 5 "vector_operand")])
-	  (match_operand:V_128 1 "general_operand")
-	  (match_operand:V_128 2 "general_operand")))]
+	    [(match_operand:VF_128_64 4 "vector_operand")
+	     (match_operand:VF_128_64 5 "vector_operand")])
+	  (match_operand:V_128_64 1 "general_operand")
+	  (match_operand:V_128_64 2 "general_operand")))]
   "TARGET_SSE
-   && (GET_MODE_NUNITS (<V_128:MODE>mode)
-       == GET_MODE_NUNITS (<VF_128:MODE>mode))"
+   && (GET_MODE_NUNITS (<V_128_64:MODE>mode)
+       == GET_MODE_NUNITS (<VF_128_64:MODE>mode))"
 {
   bool ok = ix86_expand_fp_vcond (operands);
   gcc_assert (ok);
diff --git a/gcc/testsuite/gcc.target/i386/pr89028-10.c b/gcc/testsuite/gcc.target/i386/pr89028-10.c
new file mode 100644
index 00000000000..fdb14212292
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89028-10.c
@@ -0,0 +1,39 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-times "cmpneqps" 5 } } */
+
+typedef int __v2si __attribute__ ((__vector_size__ (8)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+typedef short __v4hi __attribute__ ((__vector_size__ (8)));
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+typedef long long __v1di __attribute__ ((__vector_size__ (8)));
+
+__v2sf
+foo1 (__v2sf x, __v2sf y)
+{
+  return x != y;
+}
+
+__v8qi
+foo2 (__v2sf x, __v2sf y)
+{
+  return x != y;
+}
+
+__v4hi
+foo3 (__v2sf x, __v2sf y)
+{
+  return x != y;
+}
+
+__v2si
+foo4 (__v2sf x, __v2sf y)
+{
+  return x != y;
+}
+
+__v1di
+foo5 (__v2sf x, __v2sf y)
+{
+  return x != y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89028-11.c b/gcc/testsuite/gcc.target/i386/pr89028-11.c
new file mode 100644
index 00000000000..9cf1ea79d2d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89028-11.c
@@ -0,0 +1,39 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-times "cmpeqps" 5 } } */
+
+typedef int __v2si __attribute__ ((__vector_size__ (8)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+typedef short __v4hi __attribute__ ((__vector_size__ (8)));
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+typedef long long __v1di __attribute__ ((__vector_size__ (8)));
+
+__v2sf
+foo1 (__v2sf x, __v2sf y)
+{
+  return x == y;
+}
+
+__v8qi
+foo2 (__v2sf x, __v2sf y)
+{
+  return x == y;
+}
+
+__v4hi
+foo3 (__v2sf x, __v2sf y)
+{
+  return x == y;
+}
+
+__v2si
+foo4 (__v2sf x, __v2sf y)
+{
+  return x == y;
+}
+
+__v1di
+foo5 (__v2sf x, __v2sf y)
+{
+  return x == y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89028-12.c b/gcc/testsuite/gcc.target/i386/pr89028-12.c
new file mode 100644
index 00000000000..20f91c4b0cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89028-12.c
@@ -0,0 +1,39 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-times "cmpleps" 5 } } */
+
+typedef int __v2si __attribute__ ((__vector_size__ (8)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+typedef short __v4hi __attribute__ ((__vector_size__ (8)));
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+typedef long long __v1di __attribute__ ((__vector_size__ (8)));
+
+__v2sf
+foo1 (__v2sf x, __v2sf y)
+{
+  return x >= y;
+}
+
+__v8qi
+foo2 (__v2sf x, __v2sf y)
+{
+  return x >= y;
+}
+
+__v4hi
+foo3 (__v2sf x, __v2sf y)
+{
+  return x >= y;
+}
+
+__v2si
+foo4 (__v2sf x, __v2sf y)
+{
+  return x >= y;
+}
+
+__v1di
+foo5 (__v2sf x, __v2sf y)
+{
+  return x >= y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89028-13.c b/gcc/testsuite/gcc.target/i386/pr89028-13.c
new file mode 100644
index 00000000000..1bc3a54ab02
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89028-13.c
@@ -0,0 +1,39 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-times "cmpltps" 5 } } */
+
+typedef int __v2si __attribute__ ((__vector_size__ (8)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+typedef short __v4hi __attribute__ ((__vector_size__ (8)));
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+typedef long long __v1di __attribute__ ((__vector_size__ (8)));
+
+__v2sf
+foo1 (__v2sf x, __v2sf y)
+{
+  return x > y;
+}
+
+__v8qi
+foo2 (__v2sf x, __v2sf y)
+{
+  return x > y;
+}
+
+__v4hi
+foo3 (__v2sf x, __v2sf y)
+{
+  return x > y;
+}
+
+__v2si
+foo4 (__v2sf x, __v2sf y)
+{
+  return x > y;
+}
+
+__v1di
+foo5 (__v2sf x, __v2sf y)
+{
+  return x > y;
+}
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 13/43] i386: Emulate MMX pshufw with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (18 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 16/43] i386: Emulate MMX mmx_pextrw with SSE H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 26/43] i386: Emulate MMX umulv1siv1di3 with SSE2 H.J. Lu
                   ` (23 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX pshufw with SSE.  Only SSE register source operand is allowed.

	PR target/89021
	* config/i386/mmx.md (mmx_pshufw_1): Add SSE emulation.
	(*vec_dupv4hi): Likewise.
---
 gcc/config/i386/mmx.md | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index e31c3f5c366..8a5c5fb93b7 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1376,9 +1376,9 @@
 })
 
 (define_insn "mmx_pshufw_1"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yy")
         (vec_select:V4HI
-          (match_operand:V4HI 1 "nonimmediate_operand" "ym")
+          (match_operand:V4HI 1 "nonimmediate_operand" "ym,Yy")
           (parallel [(match_operand 2 "const_0_to_3_operand")
                      (match_operand 3 "const_0_to_3_operand")
                      (match_operand 4 "const_0_to_3_operand")
@@ -1392,11 +1392,15 @@
   mask |= INTVAL (operands[5]) << 6;
   operands[2] = GEN_INT (mask);
 
-  return "pshufw\t{%2, %1, %0|%0, %1, %2}";
+  if (TARGET_MMX_WITH_SSE)
+    return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
+  else
+    return "pshufw\t{%2, %1, %0|%0, %1, %2}";
 }
-  [(set_attr "type" "mmxcvt")
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxcvt,sselog")
    (set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI")])
 
 (define_insn "mmx_pswapdv2si2"
   [(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -1410,15 +1414,18 @@
    (set_attr "mode" "DI")])
 
 (define_insn "*vec_dupv4hi"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yy")
 	(vec_duplicate:V4HI
 	  (truncate:HI
-	    (match_operand:SI 1 "register_operand" "0"))))]
+	    (match_operand:SI 1 "register_operand" "0,Yy"))))]
   "TARGET_SSE || TARGET_3DNOW_A"
-  "pshufw\t{$0, %0, %0|%0, %0, 0}"
-  [(set_attr "type" "mmxcvt")
+  "@
+   pshufw\t{$0, %0, %0|%0, %0, 0}
+   %vpshuflw\t{$0, %1, %0|%0, %1, 0}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxcvt,sselog1")
    (set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI")])
 
 (define_insn_and_split "*vec_dupv2si"
   [(set (match_operand:V2SI 0 "register_operand" "=y,Yx,Yy")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 31/43] i386: Emulate MMX pshufb with SSE version
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (30 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 36/43] i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE H.J. Lu
                   ` (11 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate the MMX version of pshufb with the SSE version by masking out bit 3
of each shuffle control byte.  Only an SSE register source operand is allowed.

	PR target/89021
	* config/i386/sse.md (ssse3_pshufbv8qi3): Renamed to ...
	(ssse3_pshufbv8qi3_mmx): This.
	(ssse3_pshufbv8qi3): New.
	(ssse3_pshufbv8qi3_sse): Likewise.
---
 gcc/config/i386/sse.md | 63 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 61 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5f00179aa95..a32d67f811a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15817,18 +15817,77 @@
    (set_attr "btver2_decode" "vector")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "ssse3_pshufbv8qi3"
+(define_expand "ssse3_pshufbv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand")
+	(unspec:V8QI [(match_operand:V8QI 1 "register_operand")
+		      (match_operand:V8QI 2 "nonimmediate_operand")]
+		     UNSPEC_PSHUFB))]
+  "TARGET_SSSE3"
+{
+  if (TARGET_MMX_WITH_SSE)
+    {
+      /* Emulate MMX version of pshufb with SSE version by masking
+	 out the bit 3 of the shuffle control byte.  */
+      rtvec par = gen_rtvec (4, GEN_INT (0xf7f7f7f7),
+			     GEN_INT (0xf7f7f7f7),
+			     GEN_INT (0xf7f7f7f7),
+			     GEN_INT (0xf7f7f7f7));
+      rtx vec_const = gen_rtx_CONST_VECTOR (V4SImode, par);
+      vec_const = force_const_mem (V4SImode, vec_const);
+      rtx op3 = gen_reg_rtx (V4SImode);
+      rtx op4 = gen_reg_rtx (V4SImode);
+      rtx insn = gen_rtx_SET (op4, vec_const);
+      emit_insn (insn);
+      rtx op2 = force_reg (V8QImode, operands[2]);
+      insn = gen_ssse3_pshufbv8qi3_sse (operands[0], operands[1],
+					op2, op3, op4);
+      emit_insn (insn);
+      DONE;
+    }
+})
+
+(define_insn "ssse3_pshufbv8qi3_mmx"
   [(set (match_operand:V8QI 0 "register_operand" "=y")
 	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
 		      (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
 		     UNSPEC_PSHUFB))]
-  "TARGET_SSSE3"
+  "TARGET_SSSE3 && !TARGET_MMX_WITH_SSE"
   "pshufb\t{%2, %0|%0, %2}";
   [(set_attr "type" "sselog1")
    (set_attr "prefix_extra" "1")
    (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
    (set_attr "mode" "DI")])
 
+(define_insn_and_split "ssse3_pshufbv8qi3_sse"
+  [(set (match_operand:V8QI 0 "register_operand" "=Yx,Yy")
+	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,Yy")
+		      (match_operand:V8QI 2 "register_operand" "Yx,Yy")]
+		     UNSPEC_PSHUFB))
+   (set (match_operand:V4SI 3 "register_operand" "=Yx,Yy")
+	(unspec:V4SI [(match_operand:V4SI 4 "register_operand" "3,3")]
+		     UNSPEC_PSHUFB))]
+  "TARGET_SSSE3 && TARGET_MMX_WITH_SSE"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  /* Mask out the bit 3 of the shuffle control byte.  */
+  rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
+  rtx op3 = operands[3];
+  rtx insn = gen_andv4si3 (op3, op3, op2);
+  emit_insn (insn);
+  /* Generate SSE version of pshufb.  */
+  rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]));
+  rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]));
+  op3 = gen_rtx_REG (V16QImode, REGNO (op3));
+  insn = gen_ssse3_pshufbv16qi3 (op0, op1, op3);
+  emit_insn (insn);
+  DONE;
+}
+  [(set_attr "mmx_isa" "x64_noavx,x64_avx")
+   (set_attr "type" "sselog1")
+   (set_attr "mode" "TI,TI")])
+
 (define_insn "<ssse3_avx2>_psign<mode>3"
   [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
 	(unspec:VI124_AVX2
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 23/43] i386: Emulate MMX mmx_uavgv4hi3 with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (13 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 43/43] i386: Implement V2SF comparisons " H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 29/43] i386: Emulate MMX ssse3_pmaddubsw " H.J. Lu
                   ` (28 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX mmx_uavgv4hi3 with SSE.  Only SSE register source operand is
allowed.

	PR target/89021
	* config/i386/mmx.md (*mmx_uavgv4hi3): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 17776c66d90..e345b2b8875 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1732,23 +1732,27 @@
   "ix86_fixup_binary_operands_no_copy (PLUS, V4HImode, operands);")
 
 (define_insn "*mmx_uavgv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
 	(truncate:V4HI
 	  (lshiftrt:V4SI
 	    (plus:V4SI
 	      (plus:V4SI
 		(zero_extend:V4SI
-		  (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+		  (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yy"))
 		(zero_extend:V4SI
-		  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+		  (match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy")))
 	      (const_vector:V4SI [(const_int 1) (const_int 1)
 				  (const_int 1) (const_int 1)]))
 	    (const_int 1))))]
   "(TARGET_SSE || TARGET_3DNOW_A)
    && ix86_binary_operator_ok (PLUS, V4HImode, operands)"
-  "pavgw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
+  "@
+   pavgw\t{%2, %0|%0, %2}
+   pavgw\t{%2, %0|%0, %2}
+   vpavgw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_psadbw"
   [(set (match_operand:V1DI 0 "register_operand" "=y")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 40/43] i386: Enable 8-byte vectorizer for TARGET_MMX_WITH_SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (16 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 24/43] i386: Emulate MMX mmx_psadbw " H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 16/43] i386: Emulate MMX mmx_pextrw with SSE H.J. Lu
                   ` (25 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

In 64-bit mode, we support the 8-byte vectorizer with SSE.  Also xfail x86-64
targets for gcc.dg/tree-ssa/pr84512.c.

gcc/

	PR target/89028
	* config/i386/i386.c (ix86_autovectorize_vector_sizes): Enable
	8-byte vectorizer for TARGET_MMX_WITH_SSE.

gcc/testsuite/

	PR target/89028
	* gcc.dg/tree-ssa/pr84512.c: Also xfail x86-64 targets.
	* gcc.target/i386/pr89028-1.c: New test.
---
 gcc/config/i386/i386.c                    |  2 ++
 gcc/testsuite/gcc.dg/tree-ssa/pr84512.c   |  2 +-
 gcc/testsuite/gcc.target/i386/pr89028-1.c | 10 ++++++++++
 3 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-1.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e45284ce1a2..9cf96471e8f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -50213,6 +50213,8 @@ ix86_autovectorize_vector_sizes (vector_sizes *sizes)
       sizes->safe_push (32);
       sizes->safe_push (16);
     }
+  if (TARGET_MMX_WITH_SSE)
+    sizes->safe_push (8);
 }
 
 /* Implemenation of targetm.vectorize.get_mask_mode.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr84512.c b/gcc/testsuite/gcc.dg/tree-ssa/pr84512.c
index 3975757d844..8f8529ba8cf 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr84512.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr84512.c
@@ -13,4 +13,4 @@ int foo()
 }
 
 /* Listed targets xfailed due to PR84958.  */
-/* { dg-final { scan-tree-dump "return 285;" "optimized" { xfail { { alpha*-*-* amdgcn*-*-* nvptx*-*-* } || { sparc*-*-* && lp64 } } } } } */
+/* { dg-final { scan-tree-dump "return 285;" "optimized" { xfail { { { alpha*-*-* amdgcn*-*-* nvptx*-*-* } || { sparc*-*-* && lp64 } } || { { i?86-*-* x86_64-*-* } && { ! ia32 } } } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89028-1.c b/gcc/testsuite/gcc.target/i386/pr89028-1.c
new file mode 100644
index 00000000000..d2ebb7f844d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89028-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx2 -O3" } */
+/* { dg-final { scan-assembler "vpaddb\[ \\t\]+\[^\n\]*%xmm\[0-9\]" } } */
+
+void
+foo (char* restrict r, char* restrict a)
+{
+  for (int i = 0; i < 8; i++)
+    r[i] += a[i];
+}
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 19/43] i386: Emulate MMX mmx_pmovmskb with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (41 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 37/43] i386: Allow MMX intrinsic emulation " H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:53 ` [PATCH 00/43] V2: Emulate MMX intrinsics " Uros Bizjak
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX mmx_pmovmskb with SSE by zero-extending result of SSE pmovmskb
from QImode to SImode.  Only SSE register source operand is allowed.

	PR target/89021
	* config/i386/mmx.md (mmx_pmovmskb): Changed to
	define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/mmx.md | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index ee5acb00a6d..7759e3e1082 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1753,14 +1753,31 @@
   [(set_attr "type" "mmxshft")
    (set_attr "mode" "DI")])
 
-(define_insn "mmx_pmovmskb"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-	(unspec:SI [(match_operand:V8QI 1 "register_operand" "y")]
+(define_insn_and_split "mmx_pmovmskb"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(unspec:SI [(match_operand:V8QI 1 "register_operand" "y,Yx")]
 		   UNSPEC_MOVMSK))]
   "TARGET_SSE || TARGET_3DNOW_A"
-  "pmovmskb\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "@
+   pmovmskb\t{%1, %0|%0, %1}
+   #"
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+{
+  /* Generate SSE pmovmskb.  */
+  rtx op0 = operands[0];
+  rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]));
+  rtx insn = gen_sse2_pmovmskb (op0, op1);
+  emit_insn (insn);
+  /* Zero-extend from QImode to SImode.  */
+  op1 = gen_rtx_REG (QImode, REGNO (operands[0]));
+  insn = gen_zero_extendqisi2 (op0, op1);
+  emit_insn (insn);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxcvt,ssemov")
+   (set_attr "mode" "DI,TI")])
 
 (define_expand "mmx_maskmovq"
   [(set (match_operand:V8QI 0 "memory_operand")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 30/43] i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (21 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 22/43] i386: Emulate MMX mmx_uavgv8qi3 with SSE H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 25/43] i386: Emulate MMX movntq with SSE2 movntidi H.J. Lu
                   ` (20 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX ssse3_pmulhrswv4hi3 with SSE.  Only SSE register source
operand is allowed.

	PR target/89021
	* config/i386/sse.md (*ssse3_pmulhrswv4hi3): Add SSE emulation.
---
 gcc/config/i386/sse.md | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b0ded2008f1..5f00179aa95 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15774,25 +15774,29 @@
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "*ssse3_pmulhrswv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
 	(truncate:V4HI
 	  (lshiftrt:V4SI
 	    (plus:V4SI
 	      (lshiftrt:V4SI
 		(mult:V4SI
 		  (sign_extend:V4SI
-		    (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+		    (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yy"))
 		  (sign_extend:V4SI
-		    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+		    (match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy")))
 		(const_int 14))
 	      (match_operand:V4HI 3 "const1_operand"))
 	    (const_int 1))))]
   "TARGET_SSSE3 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "pmulhrsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseimul")
+  "@
+   pmulhrsw\t{%2, %0|%0, %2}
+   pmulhrsw\t{%2, %0|%0, %2}
+   vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseimul")
    (set_attr "prefix_extra" "1")
    (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
   [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 16/43] i386: Emulate MMX mmx_pextrw with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (17 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 40/43] i386: Enable 8-byte vectorizer for TARGET_MMX_WITH_SSE H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 13/43] i386: Emulate MMX pshufw " H.J. Lu
                   ` (24 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX mmx_pextrw with SSE.  Only SSE register source operand is
allowed.

	PR target/89021
	* config/i386/mmx.md (mmx_pextrw): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 8a5c5fb93b7..fced8fd4a10 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1349,16 +1349,17 @@
    (set_attr "mode" "DI")])
 
 (define_insn "mmx_pextrw"
-  [(set (match_operand:SI 0 "register_operand" "=r")
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
         (zero_extend:SI
 	  (vec_select:HI
-	    (match_operand:V4HI 1 "register_operand" "y")
-	    (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
+	    (match_operand:V4HI 1 "register_operand" "y,Yy")
+	    (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]))))]
   "TARGET_SSE || TARGET_3DNOW_A"
-  "pextrw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "mmxcvt")
+  "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxcvt,sselog1")
    (set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI")])
 
 (define_expand "mmx_pshufw"
   [(match_operand:V4HI 0 "register_operand")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 36/43] i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (31 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 31/43] i386: Emulate MMX pshufb with SSE version H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 10/43] i386: Emulate MMX mmx_andnot<mode>3 with SSE H.J. Lu
                   ` (10 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

	PR target/89021
	* config/i386/i386.c (ix86_expand_vector_init_duplicate): Set
	mmx_ok to true if TARGET_MMX_WITH_SSE is true.
	(ix86_expand_vector_init_one_nonzero): Likewise.
	(ix86_expand_vector_init_one_var): Likewise.
	(ix86_expand_vector_init_general): Likewise.
	(ix86_expand_vector_init): Likewise.
	(ix86_expand_vector_set): Likewise.
	(ix86_expand_vector_extract): Likewise.
	* config/i386/mmx.md (*vec_dupv2sf): Changed to
	define_insn_and_split to support SSE emulation.
	(vec_setv2sf): Also allow TARGET_MMX_WITH_SSE.
	(vec_extractv2sf_1 splitter): Likewise.
	(vec_extractv2sfsf): Likewise.
	(vec_setv2si): Likewise.
	(vec_extractv2si_1 splitter): Likewise.
	(vec_extractv2sisi): Likewise.
	(vec_setv4hi): Likewise.
	(vec_extractv4hihi): Likewise.
	(vec_setv8qi): Likewise.
	(vec_extractv8qiqi): Likewise.
	(*vec_extractv2sf_0): Don't allow TARGET_MMX_WITH_SSE.
	(*vec_extractv2sf_1): Likewise.
	(*vec_extractv2si_0): Likewise.
	(*vec_extractv2si_1): Likewise.
	(*vec_extractv2sf_0_sse): New.
	(*vec_extractv2sf_1_sse): Likewise.
	(*vec_extractv2si_0_sse): Likewise.
	(*vec_extractv2si_1_sse): Likewise.
---
 gcc/config/i386/i386.c |   8 +++
 gcc/config/i386/mmx.md | 133 +++++++++++++++++++++++++++++++++--------
 2 files changed, 117 insertions(+), 24 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6e67ac346dd..3770bb882d4 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -42364,6 +42364,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
 {
   bool ok;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
     {
     case E_V2SImode:
@@ -42523,6 +42524,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
   bool use_vector_set = false;
   rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
     {
     case E_V2DImode:
@@ -42716,6 +42718,7 @@ ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
   XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
   const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
     {
     case E_V2DFmode:
@@ -43101,6 +43104,7 @@ ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
   machine_mode quarter_mode = VOIDmode;
   int n, i;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
     {
     case E_V2SFmode:
@@ -43300,6 +43304,8 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
   int i;
   rtx x;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
+
   /* Handle first initialization from vector elts.  */
   if (n_elts != XVECLEN (vals, 0))
     {
@@ -43399,6 +43405,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
   machine_mode mmode = VOIDmode;
   rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
     {
     case E_V2SFmode:
@@ -43754,6 +43761,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
   bool use_vec_extr = false;
   rtx tmp;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
     {
     case E_V2SImode:
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 283661f7887..31db0e8b0c7 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -595,14 +595,27 @@
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "V2SF")])
 
-(define_insn "*vec_dupv2sf"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
+(define_insn_and_split "*vec_dupv2sf"
+  [(set (match_operand:V2SF 0 "register_operand" "=y,Yx,Yy")
 	(vec_duplicate:V2SF
-	  (match_operand:SF 1 "register_operand" "0")))]
-  "TARGET_MMX"
-  "punpckldq\t%0, %0"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+	  (match_operand:SF 1 "register_operand" "0,0,Yy")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckldq\t%0, %0
+   #
+   #"
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+{
+  /* Emulate MMX vec_dupv2sf with SSE vec_dupv4sf.  */
+  rtx op0 = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
+  rtx insn = gen_vec_dupv4sf (op0, operands[1]);
+  emit_insn (insn);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,ssemov,ssemov")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "*mmx_concatv2sf"
   [(set (match_operand:V2SF 0 "register_operand"     "=y,y")
@@ -620,7 +633,7 @@
   [(match_operand:V2SF 0 "register_operand")
    (match_operand:SF 1 "register_operand")
    (match_operand 2 "const_int_operand")]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_set (false, operands[0], operands[1],
 			  INTVAL (operands[2]));
@@ -634,7 +647,20 @@
 	(vec_select:SF
 	  (match_operand:V2SF 1 "nonimmediate_operand" " xm,x,ym,y,m,m")
 	  (parallel [(const_int 0)])))]
-  "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "TARGET_MMX
+   && !TARGET_MMX_WITH_SSE
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+  "operands[1] = gen_lowpart (SFmode, operands[1]);")
+
+(define_insn_and_split "*vec_extractv2sf_0_sse"
+  [(set (match_operand:SF 0 "nonimmediate_operand"     "=x, m,f,r")
+	(vec_select:SF
+	  (match_operand:V2SF 1 "nonimmediate_operand" " xm,x,m,m")
+	  (parallel [(const_int 0)])))]
+  "TARGET_MMX_WITH_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "#"
   "&& reload_completed"
   [(set (match_dup 0) (match_dup 1))]
@@ -647,7 +673,9 @@
 	(vec_select:SF
 	  (match_operand:V2SF 1 "nonimmediate_operand" " 0,x,x,o,o,o,o")
 	  (parallel [(const_int 1)])))]
-  "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "TARGET_MMX
+   && !TARGET_MMX_WITH_SSE
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
    punpckhdq\t%0, %0
    %vmovshdup\t{%1, %0|%0, %1}
@@ -669,12 +697,33 @@
    (set_attr "prefix" "orig,maybe_vex,orig,orig,orig,orig,orig")
    (set_attr "mode" "DI,V4SF,V4SF,SF,SF,SF,SF")])
 
+(define_insn "*vec_extractv2sf_1_sse"
+  [(set (match_operand:SF 0 "nonimmediate_operand"     "=x,x,x,f,r")
+	(vec_select:SF
+	  (match_operand:V2SF 1 "nonimmediate_operand" " x,x,o,o,o")
+	  (parallel [(const_int 1)])))]
+  "TARGET_MMX_WITH_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+   %vmovshdup\t{%1, %0|%0, %1}
+   shufps\t{$0xe5, %1, %0|%0, %1, 0xe5}
+   #
+   #
+   #"
+  [(set_attr "isa" "sse3,noavx,*,*,*")
+   (set_attr "type" "sse,sseshuf1,ssemov,fmov,imov")
+   (set (attr "length_immediate")
+     (if_then_else (eq_attr "alternative" "1")
+		   (const_string "1")
+		   (const_string "*")))
+   (set_attr "prefix" "maybe_vex,orig,orig,orig,orig")
+   (set_attr "mode" "V4SF,V4SF,SF,SF,SF")])
+
 (define_split
   [(set (match_operand:SF 0 "register_operand")
 	(vec_select:SF
 	  (match_operand:V2SF 1 "memory_operand")
 	  (parallel [(const_int 1)])))]
-  "TARGET_MMX && reload_completed"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && reload_completed"
   [(set (match_dup 0) (match_dup 1))]
   "operands[1] = adjust_address (operands[1], SFmode, 4);")
 
@@ -682,7 +731,7 @@
   [(match_operand:SF 0 "register_operand")
    (match_operand:V2SF 1 "register_operand")
    (match_operand 2 "const_int_operand")]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_extract (false, operands[0], operands[1],
 			      INTVAL (operands[2]));
@@ -1523,7 +1572,7 @@
   [(match_operand:V2SI 0 "register_operand")
    (match_operand:SI 1 "register_operand")
    (match_operand 2 "const_int_operand")]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_set (false, operands[0], operands[1],
 			  INTVAL (operands[2]));
@@ -1537,7 +1586,20 @@
 	(vec_select:SI
 	  (match_operand:V2SI 1 "nonimmediate_operand" "xm,x,ym,y,m")
 	  (parallel [(const_int 0)])))]
-  "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "TARGET_MMX
+   && !TARGET_MMX_WITH_SSE
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+  "operands[1] = gen_lowpart (SImode, operands[1]);")
+
+(define_insn_and_split "*vec_extractv2si_0_sse"
+  [(set (match_operand:SI 0 "nonimmediate_operand"     "=x,m,r")
+	(vec_select:SI
+	  (match_operand:V2SI 1 "nonimmediate_operand" "xm,x,m")
+	  (parallel [(const_int 0)])))]
+  "TARGET_MMX_WITH_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "#"
   "&& reload_completed"
   [(set (match_dup 0) (match_dup 1))]
@@ -1550,7 +1612,9 @@
 	(vec_select:SI
 	  (match_operand:V2SI 1 "nonimmediate_operand" " 0,x,x,o,o,o")
 	  (parallel [(const_int 1)])))]
-  "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "TARGET_MMX
+   && !TARGET_MMX_WITH_SSE
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
    punpckhdq\t%0, %0
    %vpshufd\t{$0xe5, %1, %0|%0, %1, 0xe5}
@@ -1567,22 +1631,43 @@
    (set_attr "prefix" "orig,maybe_vex,orig,orig,orig,orig")
    (set_attr "mode" "DI,TI,V4SF,SI,SI,SI")])
 
+(define_insn "*vec_extractv2si_1_sse"
+  [(set (match_operand:SI 0 "nonimmediate_operand"     "=x,x,x,r")
+	(vec_select:SI
+	  (match_operand:V2SI 1 "nonimmediate_operand" " x,x,o,o")
+	  (parallel [(const_int 1)])))]
+  "TARGET_MMX_WITH_SSE
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+   %vpshufd\t{$0xe5, %1, %0|%0, %1, 0xe5}
+   shufps\t{$0xe5, %1, %0|%0, %1, 0xe5}
+   #
+   #"
+  [(set_attr "isa" "sse2,noavx,*,*")
+   (set_attr "type" "sseshuf1,sseshuf1,ssemov,imov")
+   (set (attr "length_immediate")
+     (if_then_else (eq_attr "alternative" "0,1")
+		   (const_string "1")
+		   (const_string "*")))
+   (set_attr "prefix" "maybe_vex,orig,orig,orig")
+   (set_attr "mode" "TI,V4SF,SI,SI")])
+
 (define_split
   [(set (match_operand:SI 0 "register_operand")
 	(vec_select:SI
 	  (match_operand:V2SI 1 "memory_operand")
 	  (parallel [(const_int 1)])))]
-  "TARGET_MMX && reload_completed"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && reload_completed"
   [(set (match_dup 0) (match_dup 1))]
   "operands[1] = adjust_address (operands[1], SImode, 4);")
 
 (define_insn_and_split "*vec_extractv2si_zext_mem"
-  [(set (match_operand:DI 0 "register_operand" "=y,x,r")
+  [(set (match_operand:DI 0 "register_operand" "=x,r")
 	(zero_extend:DI
 	  (vec_select:SI
-	    (match_operand:V2SI 1 "memory_operand" "o,o,o")
+	    (match_operand:V2SI 1 "memory_operand" "o,o")
 	    (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))))]
-  "TARGET_64BIT && TARGET_MMX"
+  "TARGET_64BIT"
   "#"
   "&& reload_completed"
   [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
@@ -1594,7 +1679,7 @@
   [(match_operand:SI 0 "register_operand")
    (match_operand:V2SI 1 "register_operand")
    (match_operand 2 "const_int_operand")]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_extract (false, operands[0], operands[1],
 			      INTVAL (operands[2]));
@@ -1614,7 +1699,7 @@
   [(match_operand:V4HI 0 "register_operand")
    (match_operand:HI 1 "register_operand")
    (match_operand 2 "const_int_operand")]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_set (false, operands[0], operands[1],
 			  INTVAL (operands[2]));
@@ -1625,7 +1710,7 @@
   [(match_operand:HI 0 "register_operand")
    (match_operand:V4HI 1 "register_operand")
    (match_operand 2 "const_int_operand")]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_extract (false, operands[0], operands[1],
 			      INTVAL (operands[2]));
@@ -1645,7 +1730,7 @@
   [(match_operand:V8QI 0 "register_operand")
    (match_operand:QI 1 "register_operand")
    (match_operand 2 "const_int_operand")]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_set (false, operands[0], operands[1],
 			  INTVAL (operands[2]));
@@ -1656,7 +1741,7 @@
   [(match_operand:QI 0 "register_operand")
    (match_operand:V8QI 1 "register_operand")
    (match_operand 2 "const_int_operand")]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_extract (false, operands[0], operands[1],
 			      INTVAL (operands[2]));
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 05/43] i386: Emulate MMX mulv4hi3 with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (23 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 25/43] i386: Emulate MMX movntq with SSE2 movntidi H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-10 10:17   ` Uros Bizjak
  2019-02-09 13:25 ` [PATCH 20/43] i386: Emulate MMX mmx_umulv4hi3_highpart " H.J. Lu
                   ` (18 subsequent siblings)
  43 siblings, 1 reply; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX mulv4hi3 with SSE.  Only SSE register source operand is
allowed.

	PR target/89021
	* config/i386/mmx.md (mulv4hi3): New.
	(*mmx_mulv4hi3): Also allow TARGET_MMX_WITH_SSE.  Add SSE
	support.
---
 gcc/config/i386/mmx.md | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 01a71aa128b..2712a86ea3c 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -753,14 +753,26 @@
   "TARGET_MMX"
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
+(define_expand "mulv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand")
+        (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand")
+		   (match_operand:V4HI 2 "nonimmediate_operand")))]
+  "TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
 (define_insn "*mmx_mulv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
-		   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmullw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
+        (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yy")
+		   (match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+  "@
+   pmullw\t{%2, %0|%0, %2}
+   pmullw\t{%2, %0|%0, %2}
+   vpmullw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_smulv4hi3_highpart"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 25/43] i386: Emulate MMX movntq with SSE2 movntidi
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (22 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 30/43] i386: Emulate MMX ssse3_pmulhrswv4hi3 " H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 05/43] i386: Emulate MMX mulv4hi3 with SSE H.J. Lu
                   ` (19 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX movntq with SSE2 movntidi.  Only SSE register source operand
is allowed.

	PR target/89021
	* config/i386/mmx.md (sse_movntq): Renamed to ...
	(*sse_movntq): This.
	(sse_movntq): New.  Emulate MMX movntq with SSE2 movntidi.
---
 gcc/config/i386/mmx.md | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index e5e7c6ec4ce..c52e5b2e393 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -243,7 +243,21 @@
   DONE;
 })
 
-(define_insn "sse_movntq"
+(define_expand "sse_movntq"
+  [(set (match_operand:DI 0 "memory_operand")
+	(unspec:DI [(match_operand:DI 1 "register_operand")]
+		   UNSPEC_MOVNTQ))]
+  "TARGET_SSE || TARGET_3DNOW_A"
+{
+  if (TARGET_MMX_WITH_SSE)
+    {
+      rtx insn = gen_sse2_movntidi (operands[0], operands[1]);
+      emit_insn (insn);
+      DONE;
+    }
+})
+
+(define_insn "*sse_movntq"
   [(set (match_operand:DI 0 "memory_operand" "=m")
 	(unspec:DI [(match_operand:DI 1 "register_operand" "y")]
 		   UNSPEC_MOVNTQ))]
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 24/43] i386: Emulate MMX mmx_psadbw with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (15 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 29/43] i386: Emulate MMX ssse3_pmaddubsw " H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 40/43] i386: Enable 8-byte vectorizer for TARGET_MMX_WITH_SSE H.J. Lu
                   ` (26 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX mmx_psadbw with SSE.  Only SSE register source operand is
allowed.

	PR target/89021
	* config/i386/mmx.md (mmx_psadbw): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index e345b2b8875..e5e7c6ec4ce 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1755,14 +1755,18 @@
    (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_psadbw"
-  [(set (match_operand:V1DI 0 "register_operand" "=y")
-        (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0")
-		      (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
+  [(set (match_operand:V1DI 0 "register_operand" "=y,Yx,Yy")
+        (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0,0,Yy")
+		      (match_operand:V8QI 2 "nonimmediate_operand" "ym,Yx,Yy")]
 		     UNSPEC_PSADBW))]
   "TARGET_SSE || TARGET_3DNOW_A"
-  "psadbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
+  "@
+   psadbw\t{%2, %0|%0, %2}
+   psadbw\t{%2, %0|%0, %2}
+   vpsadbw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn_and_split "mmx_pmovmskb"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 37/43] i386: Allow MMX intrinsic emulation with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (40 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 18/43] i386: Emulate MMX V4HI smaxmin/V8QI umaxmin " H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 14:43   ` Uros Bizjak
  2019-02-09 13:25 ` [PATCH 19/43] i386: Emulate MMX mmx_pmovmskb " H.J. Lu
  2019-02-09 13:53 ` [PATCH 00/43] V2: Emulate MMX intrinsics " Uros Bizjak
  43 siblings, 1 reply; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Allow MMX intrinsic emulation with SSE/SSE2/SSSE3.  For pr82483-1.c and
pr82483-2.c, "-mssse3 -mno-mmx" no longer ICEs in 64-bit mode since MMX
intrinsics can be emulated with SSE.

gcc/

	PR target/89021
	* config/i386/i386-builtin.def: Enable MMX intrinsics with
	SSE/SSE2/SSSE3.
	* config/i386/i386.c (bdesc_tm): Likewise.
	(ix86_init_mmx_sse_builtins): Likewise.
	(ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX
	intrinsics in 64-bit mode without MMX.
	* config/i386/mmintrin.h: Don't require MMX in 64-bit mode.

gcc/testsuite/

	PR target/89021
	* gcc.target/i386/pr82483-1.c: Error only on ia32.
	* gcc.target/i386/pr82483-2.c: Likewise.
---
 gcc/config/i386/i386-builtin.def          | 126 +++++++++++-----------
 gcc/config/i386/i386.c                    |  45 +++++---
 gcc/config/i386/mmintrin.h                |  10 +-
 gcc/testsuite/gcc.target/i386/pr82483-1.c |   2 +-
 gcc/testsuite/gcc.target/i386/pr82483-2.c |   2 +-
 5 files changed, 107 insertions(+), 78 deletions(-)

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 88005f4687f..10a9d631f29 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -100,7 +100,7 @@ BDESC (0, 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKN
 BDESC (0, 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID)
 
 /* MMX */
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
 
 /* 3DNow! */
 BDESC (OPTION_MASK_ISA_3DNOW, 0, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
@@ -442,68 +442,68 @@ BDESC (0, 0, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNO
 BDESC (0, 0, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT)
 
 /* MMX */
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
 
 /* 3DNow! */
 BDESC (OPTION_MASK_ISA_3DNOW, 0, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3770bb882d4..e45284ce1a2 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -30810,13 +30810,13 @@ static const struct builtin_description bdesc_##kind[] =		    \
    we're lazy.  Add casts to make them fit.  */
 static const struct builtin_description bdesc_tm[] =
 {
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
 
   { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
   { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
@@ -30834,7 +30834,7 @@ static const struct builtin_description bdesc_tm[] =
   { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
   { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
 
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
   { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
   { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
 };
@@ -31509,14 +31509,17 @@ ix86_init_mmx_sse_builtins (void)
 	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
 
   /* MMX access to the vec_init patterns.  */
-  def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v2si",
+  def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
+		     "__builtin_ia32_vec_init_v2si",
 		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
 
-  def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v4hi",
+  def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
+		     "__builtin_ia32_vec_init_v4hi",
 		     V4HI_FTYPE_HI_HI_HI_HI,
 		     IX86_BUILTIN_VEC_INIT_V4HI);
 
-  def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v8qi",
+  def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
+		     "__builtin_ia32_vec_init_v8qi",
 		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
 		     IX86_BUILTIN_VEC_INIT_V8QI);
 
@@ -31538,7 +31541,8 @@ ix86_init_mmx_sse_builtins (void)
 		     "__builtin_ia32_vec_ext_v4hi",
 		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
 
-  def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_ext_v2si",
+  def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
+		     "__builtin_ia32_vec_ext_v2si",
 		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
 
   def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v16qi",
@@ -36671,6 +36675,23 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
        == (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4))
       && (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0)
     isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4);
+  /* Use SSE/SSE2/SSSE3 to emulate MMX intrinsics in 64-bit mode when
+     MMX is disabled.  */
+  if (TARGET_MMX_WITH_SSE)
+    {
+      if (((bisa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX))
+	   == (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX))
+	  && (isa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX)) != 0)
+	isa |= (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX);
+      if (((bisa & (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX))
+	   == (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX))
+	  && (isa & (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX)) != 0)
+	isa |= (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX);
+      if (((bisa & (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX))
+	   == (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX))
+	  && (isa & (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX)) != 0)
+	isa |= (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX);
+    }
   if ((bisa & isa) != bisa || (bisa2 & isa2) != bisa2)
     {
       char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL,
diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h
index 238b3df3121..7b613658111 100644
--- a/gcc/config/i386/mmintrin.h
+++ b/gcc/config/i386/mmintrin.h
@@ -30,7 +30,7 @@
 #if defined __x86_64__ && !defined __SSE__ || !defined __MMX__
 #pragma GCC push_options
 #ifdef __x86_64__
-#pragma GCC target("sse,mmx")
+#pragma GCC target("sse2")
 #else
 #pragma GCC target("mmx")
 #endif
@@ -315,7 +315,11 @@ _m_paddd (__m64 __m1, __m64 __m2)
 /* Add the 64-bit values in M1 to the 64-bit values in M2.  */
 #ifndef __SSE2__
 #pragma GCC push_options
+#ifdef __x86_64__
+#pragma GCC target("sse2")
+#else
 #pragma GCC target("sse2,mmx")
+#endif
 #define __DISABLE_SSE2__
 #endif /* __SSE2__ */
 
@@ -427,7 +431,11 @@ _m_psubd (__m64 __m1, __m64 __m2)
 /* Add the 64-bit values in M1 to the 64-bit values in M2.  */
 #ifndef __SSE2__
 #pragma GCC push_options
+#ifdef __x86_64__
+#pragma GCC target("sse2")
+#else
 #pragma GCC target("sse2,mmx")
+#endif
 #define __DISABLE_SSE2__
 #endif /* __SSE2__ */
 
diff --git a/gcc/testsuite/gcc.target/i386/pr82483-1.c b/gcc/testsuite/gcc.target/i386/pr82483-1.c
index 59a59dc8dfe..b2028d8dc5e 100644
--- a/gcc/testsuite/gcc.target/i386/pr82483-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr82483-1.c
@@ -1,7 +1,7 @@
 /* PR target/82483 */
 /* { dg-do compile } */
 /* { dg-options "-mssse3 -mno-mmx -Wno-psabi" } */
-/* { dg-error "needs isa option" "" { target *-*-* } 0 } */
+/* { dg-error "needs isa option" "" { target ia32 } 0 } */
 
 #include <x86intrin.h>
 
diff --git a/gcc/testsuite/gcc.target/i386/pr82483-2.c b/gcc/testsuite/gcc.target/i386/pr82483-2.c
index 305ddbd6c64..c92de405cb3 100644
--- a/gcc/testsuite/gcc.target/i386/pr82483-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr82483-2.c
@@ -1,7 +1,7 @@
 /* PR target/82483 */
 /* { dg-do compile } */
 /* { dg-options "-mssse3 -mno-mmx -Wno-psabi" } */
-/* { dg-error "needs isa option" "" { target *-*-* } 0 } */
+/* { dg-error "needs isa option" "" { target ia32 } 0 } */
 
 #include <x86intrin.h>
 
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 42/43] i386: Implement V2SF <-> V2SI conversions with SEE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (35 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 39/43] i386: Also enable SSSE3 __m64 tests in 64-bit mode H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 21/43] i386: Emulate MMX maskmovq with SSE2 maskmovdqu H.J. Lu
                   ` (6 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

In 64-bit mode, implement V2SF <-> V2SI conversions with SSE.  Only SSE
register source operand is allowed.

gcc/

	PR target/89028
	* config/i386/sse.md (floatv2siv2sf2): New.
	(fix_truncv2sfv2si2): Likewise.

gcc/testsuite/

	PR target/89028
	* gcc.target/i386/pr89028-8.c: New test.
	* gcc.target/i386/pr89028-9.c: Likewise.
---
 gcc/config/i386/sse.md                    | 31 +++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr89028-8.c | 12 +++++++++
 gcc/testsuite/gcc.target/i386/pr89028-9.c | 12 +++++++++
 3 files changed, 55 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-9.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index e8e25759c57..fe63239f53f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4897,6 +4897,17 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn "floatv2siv2sf2"
+  [(set (match_operand:V2SF 0 "register_operand" "=Yx,Yy")
+	(float:V2SF
+	  (match_operand:V2SI 1 "register_operand" "Yx,Yy")))]
+  "TARGET_MMX_WITH_SSE"
+  "%vcvtdq2ps\t{%1, %0|%0, %1}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "ssecvt")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "V4SF")])
+
 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
   [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
 	(unsigned_float:VF1_AVX512VL
@@ -5056,6 +5067,26 @@
    (set_attr "prefix" "<mask_prefix2>")
    (set_attr "mode" "TI")])
 
+(define_insn "fix_truncv2sfv2si2"
+  [(set (match_operand:V2SI 0 "register_operand" "=Yy")
+	(fix:V2SI (match_operand:V2SF 1 "register_operand" "Yy")))]
+  "TARGET_MMX_WITH_SSE"
+  "%vcvttps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set (attr "prefix_rep")
+     (if_then_else
+       (match_test "TARGET_AVX")
+     (const_string "*")
+     (const_string "1")))
+   (set (attr "prefix_data16")
+     (if_then_else
+       (match_test "TARGET_AVX")
+     (const_string "*")
+     (const_string "0")))
+   (set_attr "prefix_data16" "0")
+   (set_attr "prefix" "maybe_evex")
+   (set_attr "mode" "TI")])
+
 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
   [(match_operand:<sseintvecmode> 0 "register_operand")
    (match_operand:VF1 1 "register_operand")]
diff --git a/gcc/testsuite/gcc.target/i386/pr89028-8.c b/gcc/testsuite/gcc.target/i386/pr89028-8.c
new file mode 100644
index 00000000000..35cdf1ed332
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89028-8.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-times "cvttps2dq" 1 } } */
+
+typedef int __v2si __attribute__ ((__vector_size__ (8)));
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+
+__v2si
+foo1 ( __v2sf x)
+{
+  return __builtin_convertvector (x, __v2si);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89028-9.c b/gcc/testsuite/gcc.target/i386/pr89028-9.c
new file mode 100644
index 00000000000..17242c0402d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89028-9.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-times "cvtdq2ps" 1 } } */
+
+typedef int __v2si __attribute__ ((__vector_size__ (8)));
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+
+__v2sf
+foo1 ( __v2si x)
+{
+  return __builtin_convertvector (x, __v2sf);
+}
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 27/43] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>wv4hi3 with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (27 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 33/43] i386: Emulate MMX ssse3_palignrdi " H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 32/43] i386: Emulate MMX ssse3_psign<mode>3 " H.J. Lu
                   ` (14 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX ssse3_ph<plusminus_mnemonic>wv4hi3 with SSE by moving bits
64:95 to bits 32:63 in SSE register.  Only SSE register source operand
is allowed.

	PR target/89021
	* config/i386/sse.md (ssse3_ph<plusminus_mnemonic>wv4hi3):
	Changed to define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/sse.md | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 4503d393dc9..625e1c4cfd9 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15358,13 +15358,13 @@
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "TI")])
 
-(define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "ssse3_ph<plusminus_mnemonic>wv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
 	(vec_concat:V4HI
 	  (vec_concat:V2HI
 	    (ssse3_plusminus:HI
 	      (vec_select:HI
-		(match_operand:V4HI 1 "register_operand" "0")
+		(match_operand:V4HI 1 "register_operand" "0,0,Yy")
 		(parallel [(const_int 0)]))
 	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
 	    (ssse3_plusminus:HI
@@ -15373,19 +15373,35 @@
 	  (vec_concat:V2HI
 	    (ssse3_plusminus:HI
 	      (vec_select:HI
-		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
+		(match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy")
 		(parallel [(const_int 0)]))
 	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
 	    (ssse3_plusminus:HI
 	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
 	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
   "TARGET_SSSE3"
-  "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
+  "@
+   ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
+   #
+   #"
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+{
+  /* Generate SSE version of the operation.  */
+  rtx op0 = gen_rtx_REG (V8HImode, REGNO (operands[0]));
+  rtx op1 = gen_rtx_REG (V8HImode, REGNO (operands[1]));
+  rtx op2 = gen_rtx_REG (V8HImode, REGNO (operands[2]));
+  rtx insn = gen_ssse3_ph<plusminus_mnemonic>wv8hi3 (op0, op1, op2);
+  emit_insn (insn);
+  ix86_move_vector_high_sse_to_mmx (op0);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseiadd")
    (set_attr "atom_unit" "complex")
    (set_attr "prefix_extra" "1")
    (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
   [(set (match_operand:V8SI 0 "register_operand" "=x")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 35/43] i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (37 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 21/43] i386: Emulate MMX maskmovq with SSE2 maskmovdqu H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 28/43] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>dv2si3 with SSE H.J. Lu
                   ` (4 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

	PR target/89021
	* config/i386/mmx.md (MMXMODE:mov<mode>): Also allow
	TARGET_MMX_WITH_SSE.
	(MMXMODE:*mov<mode>_internal): Likewise.
	(MMXMODE:movmisalign<mode>): Likewise.
---
 gcc/config/i386/mmx.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index f5e96ebe3f3..283661f7887 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -75,7 +75,7 @@
 (define_expand "mov<mode>"
   [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
 	(match_operand:MMXMODE 1 "nonimmediate_operand"))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_move (<MODE>mode, operands);
   DONE;
@@ -86,7 +86,7 @@
     "=r ,o ,r,r ,m ,?!y,!y,?!y,m  ,r  ,?!y,v,v,v,m,r,v,!y,*x")
 	(match_operand:MMXMODE 1 "nonimm_or_0_operand"
     "rCo,rC,C,rm,rC,C  ,!y,m  ,?!y,?!y,r  ,C,v,m,v,v,r,*x,!y"))]
-  "TARGET_MMX
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
@@ -237,7 +237,7 @@
 (define_expand "movmisalign<mode>"
   [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
 	(match_operand:MMXMODE 1 "nonimmediate_operand"))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_move (<MODE>mode, operands);
   DONE;
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 26/43] i386: Emulate MMX umulv1siv1di3 with SSE2
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (19 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 13/43] i386: Emulate MMX pshufw " H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 22/43] i386: Emulate MMX mmx_uavgv8qi3 with SSE H.J. Lu
                   ` (22 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX umulv1siv1di3 with SSE2.  Only SSE register source operand
is allowed.

	PR target/89021
	* config/i386/mmx.md (*sse2_umulv1siv1di3): Add SSE2 emulation.
---
 gcc/config/i386/mmx.md | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index c52e5b2e393..f5e96ebe3f3 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -955,20 +955,24 @@
   "ix86_fixup_binary_operands_no_copy (MULT, V2SImode, operands);")
 
 (define_insn "*sse2_umulv1siv1di3"
-  [(set (match_operand:V1DI 0 "register_operand" "=y")
+  [(set (match_operand:V1DI 0 "register_operand" "=y,Yx,Yy")
         (mult:V1DI
 	  (zero_extend:V1DI
 	    (vec_select:V1SI
-	      (match_operand:V2SI 1 "nonimmediate_operand" "%0")
+	      (match_operand:V2SI 1 "nonimmediate_operand" "%0,0,Yy")
 	      (parallel [(const_int 0)])))
 	  (zero_extend:V1DI
 	    (vec_select:V1SI
-	      (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+	      (match_operand:V2SI 2 "nonimmediate_operand" "ym,Yx,Yy")
 	      (parallel [(const_int 0)])))))]
   "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2SImode, operands)"
-  "pmuludq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "@
+   pmuludq\t{%2, %0|%0, %2}
+   pmuludq\t{%2, %0|%0, %2}
+   vpmuludq\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_<code>v4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 28/43] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>dv2si3 with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (38 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 35/43] i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 18/43] i386: Emulate MMX V4HI smaxmin/V8QI umaxmin " H.J. Lu
                   ` (3 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX ssse3_ph<plusminus_mnemonic>dv2si3 with SSE by moving bits
64:95 to bits 32:63 in SSE register.  Only SSE register source operand
is allowed.

	PR target/89021
	* config/i386/sse.md (ssse3_ph<plusminus_mnemonic>dv2si3):
	Changed to define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/sse.md | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 625e1c4cfd9..029f33a7000 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15480,26 +15480,42 @@
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "TI")])
 
-(define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "ssse3_ph<plusminus_mnemonic>dv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y,Yx,Yy")
 	(vec_concat:V2SI
 	  (plusminus:SI
 	    (vec_select:SI
-	      (match_operand:V2SI 1 "register_operand" "0")
+	      (match_operand:V2SI 1 "register_operand" "0,0,Yy")
 	      (parallel [(const_int 0)]))
 	    (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
 	  (plusminus:SI
 	    (vec_select:SI
-	      (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+	      (match_operand:V2SI 2 "nonimmediate_operand" "ym,Yx,Yy")
 	      (parallel [(const_int 0)]))
 	    (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
   "TARGET_SSSE3"
-  "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
+  "@
+   ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
+   #
+   #"
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+{
+  /* Generate SSE version of the operation.  */
+  rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
+  rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1]));
+  rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
+  rtx insn = gen_ssse3_ph<plusminus_mnemonic>dv4si3 (op0, op1, op2);
+  emit_insn (insn);
+  ix86_move_vector_high_sse_to_mmx (op0);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseiadd")
    (set_attr "atom_unit" "complex")
    (set_attr "prefix_extra" "1")
    (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "avx2_pmaddubsw256"
   [(set (match_operand:V16HI 0 "register_operand" "=x,v")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 18/43] i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (39 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 28/43] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>dv2si3 with SSE H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 37/43] i386: Allow MMX intrinsic emulation " H.J. Lu
                   ` (2 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE.  Only SSE register source
operand is allowed.

	PR target/89021
	* config/i386/mmx.md (smaxmin:<code>v4hi3): New.
	(umaxmin:<code>v8qi3): Likewise.
	(smaxmin:*mmx_<code>v4hi3): Add SSE emulation.
	(umaxmin:*mmx_<code>v8qi3): Likewise.
---
 gcc/config/i386/mmx.md | 48 +++++++++++++++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 12 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index ad33e587352..ee5acb00a6d 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -960,16 +960,28 @@
   "TARGET_SSE || TARGET_3DNOW_A"
   "ix86_fixup_binary_operands_no_copy (<CODE>, V4HImode, operands);")
 
+(define_expand "<code>v4hi3"
+  [(set (match_operand:V4HI 0 "register_operand")
+        (smaxmin:V4HI
+	  (match_operand:V4HI 1 "nonimmediate_operand")
+	  (match_operand:V4HI 2 "nonimmediate_operand")))]
+  "TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (<CODE>, V4HImode, operands);")
+
 (define_insn "*mmx_<code>v4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
         (smaxmin:V4HI
-	  (match_operand:V4HI 1 "nonimmediate_operand" "%0")
-	  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+	  (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yy")
+	  (match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy")))]
   "(TARGET_SSE || TARGET_3DNOW_A)
    && ix86_binary_operator_ok (<CODE>, V4HImode, operands)"
-  "p<maxmin_int>w\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+  "@
+   p<maxmin_int>w\t{%2, %0|%0, %2}
+   p<maxmin_int>w\t{%2, %0|%0, %2}
+   vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_<code>v8qi3"
   [(set (match_operand:V8QI 0 "register_operand")
@@ -979,16 +991,28 @@
   "TARGET_SSE || TARGET_3DNOW_A"
   "ix86_fixup_binary_operands_no_copy (<CODE>, V8QImode, operands);")
 
+(define_expand "<code>v8qi3"
+  [(set (match_operand:V8QI 0 "register_operand")
+        (umaxmin:V8QI
+	  (match_operand:V8QI 1 "nonimmediate_operand")
+	  (match_operand:V8QI 2 "nonimmediate_operand")))]
+  "TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (<CODE>, V8QImode, operands);")
+
 (define_insn "*mmx_<code>v8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+  [(set (match_operand:V8QI 0 "register_operand" "=y,Yx,Yy")
         (umaxmin:V8QI
-	  (match_operand:V8QI 1 "nonimmediate_operand" "%0")
-	  (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+	  (match_operand:V8QI 1 "nonimmediate_operand" "%0,0,Yy")
+	  (match_operand:V8QI 2 "nonimmediate_operand" "ym,Yx,Yy")))]
   "(TARGET_SSE || TARGET_3DNOW_A)
    && ix86_binary_operator_ok (<CODE>, V8QImode, operands)"
-  "p<maxmin_int>b\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+  "@
+   p<maxmin_int>b\t{%2, %0|%0, %2}
+   p<maxmin_int>b\t{%2, %0|%0, %2}
+   vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_ashr<mode>3"
   [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 10/43] i386: Emulate MMX mmx_andnot<mode>3 with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (32 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 36/43] i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-10 10:28   ` Uros Bizjak
  2019-02-09 13:25 ` [PATCH 41/43] i386: Implement V2SF add/sub/mul with SSE H.J. Lu
                   ` (9 subsequent siblings)
  43 siblings, 1 reply; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX mmx_andnot<mode>3 with SSE.  Only SSE register source operand
is allowed.

	PR target/89021
	* config/i386/mmx.md (mmx_andnot<mode>3): Also allow
	TARGET_MMX_WITH_SSE.  Add SSE support.
---
 gcc/config/i386/mmx.md | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index fae2e43af24..1e235bfcde4 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1093,14 +1093,18 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (define_insn "mmx_andnot<mode>3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yx,Yy")
 	(and:MMXMODEI
-	  (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0"))
-	  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pandn\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+	  (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,Yy"))
+	  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,Yx,Yy")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   pandn\t{%2, %0|%0, %2}
+   pandn\t{%2, %0|%0, %2}
+   vpandn\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_<code><mode>3"
   [(set (match_operand:MMXMODEI 0 "register_operand")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 33/43] i386: Emulate MMX ssse3_palignrdi with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (26 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 38/43] i386: Add tests for MMX intrinsic emulations " H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 27/43] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>wv4hi3 " H.J. Lu
                   ` (15 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX version of palignrq with SSE version by concatenating 2
64-bit MMX operands into a single 128-bit SSE operand, followed by
SSE psrldq.  Only SSE register source operand is allowed.

	PR target/89021
	* config/i386/sse.md (ssse3_palignrdi): Changed to
	define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/sse.md | 53 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 44 insertions(+), 9 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 47a97540d82..92c12319d16 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15975,23 +15975,58 @@
    (set_attr "prefix" "orig,vex,evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "ssse3_palignrdi"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-	(unspec:DI [(match_operand:DI 1 "register_operand" "0")
-		    (match_operand:DI 2 "nonimmediate_operand" "ym")
-		    (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+(define_insn_and_split "ssse3_palignrdi"
+  [(set (match_operand:DI 0 "register_operand" "=y,Yx,Yy")
+	(unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yy")
+		    (match_operand:DI 2 "nonimmediate_operand" "ym,Yx,Yy")
+		    (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
 		   UNSPEC_PALIGNR))]
   "TARGET_SSSE3"
 {
-  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
-  return "palignr\t{%3, %2, %0|%0, %2, %3}";
+  if (TARGET_MMX_WITH_SSE)
+    return "#";
+  else
+    {
+      operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+      return "palignr\t{%3, %2, %0|%0, %2, %3}";
+    }
 }
-  [(set_attr "type" "sseishft")
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+{
+  /* Emulate MMX palignrdi with SSE psrldq.  */
+  rtx op0 = gen_rtx_REG (V2DImode, REGNO (operands[0]));
+  rtx insn;
+  if (TARGET_AVX)
+    insn = gen_vec_concatv2di (op0, operands[2], operands[1]);
+  else
+    {
+      /* NB: SSE can only concatenate OP0 and OP1 to OP0.  */
+      insn = gen_vec_concatv2di (op0, operands[1], operands[2]);
+      emit_insn (insn);
+      /* Swap bits 0:63 with bits 64:127.  */
+      rtx mask = gen_rtx_PARALLEL (VOIDmode,
+				   gen_rtvec (4, GEN_INT (2),
+					      GEN_INT (3),
+					      GEN_INT (0),
+					      GEN_INT (1)));
+      rtx op1 = gen_rtx_REG (V4SImode, REGNO (op0));
+      rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask);
+      insn = gen_rtx_SET (op1, op2);
+    }
+  emit_insn (insn);
+  op0 = gen_rtx_REG (V1TImode, REGNO (op0));
+  insn = gen_sse2_lshrv1ti3 (op0, op0, operands[3]);
+  emit_insn (insn);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseishft")
    (set_attr "atom_unit" "sishuf")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
 ;; modes for abs instruction on pre AVX-512 targets.
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 41/43] i386: Implement V2SF add/sub/mul with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (33 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 10/43] i386: Emulate MMX mmx_andnot<mode>3 with SSE H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 39/43] i386: Also enable SSSE3 __m64 tests in 64-bit mode H.J. Lu
                   ` (8 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

In 64-bit mode, implement V2SF add/sub/mul with SSE.  Only SSE register
source operand is allowed.

gcc/

	PR target/89028
	* config/i386/i386.md (comm): Handle mult.
	* config/i386/mmx.md (plusminusmult): New.
	(plusminusmult_insn): Likewise.
	(plusminusmult_mnemonic): Likewise.
	(plusminusmult_type): Likewise.
	(mmx_addv2sf3): Add "&& !TARGET_MMX_WITH_SSE".
	(*mmx_addv2sf3): Likewise.
	(mmx_subv2sf3): Likewise.
	(mmx_subrv2sf3): Likewise.
	(*mmx_subv2sf3): Likewise.
	(mmx_mulv2sf3): Likewise.
	(*mmx_mulv2sf3): Likewise.
	(<plusminusmult_insn>v2sf3): New.
	(*sse_<plusminusmult_insn>v2sf3): Likewise.

gcc/testsuite/

	PR target/89028
	* gcc.target/i386/pr89028-2.c: New test.
	* gcc.target/i386/pr89028-3.c: Likewise.
	* gcc.target/i386/pr89028-4.c: Likewise.
	* gcc.target/i386/pr89028-5.c: Likewise.
	* gcc.target/i386/pr89028-6.c: Likewise.
	* gcc.target/i386/pr89028-7.c: Likewise.
---
 gcc/config/i386/i386.md                   |  3 +-
 gcc/config/i386/mmx.md                    | 56 ++++++++++++++++++++---
 gcc/testsuite/gcc.target/i386/pr89028-2.c | 11 +++++
 gcc/testsuite/gcc.target/i386/pr89028-3.c | 14 ++++++
 gcc/testsuite/gcc.target/i386/pr89028-4.c | 14 ++++++
 gcc/testsuite/gcc.target/i386/pr89028-5.c | 11 +++++
 gcc/testsuite/gcc.target/i386/pr89028-6.c | 14 ++++++
 gcc/testsuite/gcc.target/i386/pr89028-7.c | 14 ++++++
 8 files changed, 129 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-7.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 72685107fc0..cda973c0fbf 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -873,7 +873,8 @@
 
 ;; Mark commutative operators as such in constraints.
 (define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%")
-			(minus "") (ss_minus "") (us_minus "")])
+			(minus "") (ss_minus "") (us_minus "")
+			(mult "%")])
 
 ;; Mapping of max and min
 (define_code_iterator maxmin [smax smin umax umin])
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 31db0e8b0c7..b2af9ace2c3 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -63,6 +63,20 @@
 ;; Instruction suffix for truncations with saturation.
 (define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")])
 
+(define_code_iterator plusminusmult [plus minus mult])
+
+;; Base name for define_insn
+(define_code_attr plusminusmult_insn
+  [(plus "add") (minus "sub") (mult "mul")])
+
+;; Base name for insn mnemonic.
+(define_code_attr plusminusmult_mnemonic
+  [(plus "add") (minus "sub") (mult "mul")])
+
+;; Suffix appended to "sse" to form the insn "type" attribute value.
+(define_code_attr plusminusmult_type
+  [(plus "add") (minus "add") (mult "mul")])
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Move patterns
@@ -277,14 +291,16 @@
 	(plus:V2SF
 	  (match_operand:V2SF 1 "nonimmediate_operand")
 	  (match_operand:V2SF 2 "nonimmediate_operand")))]
-  "TARGET_3DNOW"
+  "TARGET_3DNOW && !TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (PLUS, V2SFmode, operands);")
 
 (define_insn "*mmx_addv2sf3"
   [(set (match_operand:V2SF 0 "register_operand" "=y")
 	(plus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
 		   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
+  "TARGET_3DNOW
+   && !TARGET_MMX_WITH_SSE
+   && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
   "pfadd\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxadd")
    (set_attr "prefix_extra" "1")
@@ -294,19 +310,21 @@
   [(set (match_operand:V2SF 0 "register_operand")
         (minus:V2SF (match_operand:V2SF 1 "register_operand")
 		    (match_operand:V2SF 2 "nonimmediate_operand")))]
-  "TARGET_3DNOW")
+  "TARGET_3DNOW && !TARGET_MMX_WITH_SSE")
 
 (define_expand "mmx_subrv2sf3"
   [(set (match_operand:V2SF 0 "register_operand")
         (minus:V2SF (match_operand:V2SF 2 "register_operand")
 		    (match_operand:V2SF 1 "nonimmediate_operand")))]
-  "TARGET_3DNOW")
+  "TARGET_3DNOW && !TARGET_MMX_WITH_SSE")
 
 (define_insn "*mmx_subv2sf3"
   [(set (match_operand:V2SF 0 "register_operand" "=y,y")
         (minus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "0,ym")
 		    (match_operand:V2SF 2 "nonimmediate_operand" "ym,0")))]
-  "TARGET_3DNOW && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "TARGET_3DNOW
+   && !TARGET_MMX_WITH_SSE
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
    pfsub\t{%2, %0|%0, %2}
    pfsubr\t{%1, %0|%0, %1}"
@@ -318,19 +336,43 @@
   [(set (match_operand:V2SF 0 "register_operand")
 	(mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand")
 		   (match_operand:V2SF 2 "nonimmediate_operand")))]
-  "TARGET_3DNOW"
+  "TARGET_3DNOW && !TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (MULT, V2SFmode, operands);")
 
 (define_insn "*mmx_mulv2sf3"
   [(set (match_operand:V2SF 0 "register_operand" "=y")
 	(mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
 		   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
+  "TARGET_3DNOW
+   && !TARGET_MMX_WITH_SSE
+   && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
   "pfmul\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxmul")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "V2SF")])
 
+(define_expand "<plusminusmult_insn>v2sf3"
+  [(set (match_operand:V2SF 0 "register_operand")
+	(plusminusmult:V2SF
+	  (match_operand:V2SF 1 "nonimmediate_operand")
+	  (match_operand:V2SF 2 "nonimmediate_operand")))]
+  "TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands);")
+
+(define_insn "*sse_<plusminusmult_insn>v2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=Yx,Yy")
+        (plusminusmult:V2SF
+	  (match_operand:V2SF 1 "nonimmediate_operand" "<comm>0,Yy")
+	  (match_operand:V2SF 2 "nonimmediate_operand" "Yx,Yy")))]
+  "TARGET_MMX_WITH_SSE
+   && ix86_binary_operator_ok (<CODE>, V4SFmode, operands)"
+  "@
+   <plusminusmult_mnemonic>ps\t{%2, %0|%0, %2}
+   v<plusminusmult_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sse<plusminusmult_type>")
+   (set_attr "mode" "V4SF")])
+
 (define_expand "mmx_<code>v2sf3"
   [(set (match_operand:V2SF 0 "register_operand")
         (smaxmin:V2SF
diff --git a/gcc/testsuite/gcc.target/i386/pr89028-2.c b/gcc/testsuite/gcc.target/i386/pr89028-2.c
new file mode 100644
index 00000000000..d096b0b6863
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89028-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-times "addps" 1 } } */
+
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+
+__v2sf
+foo1 (__v2sf x, __v2sf y)
+{
+  return x + y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89028-3.c b/gcc/testsuite/gcc.target/i386/pr89028-3.c
new file mode 100644
index 00000000000..0fa187aaf72
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89028-3.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-times "movlps" 2 } } */
+/* { dg-final { scan-assembler-times "addps" 1 } } */
+
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+
+extern __v2sf x, y, z;
+
+__v2sf
+foo2 (void)
+{
+  return x + y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89028-4.c b/gcc/testsuite/gcc.target/i386/pr89028-4.c
new file mode 100644
index 00000000000..b25f67632cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89028-4.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-times "movlps" 1 } } */
+/* { dg-final { scan-assembler-times "addps" 1 } } */
+
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+
+extern __v2sf x, y, z;
+
+void
+foo3 (__v2sf x, __v2sf y)
+{
+  z = x + y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89028-5.c b/gcc/testsuite/gcc.target/i386/pr89028-5.c
new file mode 100644
index 00000000000..4ead7187605
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89028-5.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-times "mulps" 1 } } */
+
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+
+__v2sf
+foo1 (__v2sf x, __v2sf y)
+{
+  return x * y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89028-6.c b/gcc/testsuite/gcc.target/i386/pr89028-6.c
new file mode 100644
index 00000000000..9277c848c6c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89028-6.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-times "movlps" 2 } } */
+/* { dg-final { scan-assembler-times "mulps" 1 } } */
+
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+
+extern __v2sf x, y, z;
+
+__v2sf
+foo2 (void)
+{
+  return x * y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89028-7.c b/gcc/testsuite/gcc.target/i386/pr89028-7.c
new file mode 100644
index 00000000000..c8af7b2a4e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89028-7.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-times "movlps" 1 } } */
+/* { dg-final { scan-assembler-times "mulps" 1 } } */
+
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+
+extern __v2sf x, y, z;
+
+void
+foo3 (__v2sf x, __v2sf y)
+{
+  z = x * y;
+}
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 34/43] i386: Emulate MMX abs<mode>2 with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (11 preceding siblings ...)
  2019-02-09 13:24 ` [PATCH 12/43] i386: Emulate MMX vec_dupv2si " H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 43/43] i386: Implement V2SF comparisons " H.J. Lu
                   ` (30 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX abs<mode>2 with SSE.  Only SSE register source operand is
allowed.

	PR target/89021
	* config/i386/sse.md (abs<mode>2): Add SSE emulation.
---
 gcc/config/i386/sse.md | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 92c12319d16..e8e25759c57 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -16090,16 +16090,19 @@
 })
 
 (define_insn "abs<mode>2"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yy")
 	(abs:MMXMODEI
-	  (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
+	  (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym,Yy")))]
   "TARGET_SSSE3"
-  "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
-  [(set_attr "type" "sselog1")
+  "@
+   pabs<mmxvecsize>\t{%1, %0|%0, %1}
+   %vpabs<mmxvecsize>\t{%1, %0|%0, %1}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "sselog1")
    (set_attr "prefix_rep" "0")
    (set_attr "prefix_extra" "1")
    (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI")])
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 21/43] i386: Emulate MMX maskmovq with SSE2 maskmovdqu
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (36 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 42/43] i386: Implement V2SF <-> V2SI conversions with SSE H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 35/43] i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE H.J. Lu
                   ` (5 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX maskmovq with SSE2 maskmovdqu in 64-bit mode by zero-extending
source and mask operands to 128 bits.  Handle unmapped bits 64:127 at
memory address by adjusting source and mask operands together with memory
address.

	PR target/89021
	* config/i386/xmmintrin.h: Emulate MMX maskmovq with SSE2
	maskmovdqu in 64-bit mode.
---
 gcc/config/i386/xmmintrin.h | 61 +++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 58284378514..e797795f127 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -1165,7 +1165,68 @@ _m_pshufw (__m64 __A, int const __N)
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
 {
+#ifdef __x86_64__
+  /* Emulate MMX maskmovq with SSE2 maskmovdqu and handle unmapped bits
+     64:127 at address __P.  */
+  typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+  typedef char __v16qi __attribute__ ((__vector_size__ (16)));
+  /* Zero-extend __A and __N to 128 bits.  */
+  __v2di __A128 = __extension__ (__v2di) { ((__v1di) __A)[0], 0 };
+  __v2di __N128 = __extension__ (__v2di) { ((__v1di) __N)[0], 0 };
+
+  /* Check the alignment of __P.  */
+  __SIZE_TYPE__ offset = ((__SIZE_TYPE__) __P) & 0xf;
+  if (offset)
+    {
+      /* If the misalignment of __P > 8, subtract 8 bytes from __P.
+	 Otherwise, subtract the misalignment from __P.  */
+      if (offset > 8)
+	offset = 8;
+      __P = (char *) (((__SIZE_TYPE__) __P) - offset);
+
+      /* Shift __A128 and __N128 to the left by the adjustment.  */
+      switch (offset)
+	{
+	case 1:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 8);
+	  break;
+	case 2:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 2 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 2 * 8);
+	  break;
+	case 3:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 3 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 3 * 8);
+	  break;
+	case 4:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 4 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 4 * 8);
+	  break;
+	case 5:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 5 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 5 * 8);
+	  break;
+	case 6:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 6 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 6 * 8);
+	  break;
+	case 7:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 7 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 7 * 8);
+	  break;
+	case 8:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 8 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 8 * 8);
+	  break;
+	default:
+	  break;
+	}
+    }
+  __builtin_ia32_maskmovdqu ((__v16qi)__A128, (__v16qi)__N128, __P);
+#else
   __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
+#endif
 }
 
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 32/43] i386: Emulate MMX ssse3_psign<mode>3 with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (28 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 27/43] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>wv4hi3 " H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers H.J. Lu
                   ` (13 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX ssse3_psign<mode>3 with SSE.  Only SSE register source operand
is allowed.

	PR target/89021
	* config/i386/sse.md (ssse3_psign<mode>3): Add SSE emulation.
---
 gcc/config/i386/sse.md | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a32d67f811a..47a97540d82 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15906,17 +15906,21 @@
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "ssse3_psign<mode>3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yx,Yy")
 	(unspec:MMXMODEI
-	  [(match_operand:MMXMODEI 1 "register_operand" "0")
-	   (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
+	  [(match_operand:MMXMODEI 1 "register_operand" "0,0,Yy")
+	   (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,Yx,Yy")]
 	  UNSPEC_PSIGN))]
   "TARGET_SSSE3"
-  "psign<mmxvecsize>\t{%2, %0|%0, %2}";
-  [(set_attr "type" "sselog1")
+  "@
+   psign<mmxvecsize>\t{%2, %0|%0, %2}
+   psign<mmxvecsize>\t{%2, %0|%0, %2}
+   vpsign<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sselog1")
    (set_attr "prefix_extra" "1")
    (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
   [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 39/43] i386: Also enable SSSE3 __m64 tests in 64-bit mode
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (34 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 41/43] i386: Implement V2SF add/sub/mul with SSE H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 42/43] i386: Implement V2SF <-> V2SI conversions with SSE H.J. Lu
                   ` (7 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Since we now emulate MMX intrinsics with SSE in 64-bit mode without
3DNOW, we can enable SSSE3 __m64 tests even when AVX is enabled.

	PR target/89021
	* gcc.target/i386/ssse3-pabsb.c: Also enable __m64 check in
	64-bit mode without 3DNOW.
	* gcc.target/i386/ssse3-pabsd.c: Likewise.
	* gcc.target/i386/ssse3-pabsw.c: Likewise.
	* gcc.target/i386/ssse3-palignr.c: Likewise.
	* gcc.target/i386/ssse3-phaddd.c: Likewise.
	* gcc.target/i386/ssse3-phaddsw.c: Likewise.
	* gcc.target/i386/ssse3-phaddw.c: Likewise.
	* gcc.target/i386/ssse3-phsubd.c: Likewise.
	* gcc.target/i386/ssse3-phsubsw.c: Likewise.
	* gcc.target/i386/ssse3-phsubw.c: Likewise.
	* gcc.target/i386/ssse3-pmaddubsw.c: Likewise.
	* gcc.target/i386/ssse3-pmulhrsw.c: Likewise.
	* gcc.target/i386/ssse3-pshufb.c: Likewise.
	* gcc.target/i386/ssse3-psignb.c: Likewise.
	* gcc.target/i386/ssse3-psignd.c: Likewise.
	* gcc.target/i386/ssse3-psignw.c: Likewise.
---
 gcc/testsuite/gcc.target/i386/ssse3-pabsb.c     | 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-pabsd.c     | 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-pabsw.c     | 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-palignr.c   | 6 +++---
 gcc/testsuite/gcc.target/i386/ssse3-phaddd.c    | 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-phaddsw.c   | 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-phaddw.c    | 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-phsubd.c    | 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-phsubsw.c   | 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-phsubw.c    | 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-pmaddubsw.c | 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-pmulhrsw.c  | 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-pshufb.c    | 6 +++---
 gcc/testsuite/gcc.target/i386/ssse3-psignb.c    | 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-psignd.c    | 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-psignw.c    | 4 ++--
 16 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c b/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c
index 7caa1b6c3a6..68d81b4a068 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c
@@ -15,7 +15,7 @@
 #include "ssse3-vals.h"
 #include <tmmintrin.h>
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Test the 64-bit form */
 static void
 ssse3_test_pabsb (int *i1, int *r)
@@ -63,7 +63,7 @@ TEST (void)
       /* Manually compute the result */
       compute_correct_result(&vals[i + 0], ck);
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
       /* Run the 64-bit tests */
       ssse3_test_pabsb (&vals[i + 0], &r[0]);
       ssse3_test_pabsb (&vals[i + 2], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pabsd.c b/gcc/testsuite/gcc.target/i386/ssse3-pabsd.c
index 3a73cf01170..9eb1aedc838 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-pabsd.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-pabsd.c
@@ -16,7 +16,7 @@
 
 #include <tmmintrin.h>
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Test the 64-bit form */
 static void
 ssse3_test_pabsd (int *i1, int *r)
@@ -62,7 +62,7 @@ TEST (void)
       /* Manually compute the result */
       compute_correct_result(&vals[i + 0], ck);
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
       /* Run the 64-bit tests */
       ssse3_test_pabsd (&vals[i + 0], &r[0]);
       ssse3_test_pabsd (&vals[i + 2], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pabsw.c b/gcc/testsuite/gcc.target/i386/ssse3-pabsw.c
index 67e4721b8e6..36e99a46a1a 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-pabsw.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-pabsw.c
@@ -16,7 +16,7 @@
 
 #include <tmmintrin.h>
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Test the 64-bit form */
 static void
 ssse3_test_pabsw (int *i1, int *r)
@@ -64,7 +64,7 @@ TEST (void)
       /* Manually compute the result */
       compute_correct_result (&vals[i + 0], ck);
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
       /* Run the 64-bit tests */
       ssse3_test_pabsw (&vals[i + 0], &r[0]);
       ssse3_test_pabsw (&vals[i + 2], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-palignr.c b/gcc/testsuite/gcc.target/i386/ssse3-palignr.c
index dbee9bee4aa..c46e5d40f9a 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-palignr.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-palignr.c
@@ -17,7 +17,7 @@
 #include <tmmintrin.h>
 #include <string.h>
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Test the 64-bit form */
 static void
 ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
@@ -214,7 +214,7 @@ compute_correct_result_128 (int *i1, int *i2, unsigned int imm, int *r)
       bout[i] = buf[imm + i];
 }
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 static void
 compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
 {
@@ -256,7 +256,7 @@ TEST (void)
   for (i = 0; i < 256; i += 8)
     for (imm = 0; imm < 100; imm++)
       {
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 	/* Manually compute the result */
 	compute_correct_result_64 (&vals[i + 0], &vals[i + 4], imm, ck);
 
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-phaddd.c b/gcc/testsuite/gcc.target/i386/ssse3-phaddd.c
index bef78168659..ff94513a345 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-phaddd.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-phaddd.c
@@ -16,7 +16,7 @@
 
 #include <tmmintrin.h>
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Test the 64-bit form */
 static void
 ssse3_test_phaddd (int *i1, int *i2, int *r)
@@ -64,7 +64,7 @@ TEST (void)
       /* Manually compute the result */
       compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
       /* Run the 64-bit tests */
       ssse3_test_phaddd (&vals[i + 0], &vals[i + 2], &r[0]);
       ssse3_test_phaddd (&vals[i + 4], &vals[i + 6], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-phaddsw.c b/gcc/testsuite/gcc.target/i386/ssse3-phaddsw.c
index ff31fe5a5fe..1d7b6bf0715 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-phaddsw.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-phaddsw.c
@@ -16,7 +16,7 @@
 
 #include <tmmintrin.h>
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Test the 64-bit form */
 static void
 ssse3_test_phaddsw (int *i1, int *i2, int *r)
@@ -78,7 +78,7 @@ TEST (void)
       /* Manually compute the result */
       compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
       /* Run the 64-bit tests */
       ssse3_test_phaddsw (&vals[i + 0], &vals[i + 2], &r[0]);
       ssse3_test_phaddsw (&vals[i + 4], &vals[i + 6], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-phaddw.c b/gcc/testsuite/gcc.target/i386/ssse3-phaddw.c
index 05c0afd4f69..0c7476e75c0 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-phaddw.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-phaddw.c
@@ -16,7 +16,7 @@
 
 #include <tmmintrin.h>
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Test the 64-bit form */
 static void
 ssse3_test_phaddw (int *i1, int *i2, int *r)
@@ -67,7 +67,7 @@ TEST (void)
       /* Manually compute the result */
       compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
       /* Run the 64-bit tests */
       ssse3_test_phaddw (&vals[i + 0], &vals[i + 2], &r[0]);
       ssse3_test_phaddw (&vals[i + 4], &vals[i + 6], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-phsubd.c b/gcc/testsuite/gcc.target/i386/ssse3-phsubd.c
index 5884e5c12fe..2a6cb058f85 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-phsubd.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-phsubd.c
@@ -16,7 +16,7 @@
 
 #include <tmmintrin.h>
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Test the 64-bit form */
 static void
 ssse3_test_phsubd (int *i1, int *i2, int *r)
@@ -63,7 +63,7 @@ TEST (void)
       /* Manually compute the result */
       compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
       /* Run the 64-bit tests */
       ssse3_test_phsubd (&vals[i + 0], &vals[i + 2], &r[0]);
       ssse3_test_phsubd (&vals[i + 4], &vals[i + 6], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-phsubsw.c b/gcc/testsuite/gcc.target/i386/ssse3-phsubsw.c
index 371c8d112d1..e97044db270 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-phsubsw.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-phsubsw.c
@@ -16,7 +16,7 @@
 
 #include <tmmintrin.h>
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Test the 64-bit form */
 static void
 ssse3_test_phsubsw (int *i1, int *i2, int *r)
@@ -81,7 +81,7 @@ TEST (void)
       /* Manually compute the result */
       compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
       /* Run the 64-bit tests */
       ssse3_test_phsubsw (&vals[i + 0], &vals[i + 2], &r[0]);
       ssse3_test_phsubsw (&vals[i + 4], &vals[i + 6], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-phsubw.c b/gcc/testsuite/gcc.target/i386/ssse3-phsubw.c
index f3dbf9c9896..cf6e632964f 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-phsubw.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-phsubw.c
@@ -15,7 +15,7 @@
 
 #include <tmmintrin.h>
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Test the 64-bit form */
 static void
 ssse3_test_phsubw (int *i1, int *i2, int *r)
@@ -66,7 +66,7 @@ TEST (void)
       /* Manually compute the result */
       compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
       /* Run the 64-bit tests */
       ssse3_test_phsubw (&vals[i + 0], &vals[i + 2], &r[0]);
       ssse3_test_phsubw (&vals[i + 4], &vals[i + 6], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pmaddubsw.c b/gcc/testsuite/gcc.target/i386/ssse3-pmaddubsw.c
index 00bfc844f42..0efa4c2ccdc 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-pmaddubsw.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-pmaddubsw.c
@@ -16,7 +16,7 @@
 
 #include <tmmintrin.h>
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Test the 64-bit form */
 static void
 ssse3_test_pmaddubsw (int *i1, int *i2, int *r)
@@ -81,7 +81,7 @@ TEST (void)
       /* Manually compute the result */
       compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
       /* Run the 64-bit tests */
       ssse3_test_pmaddubsw (&vals[i + 0], &vals[i + 4], &r[0]);
       ssse3_test_pmaddubsw (&vals[i + 2], &vals[i + 6], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pmulhrsw.c b/gcc/testsuite/gcc.target/i386/ssse3-pmulhrsw.c
index 24570b3bd63..b78ced28003 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-pmulhrsw.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-pmulhrsw.c
@@ -16,7 +16,7 @@
 
 #include <tmmintrin.h>
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Test the 64-bit form */
 static void
 ssse3_test_pmulhrsw (int *i1, int *i2, int *r)
@@ -68,7 +68,7 @@ TEST (void)
       /* Manually compute the result */
       compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
       /* Run the 64-bit tests */
       ssse3_test_pmulhrsw (&vals[i + 0], &vals[i + 4], &r[0]);
       ssse3_test_pmulhrsw (&vals[i + 2], &vals[i + 6], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pshufb.c b/gcc/testsuite/gcc.target/i386/ssse3-pshufb.c
index b995456b61c..b995076eff2 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-pshufb.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-pshufb.c
@@ -16,7 +16,7 @@
 
 #include <tmmintrin.h>
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Test the 64-bit form */
 static void
 ssse3_test_pshufb (int *i1, int *i2, int *r)
@@ -38,7 +38,7 @@ ssse3_test_pshufb128 (int *i1, int *i2, int *r)
   *(__m128i *)r = _mm_shuffle_epi8 (t1, t2);
 }
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Routine to manually compute the results */
 static void
 compute_correct_result_64 (int *i1, int *i2, int *r)
@@ -91,7 +91,7 @@ TEST (void)
 
   for (i = 0; i < 256; i += 8)
     {
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
       /* Manually compute the result */
       compute_correct_result_64 (&vals[i + 0], &vals[i + 4], ck);
 
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-psignb.c b/gcc/testsuite/gcc.target/i386/ssse3-psignb.c
index 7462929aa20..d60309c2975 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-psignb.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-psignb.c
@@ -15,7 +15,7 @@
 
 #include <tmmintrin.h>
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Test the 64-bit form */
 static void
 ssse3_test_psignb (int *i1, int *i2, int *r)
@@ -68,7 +68,7 @@ TEST (void)
       /* Manually compute the result */
       compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
       /* Run the 64-bit tests */
       ssse3_test_psignb (&vals[i + 0], &vals[i + 4], &r[0]);
       ssse3_test_psignb (&vals[i + 2], &vals[i + 6], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-psignd.c b/gcc/testsuite/gcc.target/i386/ssse3-psignd.c
index eca0489f8d3..691bc6f9205 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-psignd.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-psignd.c
@@ -15,7 +15,7 @@
 
 #include <tmmintrin.h>
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Test the 64-bit form */
 static void
 ssse3_test_psignd (int *i1, int *i2, int *r)
@@ -65,7 +65,7 @@ TEST (void)
       /* Manually compute the result */
       compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
       /* Run the 64-bit tests */
       ssse3_test_psignd (&vals[i + 0], &vals[i + 4], &r[0]);
       ssse3_test_psignd (&vals[i + 2], &vals[i + 6], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-psignw.c b/gcc/testsuite/gcc.target/i386/ssse3-psignw.c
index 00a506fd894..dc2cbb40a79 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-psignw.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-psignw.c
@@ -15,7 +15,7 @@
 
 #include <tmmintrin.h>
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Test the 64-bit form */
 static void
 ssse3_test_psignw (int *i1, int *i2, int *r)
@@ -68,7 +68,7 @@ TEST (void)
       /* Manually compute the result */
       compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
       /* Run the 64-bit tests */
       ssse3_test_psignw (&vals[i + 0], &vals[i + 4], &r[0]);
       ssse3_test_psignw (&vals[i + 2], &vals[i + 6], &r[2]);
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 38/43] i386: Add tests for MMX intrinsic emulations with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (25 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 20/43] i386: Emulate MMX mmx_umulv4hi3_highpart " H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 33/43] i386: Emulate MMX ssse3_palignrdi " H.J. Lu
                   ` (16 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Test MMX intrinsics with -msse2 -mno-mmx in 64-bit mode.

	PR target/89021
	* gcc.target/i386/mmx-vals.h: New file.
	* gcc.target/i386/sse2-mmx-2.c: Likewise.
	* gcc.target/i386/sse2-mmx-3.c: Likewise.
	* gcc.target/i386/sse2-mmx-4.c: Likewise.
	* gcc.target/i386/sse2-mmx-5.c: Likewise.
	* gcc.target/i386/sse2-mmx-6.c: Likewise.
	* gcc.target/i386/sse2-mmx-7.c: Likewise.
	* gcc.target/i386/sse2-mmx-8.c: Likewise.
	* gcc.target/i386/sse2-mmx-9.c: Likewise.
	* gcc.target/i386/sse2-mmx-10.c: Likewise.
	* gcc.target/i386/sse2-mmx-11.c: Likewise.
	* gcc.target/i386/sse2-mmx-12.c: Likewise.
	* gcc.target/i386/sse2-mmx-13.c: Likewise.
	* gcc.target/i386/sse2-mmx-14.c: Likewise.
	* gcc.target/i386/sse2-mmx-15.c: Likewise.
	* gcc.target/i386/sse2-mmx-16.c: Likewise.
	* gcc.target/i386/sse2-mmx-17.c: Likewise.
	* gcc.target/i386/sse2-mmx-18.c: Likewise.
	* gcc.target/i386/sse2-mmx-19.c: Likewise.
	* gcc.target/i386/sse2-mmx-20.c: Likewise.
	* gcc.target/i386/sse2-mmx-21.c: Likewise.
	* gcc.target/i386/sse2-mmx-cvtpi2ps.c: Likewise.
	* gcc.target/i386/sse2-mmx-cvtps2pi.c: Likewise.
	* gcc.target/i386/sse2-mmx-cvttps2pi.c: Likewise.
	* gcc.target/i386/sse2-mmx-maskmovq.c: Likewise.
	* gcc.target/i386/sse2-mmx-packssdw.c: Likewise.
	* gcc.target/i386/sse2-mmx-packsswb.c: Likewise.
	* gcc.target/i386/sse2-mmx-packuswb.c: Likewise.
	* gcc.target/i386/sse2-mmx-paddb.c: Likewise.
	* gcc.target/i386/sse2-mmx-paddd.c: Likewise.
	* gcc.target/i386/sse2-mmx-paddq.c: Likewise.
	* gcc.target/i386/sse2-mmx-paddsb.c: Likewise.
	* gcc.target/i386/sse2-mmx-paddsw.c: Likewise.
	* gcc.target/i386/sse2-mmx-paddusb.c: Likewise.
	* gcc.target/i386/sse2-mmx-paddusw.c: Likewise.
	* gcc.target/i386/sse2-mmx-paddw.c: Likewise.
	* gcc.target/i386/sse2-mmx-pand.c: Likewise.
	* gcc.target/i386/sse2-mmx-pandn.c: Likewise.
	* gcc.target/i386/sse2-mmx-pavgb.c: Likewise.
	* gcc.target/i386/sse2-mmx-pavgw.c: Likewise.
	* gcc.target/i386/sse2-mmx-pcmpeqb.c: Likewise.
	* gcc.target/i386/sse2-mmx-pcmpeqd.c: Likewise.
	* gcc.target/i386/sse2-mmx-pcmpeqw.c: Likewise.
	* gcc.target/i386/sse2-mmx-pcmpgtb.c: Likewise.
	* gcc.target/i386/sse2-mmx-pcmpgtd.c: Likewise.
	* gcc.target/i386/sse2-mmx-pcmpgtw.c: Likewise.
	* gcc.target/i386/sse2-mmx-pextrw.c: Likewise.
	* gcc.target/i386/sse2-mmx-pinsrw.c: Likewise.
	* gcc.target/i386/sse2-mmx-pmaddwd.c: Likewise.
	* gcc.target/i386/sse2-mmx-pmaxsw.c: Likewise.
	* gcc.target/i386/sse2-mmx-pmaxub.c: Likewise.
	* gcc.target/i386/sse2-mmx-pminsw.c: Likewise.
	* gcc.target/i386/sse2-mmx-pminub.c: Likewise.
	* gcc.target/i386/sse2-mmx-pmovmskb.c: Likewise.
	* gcc.target/i386/sse2-mmx-pmulhuw.c: Likewise.
	* gcc.target/i386/sse2-mmx-pmulhw.c: Likewise.
	* gcc.target/i386/sse2-mmx-pmullw.c: Likewise.
	* gcc.target/i386/sse2-mmx-pmuludq.c: Likewise.
	* gcc.target/i386/sse2-mmx-por.c: Likewise.
	* gcc.target/i386/sse2-mmx-psadbw.c: Likewise.
	* gcc.target/i386/sse2-mmx-pshufw.c: Likewise.
	* gcc.target/i386/sse2-mmx-pslld.c: Likewise.
	* gcc.target/i386/sse2-mmx-pslldi.c: Likewise.
	* gcc.target/i386/sse2-mmx-psllq.c: Likewise.
	* gcc.target/i386/sse2-mmx-psllqi.c: Likewise.
	* gcc.target/i386/sse2-mmx-psllw.c: Likewise.
	* gcc.target/i386/sse2-mmx-psllwi.c: Likewise.
	* gcc.target/i386/sse2-mmx-psrad.c: Likewise.
	* gcc.target/i386/sse2-mmx-psradi.c: Likewise.
	* gcc.target/i386/sse2-mmx-psraw.c: Likewise.
	* gcc.target/i386/sse2-mmx-psrawi.c: Likewise.
	* gcc.target/i386/sse2-mmx-psrld.c: Likewise.
	* gcc.target/i386/sse2-mmx-psrldi.c: Likewise.
	* gcc.target/i386/sse2-mmx-psrlq.c: Likewise.
	* gcc.target/i386/sse2-mmx-psrlqi.c: Likewise.
	* gcc.target/i386/sse2-mmx-psrlw.c: Likewise.
	* gcc.target/i386/sse2-mmx-psrlwi.c: Likewise.
	* gcc.target/i386/sse2-mmx-psubb.c: Likewise.
	* gcc.target/i386/sse2-mmx-psubd.c: Likewise.
	* gcc.target/i386/sse2-mmx-psubq.c: Likewise.
	* gcc.target/i386/sse2-mmx-psubusb.c: Likewise.
	* gcc.target/i386/sse2-mmx-psubusw.c: Likewise.
	* gcc.target/i386/sse2-mmx-psubw.c: Likewise.
	* gcc.target/i386/sse2-mmx-punpckhbw.c: Likewise.
	* gcc.target/i386/sse2-mmx-punpckhdq.c: Likewise.
	* gcc.target/i386/sse2-mmx-punpckhwd.c: Likewise.
	* gcc.target/i386/sse2-mmx-punpcklbw.c: Likewise.
	* gcc.target/i386/sse2-mmx-punpckldq.c: Likewise.
	* gcc.target/i386/sse2-mmx-punpcklwd.c: Likewise.
	* gcc.target/i386/sse2-mmx-pxor.c: Likewise.
---
 gcc/testsuite/gcc.target/i386/mmx-vals.h      |  77 ++++++
 gcc/testsuite/gcc.target/i386/sse2-mmx-10.c   |  42 +++
 gcc/testsuite/gcc.target/i386/sse2-mmx-11.c   |  39 +++
 gcc/testsuite/gcc.target/i386/sse2-mmx-12.c   |  41 +++
 gcc/testsuite/gcc.target/i386/sse2-mmx-13.c   |  40 +++
 gcc/testsuite/gcc.target/i386/sse2-mmx-14.c   |  30 +++
 gcc/testsuite/gcc.target/i386/sse2-mmx-15.c   |  35 +++
 gcc/testsuite/gcc.target/i386/sse2-mmx-16.c   |  39 +++
 gcc/testsuite/gcc.target/i386/sse2-mmx-17.c   |  50 ++++
 gcc/testsuite/gcc.target/i386/sse2-mmx-18.c   |  13 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-19.c   |  11 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-2.c    |  12 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-20.c   |  11 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-21.c   |  13 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-3.c    |  12 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-4.c    |   4 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-5.c    |  12 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-6.c    |  12 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-7.c    |  12 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-8.c    |   4 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-9.c    |  79 ++++++
 .../gcc.target/i386/sse2-mmx-cvtpi2ps.c       |  42 +++
 .../gcc.target/i386/sse2-mmx-cvtps2pi.c       |  35 +++
 .../gcc.target/i386/sse2-mmx-cvttps2pi.c      |  35 +++
 .../gcc.target/i386/sse2-mmx-maskmovq.c       |  98 +++++++
 .../gcc.target/i386/sse2-mmx-packssdw.c       |  51 ++++
 .../gcc.target/i386/sse2-mmx-packsswb.c       |  51 ++++
 .../gcc.target/i386/sse2-mmx-packuswb.c       |  51 ++++
 .../gcc.target/i386/sse2-mmx-paddb.c          |  47 ++++
 .../gcc.target/i386/sse2-mmx-paddd.c          |  47 ++++
 .../gcc.target/i386/sse2-mmx-paddq.c          |  42 +++
 .../gcc.target/i386/sse2-mmx-paddsb.c         |  47 ++++
 .../gcc.target/i386/sse2-mmx-paddsw.c         |  47 ++++
 .../gcc.target/i386/sse2-mmx-paddusb.c        |  47 ++++
 .../gcc.target/i386/sse2-mmx-paddusw.c        |  47 ++++
 .../gcc.target/i386/sse2-mmx-paddw.c          |  47 ++++
 gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c |  43 +++
 .../gcc.target/i386/sse2-mmx-pandn.c          |  43 +++
 .../gcc.target/i386/sse2-mmx-pavgb.c          |  51 ++++
 .../gcc.target/i386/sse2-mmx-pavgw.c          |  51 ++++
 .../gcc.target/i386/sse2-mmx-pcmpeqb.c        |  47 ++++
 .../gcc.target/i386/sse2-mmx-pcmpeqd.c        |  47 ++++
 .../gcc.target/i386/sse2-mmx-pcmpeqw.c        |  47 ++++
 .../gcc.target/i386/sse2-mmx-pcmpgtb.c        |  47 ++++
 .../gcc.target/i386/sse2-mmx-pcmpgtd.c        |  47 ++++
 .../gcc.target/i386/sse2-mmx-pcmpgtw.c        |  47 ++++
 .../gcc.target/i386/sse2-mmx-pextrw.c         |  58 ++++
 .../gcc.target/i386/sse2-mmx-pinsrw.c         |  60 +++++
 .../gcc.target/i386/sse2-mmx-pmaddwd.c        |  46 ++++
 .../gcc.target/i386/sse2-mmx-pmaxsw.c         |  47 ++++
 .../gcc.target/i386/sse2-mmx-pmaxub.c         |  47 ++++
 .../gcc.target/i386/sse2-mmx-pminsw.c         |  47 ++++
 .../gcc.target/i386/sse2-mmx-pminub.c         |  47 ++++
 .../gcc.target/i386/sse2-mmx-pmovmskb.c       |  45 ++++
 .../gcc.target/i386/sse2-mmx-pmulhuw.c        |  50 ++++
 .../gcc.target/i386/sse2-mmx-pmulhw.c         |  52 ++++
 .../gcc.target/i386/sse2-mmx-pmullw.c         |  51 ++++
 .../gcc.target/i386/sse2-mmx-pmuludq.c        |  46 ++++
 gcc/testsuite/gcc.target/i386/sse2-mmx-por.c  |  43 +++
 .../gcc.target/i386/sse2-mmx-psadbw.c         |  57 ++++
 .../gcc.target/i386/sse2-mmx-pshufw.c         | 247 ++++++++++++++++++
 .../gcc.target/i386/sse2-mmx-pslld.c          |  51 ++++
 .../gcc.target/i386/sse2-mmx-pslldi.c         | 152 +++++++++++
 .../gcc.target/i386/sse2-mmx-psllq.c          |  46 ++++
 .../gcc.target/i386/sse2-mmx-psllqi.c         | 244 +++++++++++++++++
 .../gcc.target/i386/sse2-mmx-psllw.c          |  51 ++++
 .../gcc.target/i386/sse2-mmx-psllwi.c         | 104 ++++++++
 .../gcc.target/i386/sse2-mmx-psrad.c          |  51 ++++
 .../gcc.target/i386/sse2-mmx-psradi.c         | 152 +++++++++++
 .../gcc.target/i386/sse2-mmx-psraw.c          |  51 ++++
 .../gcc.target/i386/sse2-mmx-psrawi.c         | 104 ++++++++
 .../gcc.target/i386/sse2-mmx-psrld.c          |  51 ++++
 .../gcc.target/i386/sse2-mmx-psrldi.c         | 152 +++++++++++
 .../gcc.target/i386/sse2-mmx-psrlq.c          |  46 ++++
 .../gcc.target/i386/sse2-mmx-psrlqi.c         | 244 +++++++++++++++++
 .../gcc.target/i386/sse2-mmx-psrlw.c          |  51 ++++
 .../gcc.target/i386/sse2-mmx-psrlwi.c         | 104 ++++++++
 .../gcc.target/i386/sse2-mmx-psubb.c          |  47 ++++
 .../gcc.target/i386/sse2-mmx-psubd.c          |  47 ++++
 .../gcc.target/i386/sse2-mmx-psubq.c          |  42 +++
 .../gcc.target/i386/sse2-mmx-psubusb.c        |  47 ++++
 .../gcc.target/i386/sse2-mmx-psubusw.c        |  47 ++++
 .../gcc.target/i386/sse2-mmx-psubw.c          |  47 ++++
 .../gcc.target/i386/sse2-mmx-punpckhbw.c      |  52 ++++
 .../gcc.target/i386/sse2-mmx-punpckhdq.c      |  46 ++++
 .../gcc.target/i386/sse2-mmx-punpckhwd.c      |  48 ++++
 .../gcc.target/i386/sse2-mmx-punpcklbw.c      |  52 ++++
 .../gcc.target/i386/sse2-mmx-punpckldq.c      |  46 ++++
 .../gcc.target/i386/sse2-mmx-punpcklwd.c      |  48 ++++
 gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c |  43 +++
 gcc/testsuite/gcc.target/i386/sse2-mmx.c      |   1 -
 91 files changed, 5001 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/mmx-vals.h
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-13.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-14.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-15.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-16.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-17.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-18.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-19.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-20.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-21.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-9.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-cvtpi2ps.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-cvtps2pi.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-cvttps2pi.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-maskmovq.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-packssdw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-packsswb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-packuswb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddd.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddq.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddsb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddsw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddusb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddusw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pandn.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pavgb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pavgw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqd.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtd.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmaddwd.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxsw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxub.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pminsw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pminub.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmovmskb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhuw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmullw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmuludq.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-por.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psadbw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pshufw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pslld.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pslldi.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllq.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllqi.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllwi.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrad.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psradi.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psraw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrawi.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrld.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrldi.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlq.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlqi.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlwi.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubd.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubq.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubusb.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubusw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhbw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhdq.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhwd.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklbw.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckldq.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklwd.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c

diff --git a/gcc/testsuite/gcc.target/i386/mmx-vals.h b/gcc/testsuite/gcc.target/i386/mmx-vals.h
new file mode 100644
index 00000000000..62d0c1cb514
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mmx-vals.h
@@ -0,0 +1,77 @@
+/* Saturation helpers and operand values shared by the MMX tests.  */
+
+__attribute__((unused))
+static int
+saturate_b (int i)
+{
+  if (i > 127)
+    i = 127;
+  else if (i < -128)
+    i = -128;
+  return i;
+}
+
+__attribute__((unused))
+static int
+saturate_w (int i)
+{
+  if (i > 32767)
+    i = 32767;
+  else if (i < -32768)
+    i = -32768;
+  return i;
+}
+
+__attribute__((unused))
+static int
+saturate_ub (int i)
+{
+  if (i > 255)
+    i = 255;
+  else if (i < 0)
+    i = 0;
+  return i;
+}
+
+__attribute__((unused))
+static int
+saturate_uw (int i)
+{
+  if (i > 65535)
+    i = 65535;
+  else if (i < 0)
+    i = 0;
+  return i;
+}
+
+static long long MMXops[] =
+{
+  0x3467512347612976LL, 0x000000000000000eLL,
+  0x3467512347612976LL, 0x0000000000000014LL,
+  0x3467512347612976LL, 0x000000000000003cLL,
+  0x0000000000000000LL, 0xFFFFFFFFFFFFFFFFLL,
+  0xFFFFFFFFFFFFFFFFLL, 0x0000000000000000LL,
+  0x0000000000000001LL, 0x1000000000000000LL,
+  0x1000000000000000LL, 0x0000000000000001LL,
+  0xFF00FF00FF00FF00LL, 0x00FF00FF00FF00FFLL,
+  0xFFFFFFFFFFFFFFFFLL, 0x0101010101010101LL,
+  0x0101010101010101LL, 0xFFFFFFFFFFFFFFFFLL,
+  0x0123456789ABCDEFLL, 0x0123456789ABCDEFLL,
+  0x3467512347612976LL, 0x1839876340879234LL,
+  0x0000000000000000LL, 0x0000000000000000LL,
+  0xFFFFFFFFFFFFFFFFLL, 0xFFFFFFFFFFFFFFFFLL,
+  0x7F7F7F7F7F7F7F7FLL, 0x7F7F7F7F7F7F7F7FLL,
+  0x7F7F7F7F7F7F7F7FLL, 0x0101010101010101LL,
+  0x7F7F7F7F7F7F7F7FLL, 0x4782082349761237LL,
+  0x0000000000000000LL, 0x7F7F7F7F7F7F7F7FLL,
+  0x8080808080808080LL, 0x8080808080808080LL,
+  0x0101010101010101LL, 0x8080808080808080LL,
+  0x8080808080808080LL, 0x0000000000000000LL,
+  0x2372347120982458LL, 0x8080808080808080LL,
+  0xFFFFFFFFFFFFFFFFLL, 0x8080808080808080LL,
+  0x7F7F7F7F7F7F7F7FLL, 0xFFFFFFFFFFFFFFFFLL,
+  0x8080808080808080LL, 0x7F7F7F7F7F7F7F7FLL,
+  0xFFFFFFFFFFFFFFFFLL, 0x7F7F7F7F7F7F7F7FLL
+};
+
+#define MMX_num_ops (sizeof (MMXops) / sizeof (MMXops[0]))
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-10.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-10.c
new file mode 100644
index 00000000000..cb63401a251
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-10.c
@@ -0,0 +1,42 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_from_int  (long long *ll1, long long *r)
+{
+  int i1 = *(int *) ll1;
+  *(__m64 *) r = _m_from_int (i1);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *ll1, long long *r)
+{
+  int *res = (int *) r;
+  res[0] = *(int *) ll1;
+  res[1] = 0;
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i++)
+    {
+      test_from_int (&MMXops[i], &r);
+      compute_correct_result (&MMXops[i], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-11.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-11.c
new file mode 100644
index 00000000000..6737ec5f2d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-11.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_from_long_long  (long long *ll1, long long *r)
+{
+  *(__m64 *) r = _mm_cvtsi64_m64 (*ll1);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *ll1, long long *r)
+{
+  *r = *ll1;
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i++)
+    {
+      test_from_long_long (&MMXops[i], &r);
+      compute_correct_result (&MMXops[i], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-12.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-12.c
new file mode 100644
index 00000000000..7390bcf3ccc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-12.c
@@ -0,0 +1,41 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_to_int  (long long *ll1, long long *r)
+{
+  __m64 m = *(__m64 *) ll1; 
+  *(int *) r = _m_to_int (m);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *ll1, long long *r)
+{
+  int *i1 = (int *) ll1;
+  *(int *) r = *i1;
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r = 0, ck = 0;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i++)
+    {
+      test_to_int (&MMXops[i], &r);
+      compute_correct_result (&MMXops[i], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-13.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-13.c
new file mode 100644
index 00000000000..fd1eed66daa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-13.c
@@ -0,0 +1,40 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_to_long_long  (long long *ll1, long long *r)
+{
+  __m64 m = *(__m64 *) ll1; 
+  *r = _mm_cvtm64_si64 (m);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *ll1, long long *r)
+{
+  *r = *ll1;
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i++)
+    {
+      test_to_long_long (&MMXops[i], &r);
+      compute_correct_result (&MMXops[i], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-14.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-14.c
new file mode 100644
index 00000000000..cc586182259
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-14.c
@@ -0,0 +1,30 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_setzero (long long *r)
+{
+  *(__m64 *) r = _mm_setzero_si64 ();
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *r)
+{
+  *r = 0x0LL;
+}
+
+static void
+sse2_test (void)
+{
+  long long r, ck;
+
+  /* Run the MMX tests */
+  test_setzero (&r);
+  compute_correct_result (&ck);
+  if (ck != r)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-15.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-15.c
new file mode 100644
index 00000000000..35308633f59
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-15.c
@@ -0,0 +1,35 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_set (int x, int y, long long *r)
+{
+  *(__m64 *) r = _mm_set_pi32 (x, y);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (int x, int y, long long *res_p)
+{
+  int *res = (int *) res_p;
+  res[0] = y;
+  res[1] = x;
+}
+
+static void
+sse2_test (void)
+{
+  int x, y;
+  long long r, ck;
+
+  /* Run the MMX tests */
+  x = 0x0badbeef;
+  y = 0x0badfeed;
+  test_set (x, y, &r);
+  compute_correct_result (x, y, &ck);
+  if (ck != r)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-16.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-16.c
new file mode 100644
index 00000000000..9f0fb46765c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-16.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_set (int i0, int i1, int i2, int i3, long long *r)
+{
+  *(__m64 *) r = _mm_set_pi16 (i0, i1, i2, i3);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (int i0, int i1, int i2, int i3, long long *res_p)
+{
+  short *res = (short *) res_p;
+  res[0] = i3;
+  res[1] = i2;
+  res[2] = i1;
+  res[3] = i0;
+}
+
+static void
+sse2_test (void)
+{
+  short i0, i1, i2, i3;
+  long long r, ck;
+
+  /* Run the MMX tests */
+  i0 = 0x0bad;
+  i1 = 0xbeef;
+  i2 = 0x0bad;
+  i3 = 0xfeed;
+  test_set (i0, i1, i2, i3, &r);
+  compute_correct_result (i0, i1, i2, i3, &ck);
+  if (ck != r)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-17.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-17.c
new file mode 100644
index 00000000000..a38351ea056
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-17.c
@@ -0,0 +1,50 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_set (char i0, char i1, char i2, char i3,
+	  char i4, char i5, char i6, char i7, long long *r)
+{
+  *(__m64 *) r = _mm_set_pi8 (i0, i1, i2, i3, i4, i5, i6, i7);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (char i0, char i1, char i2, char i3,
+			char i4, char i5, char i6, char i7,
+			long long *res_p)
+{
+  char *res = (char *) res_p;
+  res[0] = i7;
+  res[1] = i6;
+  res[2] = i5;
+  res[3] = i4;
+  res[4] = i3;
+  res[5] = i2;
+  res[6] = i1;
+  res[7] = i0;
+}
+
+static void
+sse2_test (void)
+{
+  char i0, i1, i2, i3, i4, i5, i6, i7;
+  long long r, ck;
+
+  /* Run the MMX tests */
+  i0 = 0x12;
+  i1 = 0x34;
+  i2 = 0x56;
+  i3 = 0x78;
+  i4 = 0x90;
+  i5 = 0xab;
+  i6 = 0xcd;
+  i7 = 0xef;
+  test_set (i0, i1, i2, i3, i4, i5, i6, i7, &r);
+  compute_correct_result (i0, i1, i2, i3, i4, i5, i6, i7, &ck);
+  if (ck != r)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-18.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-18.c
new file mode 100644
index 00000000000..77f518b6c5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-18.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler "cvtdq2ps" } } */
+/* { dg-final { scan-assembler-not "cvtpi2ps" } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <xmmintrin.h>
+
+__m128
+foo (__m128 i1, __m64 i2)
+{
+  return _mm_cvtpi32_ps (i1, i2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-19.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-19.c
new file mode 100644
index 00000000000..1953dc89bb1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-19.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <xmmintrin.h>
+
+int
+foo (__m64 i)
+{
+  return _m_pextrw (i, 2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-2.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-2.c
new file mode 100644
index 00000000000..e4cee2da83e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-not "%xmm" } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <mmintrin.h>
+
+float
+foo (__m64 x)
+{
+  return ((__v2sf) x)[0];
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-20.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-20.c
new file mode 100644
index 00000000000..f73444f493b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-20.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <xmmintrin.h>
+
+__m64
+foo (__m64 i, int w)
+{
+  return _m_pinsrw (i, w, 2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-21.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-21.c
new file mode 100644
index 00000000000..6ea491d2715
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-21.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler "movnti" } } */
+/* { dg-final { scan-assembler-not "movntq" } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <xmmintrin.h>
+
+void
+foo (__m64 *p, __m64 i)
+{
+  _mm_stream_pi (p, i);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-3.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-3.c
new file mode 100644
index 00000000000..e42c0e83bf9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-3.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler "(shufps\[ \\t\]+\\\$0xe|movshdup)" } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <mmintrin.h>
+
+float
+foo (__m64 x)
+{
+  return ((__v2sf) x)[1];
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-4.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-4.c
new file mode 100644
index 00000000000..d923724fc1c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-4.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+
+#include "mmx-4.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-5.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-5.c
new file mode 100644
index 00000000000..51f9eb2dfbc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-5.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler "unpcklps" } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <mmintrin.h>
+
+__m64
+foo3 (float x)
+{
+  return __extension__ (__m64) (__v2sf) { x, 0 };
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-6.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-6.c
new file mode 100644
index 00000000000..1a3df4e60a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-6.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler "(shufps|vbroadcastss)" } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <mmintrin.h>
+
+__m64
+foo (float x)
+{
+  return __extension__ (__m64) (__v2sf) { x, x };
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-7.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-7.c
new file mode 100644
index 00000000000..dfcd5a26f51
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-7.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler "unpcklps" } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <mmintrin.h>
+
+__m64
+foo (float x, float y)
+{
+  return __extension__ (__m64) (__v2sf) { x, y };
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-8.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-8.c
new file mode 100644
index 00000000000..342c2fa4f25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-8.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+
+#include "mmx-8.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-9.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-9.c
new file mode 100644
index 00000000000..f0bf7256c0e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-9.c
@@ -0,0 +1,79 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-require-effective-target sse2 } */
+
+#include "sse2-check.h"
+
+#include <string.h>
+
+#define FLOAT_X	2.3456
+#define FLOAT_Y	-4.5987
+
+static float expected_x = FLOAT_X;
+static float expected_y = FLOAT_Y;
+static __v2sf expected1 = { FLOAT_X, FLOAT_Y };
+static __v2sf expected2 = { FLOAT_X, 0 };
+static __v2sf expected3 = { FLOAT_X, FLOAT_X };
+
+float
+__attribute__((noinline, noclone))
+foo1 (__m64 x)
+{
+  return ((__v2sf) x)[0];
+}
+
+float
+__attribute__((noinline, noclone))
+foo2 (__m64 x)
+{
+  return ((__v2sf) x)[1];
+}
+
+__m64
+__attribute__((noinline, noclone))
+foo3 (float x)
+{
+  return __extension__ (__m64) (__v2sf) { x, 0 };
+}
+
+__m64
+__attribute__((noinline, noclone))
+foo4 (float x)
+{
+  return __extension__ (__m64) (__v2sf) { x, x };
+}
+
+__m64
+__attribute__((noinline, noclone))
+foo5 (float x, float y)
+{
+  return __extension__ (__m64) (__v2sf) { x, y };
+}
+
+void
+__attribute__((noinline))
+sse2_test (void)
+{
+  __m64 res;
+  float x;
+
+  x = foo1 ((__m64) expected1);
+  if (x != expected_x)
+    abort ();
+
+  x = foo2 ((__m64) expected1);
+  if (x != expected_y)
+    abort ();
+
+  res = foo3 (FLOAT_X); 
+  if (memcmp (&res, &expected2, sizeof (res)))
+    abort ();
+
+  res = foo4 (FLOAT_X); 
+  if (memcmp (&res, &expected3, sizeof (res)))
+    abort ();
+
+  res = foo5 (FLOAT_X, FLOAT_Y); 
+  if (memcmp (&res, &expected1, sizeof (res)))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtpi2ps.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtpi2ps.c
new file mode 100644
index 00000000000..bdf1085446b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtpi2ps.c
@@ -0,0 +1,42 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include <string.h>
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_cvtpi32_ps (__m128 *i1, __m64 *i2, __m128 *r)
+{
+  *(__m128 *) r = _mm_cvtpi32_ps (*i1, *i2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (__m128 *dst_p, __m64 *src_p, __m128 *res_p)
+{
+  int *src = (int *) src_p;
+  float *res = (float *) res_p;
+  *res_p = *dst_p;
+  int i;
+  __m128 r;
+  for (i = 0; i < 2; i++)
+    {
+      r = _mm_cvt_si2ss (*dst_p, src[i]);
+      res[i] = ((__v4sf) r)[0];
+    }
+}
+
+static void
+sse2_test (void)
+{
+  __m128 r, ck;
+  __v4sf x = { 1.99f, -3.9f, -4.9f, 3.8f };
+  __v2si y = { 30, -39 };
+
+  /* Run the MMX tests */
+  test_cvtpi32_ps ((__m128 *) &x, (__m64 *) &y, &r);
+  compute_correct_result ((__m128 *) &x, (__m64 *) &y, &ck);
+  if (memcmp (&ck, &r, sizeof (r)))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtps2pi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtps2pi.c
new file mode 100644
index 00000000000..8e860e7a5be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtps2pi.c
@@ -0,0 +1,35 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_cvtps_pi32 (__m128 *src_p, long long *r)
+{
+  *(__m64 *) r = _mm_cvtps_pi32 (*src_p);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (__m128 *src_p, long long *res_p)
+{
+  __v4sf *src = (__v4sf *) src_p;
+  int *res = (int *) res_p;
+  int i;
+  for (i = 0; i < 2; i++)
+    res[i] = _mm_cvt_ss2si (_mm_set_ss ((*src)[i]));
+}
+
+static void
+sse2_test (void)
+{
+  long long r, ck;
+  __v4sf x = { 1.99f, -3.9f, -4.9f, 3.8f };
+
+  /* Run the MMX tests */
+  test_cvtps_pi32 ((__m128 *) &x, &r);
+  compute_correct_result ((__m128 *) &x, &ck);
+  if (ck != r)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-cvttps2pi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvttps2pi.c
new file mode 100644
index 00000000000..8b9dd7fc8ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvttps2pi.c
@@ -0,0 +1,35 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_cvttps_pi32 (__m128 *src_p, long long *r)
+{
+  *(__m64 *) r = _mm_cvttps_pi32 (*src_p);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (__m128 *src_p, long long *res_p)
+{
+  __v4sf *src = (__v4sf *) src_p;
+  int *res = (int *) res_p;
+  int i;
+  for (i = 0; i < 2; i++)
+    res[i] = _mm_cvtt_ss2si (_mm_set_ss ((*src)[i]));
+}
+
+static void
+sse2_test (void)
+{
+  long long r, ck;
+  __v4sf x = { 1.99f, -3.9f, -4.9f, 3.8f };
+
+  /* Run the MMX tests */
+  test_cvttps_pi32 ((__m128 *) &x, &r);
+  compute_correct_result ((__m128 *) &x, &ck);
+  if (ck != r)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-maskmovq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-maskmovq.c
new file mode 100644
index 00000000000..815a499ff84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-maskmovq.c
@@ -0,0 +1,98 @@
+/* { dg-do run { target { *-*-linux* && { ! ia32 } } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include <unistd.h>
+#include <sys/mman.h>
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_maskmovq  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+   _m_maskmovq (t1, t2, (char *) r);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  char *dst = (char *) dst_p;
+  char *src = (char *) src_p;
+  char *res = (char *) res_p;
+  int i;
+  for (i = 0; i < 8; i++)
+    if ((src[i] & 0x80) != 0)
+      res[i] = dst[i];
+}
+
+static void
+do_maskmovq_test (long long *r)
+{
+  int i;
+  long long ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i++)
+    {
+      r[0] = -1LL;
+      ck = -1LL;
+      test_maskmovq (&MMXops[i], &MMXops[i], r);
+      compute_correct_result (&MMXops[i], &MMXops[i], &ck);
+      if (*r != ck)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
+
+static void
+sse2_test (void)
+{
+  char *buf;
+  long long *r;
+  size_t page_size = sysconf(_SC_PAGESIZE);
+
+  buf = mmap (0, 3 * page_size, PROT_READ | PROT_WRITE,
+	      MAP_PRIVATE | MAP_ANON, -1, 0);
+  if (buf == MAP_FAILED)
+    {
+      perror ("mmap");
+      abort ();
+    }
+
+  if (mprotect (buf, page_size, PROT_NONE))
+    {
+      perror ("mprotect");
+      abort ();
+    }
+
+  if (mprotect (buf + 2 * page_size, page_size, PROT_NONE))
+    {
+      perror ("mprotect");
+      abort ();
+    }
+
+  r = (long long *) (buf + page_size);
+  do_maskmovq_test (r);
+
+  r = (long long *) (buf + page_size + 3);
+  do_maskmovq_test (r);
+
+  r = (long long *) (buf + page_size + 11);
+  do_maskmovq_test (r);
+
+  r = (long long *) (buf + 2 * page_size - 16);
+  do_maskmovq_test (r);
+
+  r = (long long *) (buf + 2 * page_size - 16 + 3);
+  do_maskmovq_test (r);
+
+  r = (long long *) (buf + 2 * page_size - 16 + 8);
+  do_maskmovq_test (r);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-packssdw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-packssdw.c
new file mode 100644
index 00000000000..fb895c6cfe7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-packssdw.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_packssdw  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_packssdw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  int *dst = (int *) dst_p;
+  int *src = (int *) src_p;
+  short *res = (short *) res_p;
+  int i;
+
+  for (i = 0; i < 2; i++)
+    {
+      res[i] = saturate_w (dst[i]);
+      res[i + 2] = saturate_w (src[i]);
+    }
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_packssdw (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-packsswb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-packsswb.c
new file mode 100644
index 00000000000..1c4a948027c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-packsswb.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_packsswb  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_packsswb (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  short *dst = (short *) dst_p;
+  short *src = (short *) src_p;
+  char *res = (char *) res_p;
+  int i;
+
+  for (i = 0; i < 4; i++)
+    {
+      res[i] = saturate_b (dst[i]);
+      res[i + 4] = saturate_b (src[i]);
+    }
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_packsswb (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-packuswb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-packuswb.c
new file mode 100644
index 00000000000..24abd5dcc9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-packuswb.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_packuswb  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_packuswb (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  short *dst = (short *) dst_p;
+  short *src = (short *) src_p;
+  unsigned char *res = (unsigned char *) res_p;
+  int i;
+
+  for (i = 0; i < 4; i++)
+    {
+      res[i] = saturate_ub (dst[i]);
+      res[i + 4] = saturate_ub (src[i]);
+    }
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_packuswb (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddb.c
new file mode 100644
index 00000000000..f4c8273c5e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddb.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_paddb  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_paddb (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  char *dst = (char *) dst_p;
+  char *src = (char *) src_p;
+  char *res = (char *) res_p;
+  int i;
+  for (i = 0; i < 8; i++)
+    res[i] = dst[i] + src[i];
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_paddb (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddd.c
new file mode 100644
index 00000000000..32911a7852a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddd.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_paddd  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_paddd (t1, t2);
+}
+
+/* Manually compute PADDD: 32-bit lane adds, unsigned so they wrap (no UB).  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  unsigned int *dst = (unsigned int *) dst_p;
+  unsigned int *src = (unsigned int *) src_p;
+  unsigned int *res = (unsigned int *) res_p;
+  int i;
+  for (i = 0; i < 2; i++)
+    res[i] = dst[i] + src[i];
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_paddd (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddq.c
new file mode 100644
index 00000000000..8e257a314e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddq.c
@@ -0,0 +1,42 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_paddq  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _mm_add_si64 (t1, t2);
+}
+
+/* Manually compute PADDQ: 64-bit add, done unsigned so it wraps (no UB).  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  res_p[0] = (unsigned long long) dst_p[0] + (unsigned long long) src_p[0];
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_paddq (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsb.c
new file mode 100644
index 00000000000..9798a2024fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsb.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_paddsb  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_paddsb (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  char *dst = (char *) dst_p;
+  char *src = (char *) src_p;
+  char *res = (char *) res_p;
+  int i;
+  for (i = 0; i < 8; i++)
+    res[i] = saturate_b (dst[i] + src[i]);
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_paddsb (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsw.c
new file mode 100644
index 00000000000..6371b1930fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsw.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_paddsw  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_paddsw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  short *dst = (short *) dst_p;
+  short *src = (short *) src_p;
+  short *res = (short *) res_p;
+  int i;
+  for (i = 0; i < 4; i++)
+    res[i] = saturate_w (dst[i] + src[i]);
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_paddsw (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusb.c
new file mode 100644
index 00000000000..bac22b641cd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusb.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_paddusb  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_paddusb (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  unsigned char *dst = (unsigned char *) dst_p;
+  unsigned char *src = (unsigned char *) src_p;
+  unsigned char *res = (unsigned char *) res_p;
+  int i;
+  for (i = 0; i < 8; i++)
+    res[i] = saturate_ub (dst[i] + src[i]);
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_paddusb (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusw.c
new file mode 100644
index 00000000000..70f987bf381
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusw.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_paddusw  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_paddusw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  unsigned short *dst = (unsigned short *) dst_p;
+  unsigned short *src = (unsigned short *) src_p;
+  unsigned short *res = (unsigned short *) res_p;
+  int i;
+  for (i = 0; i < 4; i++)
+    res[i] = saturate_uw (dst[i] + src[i]);
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_paddusw (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddw.c
new file mode 100644
index 00000000000..8e01cc4734f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddw.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_paddw  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_paddw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  short *dst = (short *) dst_p;
+  short *src = (short *) src_p;
+  short *res = (short *) res_p;
+  int i;
+  for (i = 0; i < 4; i++)
+    res[i] = dst[i] + src[i];
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_paddw (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c
new file mode 100644
index 00000000000..0876fee92ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c
@@ -0,0 +1,43 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pand  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pand (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (unsigned long long *dst,
+			unsigned long long *src,
+			unsigned long long *res)
+{
+  res[0] = dst[0] & src[0];
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_pand (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pandn.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pandn.c
new file mode 100644
index 00000000000..362c475029b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pandn.c
@@ -0,0 +1,43 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pandn  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pandn (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (unsigned long long *dst,
+			unsigned long long *src,
+			unsigned long long *res)
+{
+  res[0] = ~dst[0] & src[0];
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_pandn (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgb.c
new file mode 100644
index 00000000000..0c57d94e9e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgb.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pavgb  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pavgb (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  unsigned char *dst = (unsigned char *) dst_p;
+  unsigned char *src = (unsigned char *) src_p;
+  unsigned char *res = (unsigned char *) res_p;
+  int i;
+  unsigned int tmp;
+  for (i = 0; i < 8; i++)
+    {
+      tmp = dst[i] + src[i] + 1;
+      res[i] = tmp >> 1;
+    }
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_pavgb (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgw.c
new file mode 100644
index 00000000000..e38669ffbb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgw.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pavgw  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pavgw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  unsigned short *dst = (unsigned short *) dst_p;
+  unsigned short *src = (unsigned short *) src_p;
+  unsigned short *res = (unsigned short *) res_p;
+  int i;
+  unsigned int tmp;
+  for (i = 0; i < 4; i++)
+    {
+      tmp = dst[i] + src[i] + 1;
+      res[i] = tmp >> 1;
+    }
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_pavgw (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqb.c
new file mode 100644
index 00000000000..f0f3a28cf62
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqb.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pcmpeqb  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pcmpeqb (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  char *dst = (char *) dst_p;
+  char *src = (char *) src_p;
+  char *res = (char *) res_p;
+  int i;
+  for (i = 0; i < 8; i++)
+    res[i] = dst[i] == src[i] ? -1 : 0;
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_pcmpeqb (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqd.c
new file mode 100644
index 00000000000..7dc13f147e8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqd.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pcmpeqd  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pcmpeqd (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  int *dst = (int *) dst_p;
+  int *src = (int *) src_p;
+  int *res = (int *) res_p;
+  int i;
+  for (i = 0; i < 2; i++)
+    res[i] = dst[i] == src[i] ? -1 : 0;
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_pcmpeqd (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqw.c
new file mode 100644
index 00000000000..d6e59077204
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqw.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pcmpeqw  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pcmpeqw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  short *dst = (short *) dst_p;
+  short *src = (short *) src_p;
+  short *res = (short *) res_p;
+  int i;
+  for (i = 0; i < 4; i++)
+    res[i] = dst[i] == src[i] ? -1 : 0;
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_pcmpeqw (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtb.c
new file mode 100644
index 00000000000..3a1c188a407
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtb.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pcmpgtb  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pcmpgtb (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  char *dst = (char *) dst_p;
+  char *src = (char *) src_p;
+  char *res = (char *) res_p;
+  int i;
+  for (i = 0; i < 8; i++)
+    res[i] = dst[i] > src[i] ? -1 : 0;
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_pcmpgtb (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtd.c
new file mode 100644
index 00000000000..121cafcd834
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtd.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pcmpgtd  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pcmpgtd (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  int *dst = (int *) dst_p;
+  int *src = (int *) src_p;
+  int *res = (int *) res_p;
+  int i;
+  for (i = 0; i < 2; i++)
+    res[i] = dst[i] > src[i] ? -1 : 0;
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_pcmpgtd (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtw.c
new file mode 100644
index 00000000000..7b4e99d0a34
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtw.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pcmpgtw  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pcmpgtw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  short *dst = (short *) dst_p;
+  short *src = (short *) src_p;
+  short *res = (short *) res_p;
+  int i;
+  for (i = 0; i < 4; i++)
+    res[i] = dst[i] > src[i] ? -1 : 0;
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_pcmpgtw (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c
new file mode 100644
index 00000000000..58e5ea5aa3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c
@@ -0,0 +1,58 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing" } */
+
+#include <string.h>
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pextrw (__m64 *i, unsigned int imm, int *r)
+{
+  switch (imm)
+    {
+    case 0:
+      *r = _m_pextrw (*i, 0);
+      break;
+    case 1:
+      *r = _m_pextrw (*i, 1);
+      break;
+    case 2:
+      *r = _m_pextrw (*i, 2);
+      break;
+    case 3:
+      *r = _m_pextrw (*i, 3);
+      break;
+    default:
+      break;
+    }
+}
+
+/* Manually compute PEXTRW: word IMM of *SRC_P, zero-extended to int.  */
+static void
+compute_correct_result (__m64 *src_p, unsigned int imm, int *res_p)
+{
+  unsigned short *src = (unsigned short *) src_p;
+  if (imm < 4)
+    *res_p = src[imm];
+}
+
+static void
+sse2_test (void)
+{
+  int r, ck;
+  int i;
+  int failed = 0;
+  __v4hi y = { 3320, -3339, 48, 4392 };
+
+  /* Run the MMX tests */
+  for (i = 0; i < 4; i++)
+    {
+      test_pextrw ((__m64 *) &y, i, &r);
+      compute_correct_result ((__m64 *) &y, i, &ck);
+      if (r != ck)
+	failed++;
+    }
+
+  if (failed)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c
new file mode 100644
index 00000000000..2c49d8ce5f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c
@@ -0,0 +1,60 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing" } */
+
+#include <string.h>
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pinsrw  (__m64 *i, int val, unsigned int imm, int *r)
+{
+  switch (imm)
+    {
+    case 0:
+       *(__m64 *) r = _m_pinsrw  (*i, val, 0);
+      break;
+    case 1:
+       *(__m64 *) r = _m_pinsrw  (*i, val, 1);
+      break;
+    case 2:
+       *(__m64 *) r = _m_pinsrw  (*i, val, 2);
+      break;
+    case 3:
+       *(__m64 *) r = _m_pinsrw  (*i, val, 3);
+      break;
+    default:
+      break;
+    }
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (__m64 *src_p, int val, unsigned int imm,
+			int *res_p)
+{
+  short *res = (short *) res_p;
+  *(__m64 *) res_p = *src_p;
+  if (imm < 4)
+    res[imm] = val;
+}
+
+static void
+sse2_test (void)
+{
+  long long r, ck;	/* 8 bytes: callees store a whole __m64 via the int *.  */
+  int i;
+  int failed = 0;
+  __v4hi y = { 3320, -3339, 48, 4392 };
+
+  /* Insert 0x1234 into each word in turn; compare all 64 result bits.  */
+  for (i = 0; i < 4; i++)
+    {
+      test_pinsrw  ((__m64 *) &y, 0x1234, i, (int *) &r);
+      compute_correct_result ((__m64 *) &y, 0x1234, i, (int *) &ck);
+      if (r != ck)
+	failed++;
+    }
+
+  if (failed)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaddwd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaddwd.c
new file mode 100644
index 00000000000..8f08aabf954
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaddwd.c
@@ -0,0 +1,46 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmaddwd  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pmaddwd (t1, t2);
+}
+
+/* Manually compute PMADDWD; sums are unsigned: 2 * 0x8000^2 overflows int.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  short *dst = (short *) dst_p;
+  short *src = (short *) src_p;
+  unsigned int *res = (unsigned int *) res_p;
+  res[0] = (unsigned int) (dst[0] * src[0]) + (unsigned int) (dst[1] * src[1]);
+  res[1] = (unsigned int) (dst[2] * src[2]) + (unsigned int) (dst[3] * src[3]);
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_pmaddwd (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxsw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxsw.c
new file mode 100644
index 00000000000..e4a6d87f344
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxsw.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmaxsw  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pmaxsw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  short *dst = (short *) dst_p;
+  short *src = (short *) src_p;
+  short *res = (short *) res_p;
+  int i;
+  for (i = 0; i < 4; i++)
+    res[i] = dst[i] > src[i] ? dst[i] : src[i];
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_pmaxsw (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxub.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxub.c
new file mode 100644
index 00000000000..f943989b96f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxub.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmaxub  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pmaxub (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  unsigned char *dst = (unsigned char *) dst_p;
+  unsigned char *src = (unsigned char *) src_p;
+  unsigned char *res = (unsigned char *) res_p;
+  int i;
+  for (i = 0; i < 8; i++)
+    res[i] = dst[i] > src[i] ? dst[i] : src[i];
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_pmaxub (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pminsw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pminsw.c
new file mode 100644
index 00000000000..6a92f7eb3a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pminsw.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pminsw  (long long *ll1, long long *ll2, long long *r)
+{
+  /* View the three 64-bit slots as __m64 and apply _m_pminsw.  */
+  __m64 *lhs = (__m64 *) ll1, *rhs = (__m64 *) ll2, *out = (__m64 *) r;
+  *out = _m_pminsw (*lhs, *rhs);
+}
+
+/* Reference model: per-word signed minimum of *dst_p and *src_p.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  const short *a = (const short *) dst_p;
+  const short *b = (const short *) src_p;
+  short *out = (short *) res_p;
+  int lane = 4;
+  while (lane-- > 0)
+    out[lane] = a[lane] >= b[lane] ? b[lane] : a[lane];
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Feed consecutive operand pairs through the intrinsic and the C model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_pminsw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      if (want != got)
+	mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pminub.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pminub.c
new file mode 100644
index 00000000000..a3b0e5093a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pminub.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pminub  (long long *ll1, long long *ll2, long long *r)
+{
+  /* View the three 64-bit slots as __m64 and apply _m_pminub.  */
+  __m64 *lhs = (__m64 *) ll1, *rhs = (__m64 *) ll2, *out = (__m64 *) r;
+  *out = _m_pminub (*lhs, *rhs);
+}
+
+/* Reference model: per-byte unsigned minimum of *dst_p and *src_p.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  const unsigned char *a = (const unsigned char *) dst_p;
+  const unsigned char *b = (const unsigned char *) src_p;
+  unsigned char *out = (unsigned char *) res_p;
+  int lane = 8;
+  while (lane-- > 0)
+    out[lane] = a[lane] >= b[lane] ? b[lane] : a[lane];
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Feed consecutive operand pairs through the intrinsic and the C model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_pminub (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      if (want != got)
+	mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmovmskb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmovmskb.c
new file mode 100644
index 00000000000..73b2a00d282
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmovmskb.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmovmskb  (long long *ll1, int *r)
+{
+  /* Run _m_pmovmskb on *ll1 viewed as __m64; the int result goes to *r.  */
+  *r = _m_pmovmskb (*(__m64 *) ll1);
+}
+
+/* Reference model: bit N of *res_p is the top bit of byte N of *src_p.  */
+static void
+compute_correct_result (long long *src_p, int *res_p)
+{
+  const char *bytes = (const char *) src_p;
+  int mask = 0;
+  int bit;
+  for (bit = 7; bit >= 0; bit--)
+    mask = (mask << 1) | ((bytes[bit] & 0x80) != 0);
+  *res_p = mask;
+}
+
+static void
+sse2_test (void)
+{
+  int got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Feed every operand through the intrinsic and the C model.  */
+  for (idx = 0; idx < MMX_num_ops; idx++)
+    {
+      test_pmovmskb (&MMXops[idx], &got);
+      compute_correct_result (&MMXops[idx], &want);
+      if (want != got)
+	mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhuw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhuw.c
new file mode 100644
index 00000000000..ebf2a760fd7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhuw.c
@@ -0,0 +1,50 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmulhuw  (long long *ll1, long long *ll2, long long *r)
+{
+  /* View the three 64-bit slots as __m64 and apply _m_pmulhuw.  */
+  __m64 *lhs = (__m64 *) ll1, *rhs = (__m64 *) ll2, *out = (__m64 *) r;
+  *out = _m_pmulhuw (*lhs, *rhs);
+}
+
+/* Reference model: high 16 bits of each unsigned 16x16-bit product.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  unsigned short *dst = (unsigned short *) dst_p;
+  unsigned short *src = (unsigned short *) src_p;
+  unsigned short *res = (unsigned short *) res_p;
+  int i;
+  for (i = 0; i < 4; i++)
+    {
+      /* Multiply as unsigned: promoted int overflows past 0x7fffffff.  */
+      res[i] = ((unsigned int) dst[i] * src[i]) >> 16;
+    }
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Feed consecutive operand pairs through the intrinsic and the C model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_pmulhuw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      if (want != got)
+	mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhw.c
new file mode 100644
index 00000000000..4aa8d605c4c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhw.c
@@ -0,0 +1,52 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmulhw  (long long *ll1, long long *ll2, long long *r)
+{
+  /* View the three 64-bit slots as __m64 and apply _m_pmulhw.  */
+  __m64 *lhs = (__m64 *) ll1, *rhs = (__m64 *) ll2, *out = (__m64 *) r;
+  *out = _m_pmulhw (*lhs, *rhs);
+}
+
+/* Reference model: high 16 bits of each signed 16x16-bit product.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  const short *a = (const short *) dst_p;
+  const short *b = (const short *) src_p;
+  short *out = (short *) res_p;
+  int lane;
+
+  for (lane = 3; lane >= 0; lane--)
+    {
+      /* Signed 32-bit product; the shift keeps its upper half.  */
+      int wide = a[lane] * b[lane];
+      out[lane] = wide >> 16;
+    }
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Feed consecutive operand pairs through the intrinsic and the C model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_pmulhw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      if (want != got)
+	mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmullw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmullw.c
new file mode 100644
index 00000000000..732687e8cd7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmullw.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmullw  (long long *ll1, long long *ll2, long long *r)
+{
+  /* View the three 64-bit slots as __m64 and apply _m_pmullw.  */
+  __m64 *lhs = (__m64 *) ll1, *rhs = (__m64 *) ll2, *out = (__m64 *) r;
+  *out = _m_pmullw (*lhs, *rhs);
+}
+
+/* Reference model: low 16 bits of each 16x16-bit product.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  const short *a = (const short *) dst_p;
+  const short *b = (const short *) src_p;
+  short *out = (short *) res_p;
+  int lane;
+
+  for (lane = 3; lane >= 0; lane--)
+    {
+      /* The int product is narrowed by the store to short.  */
+      out[lane] = a[lane] * b[lane];
+    }
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Feed consecutive operand pairs through the intrinsic and the C model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_pmullw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      if (want != got)
+	mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmuludq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmuludq.c
new file mode 100644
index 00000000000..eec4d970d63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmuludq.c
@@ -0,0 +1,46 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmuludq  (long long *ll1, long long *ll2, long long *r)
+{
+  /* View the three 64-bit slots as __m64 and apply _mm_mul_su32.  */
+  __m64 *lhs = (__m64 *) ll1, *rhs = (__m64 *) ll2, *out = (__m64 *) r;
+  *out = _mm_mul_su32 (*lhs, *rhs);
+}
+
+/* Reference model: 64-bit product of each operand's first 32-bit word.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  unsigned long long *out = (unsigned long long *) res_p;
+
+  /* Widen the first factor by storing it, then scale in 64 bits.  */
+  *out = *(const unsigned int *) dst_p;
+  *out *= *(const unsigned int *) src_p;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Feed consecutive operand pairs through the intrinsic and the C model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_pmuludq (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      if (want != got)
+	mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-por.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-por.c
new file mode 100644
index 00000000000..79d3a9a548f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-por.c
@@ -0,0 +1,43 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_por  (long long *ll1, long long *ll2, long long *r)
+{
+  /* View the three 64-bit slots as __m64 and apply _m_por.  */
+  __m64 *lhs = (__m64 *) ll1, *rhs = (__m64 *) ll2, *out = (__m64 *) r;
+  *out = _m_por (*lhs, *rhs);
+}
+
+/* Reference model: bitwise OR of the two 64-bit operands.  */
+static void
+compute_correct_result (unsigned long long *dst,
+			unsigned long long *src,
+			unsigned long long *res)
+{
+  *res = *src | *dst;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Feed consecutive operand pairs through the intrinsic and the C model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_por (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      if (want != got)
+	mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psadbw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psadbw.c
new file mode 100644
index 00000000000..324ce8c51a2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psadbw.c
@@ -0,0 +1,57 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psadbw  (long long *ll1, long long *ll2, long long *r)
+{
+  /* View the three 64-bit slots as __m64 and apply _m_psadbw.  */
+  __m64 *lhs = (__m64 *) ll1, *rhs = (__m64 *) ll2, *out = (__m64 *) r;
+  *out = _m_psadbw (*lhs, *rhs);
+}
+
+/* Reference model: sum of absolute byte differences in word 0, rest zero.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  unsigned char *dst = (unsigned char *) dst_p;
+  unsigned char *src = (unsigned char *) src_p;
+  unsigned short *res = (unsigned short *) res_p;
+  int i;
+  int tmp;
+  unsigned int sum = 0;
+  for (i = 0; i < 8; i++)
+    {
+      tmp = dst[i] - src[i];
+      if (tmp < 0)
+	tmp = -tmp;
+      sum += tmp;
+    }
+  res[0] = sum;
+  for (i = 1; i < 4; i++)	/* Clear all three upper words of *res_p.  */
+    res[i] = 0;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Feed consecutive operand pairs through the intrinsic and the C model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_psadbw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      if (want != got)
+	mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pshufw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pshufw.c
new file mode 100644
index 00000000000..8da4b7afac7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pshufw.c
@@ -0,0 +1,247 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))  /* Never inlined or cloned into the caller.  */
+static void
+test_pshufw  (long long *ll1, unsigned int imm, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;  /* Reinterpret the 64-bit operand as __m64.  */
+  switch (imm)  /* Each case passes its selector as a literal constant.  */
+    {
+    case 0:
+      *(__m64 *) r = _m_pshufw (t1, 0);
+      break;
+    case 1:
+      *(__m64 *) r = _m_pshufw (t1, 1);
+      break;
+    case 2:
+      *(__m64 *) r = _m_pshufw (t1, 2);
+      break;
+    case 3:
+      *(__m64 *) r = _m_pshufw (t1, 3);
+      break;
+    case 4:
+      *(__m64 *) r = _m_pshufw (t1, 4);
+      break;
+    case 5:
+      *(__m64 *) r = _m_pshufw (t1, 5);
+      break;
+    case 6:
+      *(__m64 *) r = _m_pshufw (t1, 6);
+      break;
+    case 7:
+      *(__m64 *) r = _m_pshufw (t1, 7);
+      break;
+    case 8:
+      *(__m64 *) r = _m_pshufw (t1, 8);
+      break;
+    case 9:
+      *(__m64 *) r = _m_pshufw (t1, 9);
+      break;
+    case 10:
+      *(__m64 *) r = _m_pshufw (t1, 10);
+      break;
+    case 11:
+      *(__m64 *) r = _m_pshufw (t1, 11);
+      break;
+    case 12:
+      *(__m64 *) r = _m_pshufw (t1, 12);
+      break;
+    case 13:
+      *(__m64 *) r = _m_pshufw (t1, 13);
+      break;
+    case 14:
+      *(__m64 *) r = _m_pshufw (t1, 14);
+      break;
+    case 15:
+      *(__m64 *) r = _m_pshufw (t1, 15);
+      break;
+    case 16:
+      *(__m64 *) r = _m_pshufw (t1, 16);
+      break;
+    case 17:
+      *(__m64 *) r = _m_pshufw (t1, 17);
+      break;
+    case 18:
+      *(__m64 *) r = _m_pshufw (t1, 18);
+      break;
+    case 19:
+      *(__m64 *) r = _m_pshufw (t1, 19);
+      break;
+    case 20:
+      *(__m64 *) r = _m_pshufw (t1, 20);
+      break;
+    case 21:
+      *(__m64 *) r = _m_pshufw (t1, 21);
+      break;
+    case 22:
+      *(__m64 *) r = _m_pshufw (t1, 22);
+      break;
+    case 23:
+      *(__m64 *) r = _m_pshufw (t1, 23);
+      break;
+    case 24:
+      *(__m64 *) r = _m_pshufw (t1, 24);
+      break;
+    case 25:
+      *(__m64 *) r = _m_pshufw (t1, 25);
+      break;
+    case 26:
+      *(__m64 *) r = _m_pshufw (t1, 26);
+      break;
+    case 27:
+      *(__m64 *) r = _m_pshufw (t1, 27);
+      break;
+    case 28:
+      *(__m64 *) r = _m_pshufw (t1, 28);
+      break;
+    case 29:
+      *(__m64 *) r = _m_pshufw (t1, 29);
+      break;
+    case 30:
+      *(__m64 *) r = _m_pshufw (t1, 30);
+      break;
+    case 31:
+      *(__m64 *) r = _m_pshufw (t1, 31);
+      break;
+    case 32:
+      *(__m64 *) r = _m_pshufw (t1, 32);
+      break;
+    case 33:
+      *(__m64 *) r = _m_pshufw (t1, 33);
+      break;
+    case 34:
+      *(__m64 *) r = _m_pshufw (t1, 34);
+      break;
+    case 35:
+      *(__m64 *) r = _m_pshufw (t1, 35);
+      break;
+    case 36:
+      *(__m64 *) r = _m_pshufw (t1, 36);
+      break;
+    case 37:
+      *(__m64 *) r = _m_pshufw (t1, 37);
+      break;
+    case 38:
+      *(__m64 *) r = _m_pshufw (t1, 38);
+      break;
+    case 39:
+      *(__m64 *) r = _m_pshufw (t1, 39);
+      break;
+    case 40:
+      *(__m64 *) r = _m_pshufw (t1, 40);
+      break;
+    case 41:
+      *(__m64 *) r = _m_pshufw (t1, 41);
+      break;
+    case 42:
+      *(__m64 *) r = _m_pshufw (t1, 42);
+      break;
+    case 43:
+      *(__m64 *) r = _m_pshufw (t1, 43);
+      break;
+    case 44:
+      *(__m64 *) r = _m_pshufw (t1, 44);
+      break;
+    case 45:
+      *(__m64 *) r = _m_pshufw (t1, 45);
+      break;
+    case 46:
+      *(__m64 *) r = _m_pshufw (t1, 46);
+      break;
+    case 47:
+      *(__m64 *) r = _m_pshufw (t1, 47);
+      break;
+    case 48:
+      *(__m64 *) r = _m_pshufw (t1, 48);
+      break;
+    case 49:
+      *(__m64 *) r = _m_pshufw (t1, 49);
+      break;
+    case 50:
+      *(__m64 *) r = _m_pshufw (t1, 50);
+      break;
+    case 51:
+      *(__m64 *) r = _m_pshufw (t1, 51);
+      break;
+    case 52:
+      *(__m64 *) r = _m_pshufw (t1, 52);
+      break;
+    case 53:
+      *(__m64 *) r = _m_pshufw (t1, 53);
+      break;
+    case 54:
+      *(__m64 *) r = _m_pshufw (t1, 54);
+      break;
+    case 55:
+      *(__m64 *) r = _m_pshufw (t1, 55);
+      break;
+    case 56:
+      *(__m64 *) r = _m_pshufw (t1, 56);
+      break;
+    case 57:
+      *(__m64 *) r = _m_pshufw (t1, 57);
+      break;
+    case 58:
+      *(__m64 *) r = _m_pshufw (t1, 58);
+      break;
+    case 59:
+      *(__m64 *) r = _m_pshufw (t1, 59);
+      break;
+    case 60:
+      *(__m64 *) r = _m_pshufw (t1, 60);
+      break;
+    case 61:
+      *(__m64 *) r = _m_pshufw (t1, 61);
+      break;
+    case 62:
+      *(__m64 *) r = _m_pshufw (t1, 62);
+      break;
+    case 63:
+      *(__m64 *) r = _m_pshufw (t1, 63);
+      break;
+    default:  /* imm above 63: *r is left unmodified.  */
+      break;
+    }
+}
+
+/* Reference model: result word N is source word (imm >> 2N) & 3.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+			long long *res_p)
+{
+  unsigned long long value = *(unsigned long long *) src_p;
+  unsigned short *out = (unsigned short *) res_p;
+  unsigned int sel = imm;
+  int lane;
+  for (lane = 0; lane < 4; lane++, sel >>= 2)
+    {
+      unsigned int word = sel & 0x3;
+      out[lane] = (unsigned short) (value >> (16 * word));
+    }
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Operand IDX is shuffled with selector IDX; selectors stop at 63.  */
+  for (idx = 0; idx < MMX_num_ops && idx <= 63; idx++)
+    {
+      test_pshufw (&MMXops[idx], idx, &got);
+      compute_correct_result (&MMXops[idx], idx, &want);
+      if (want == got)
+	continue;
+
+      mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pslld.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pslld.c
new file mode 100644
index 00000000000..0eead0822fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pslld.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pslld  (long long *ll1, long long *ll2, long long *r)
+{
+  /* View the three 64-bit slots as __m64 and apply _m_pslld.  */
+  __m64 *lhs = (__m64 *) ll1, *rhs = (__m64 *) ll2, *out = (__m64 *) r;
+  *out = _m_pslld (*lhs, *rhs);
+}
+
+/* Reference model: both dwords shifted left by the count in *src_p,
+   or zero when that count is above 31.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  unsigned int *val = (unsigned int *) dst_p;
+  unsigned int *cnt = (unsigned int *) src_p;
+  unsigned int *out = (unsigned int *) res_p;
+  /* A non-zero second count word also puts the count out of range.  */
+  int in_range = cnt[1] == 0 && cnt[0] <= 31;
+  int lane;
+
+  for (lane = 0; lane < 2; lane++)
+    out[lane] = in_range ? val[lane] << cnt[0] : 0;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Feed consecutive operand pairs through the intrinsic and the C model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_pslld (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      if (want != got)
+	mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pslldi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pslldi.c
new file mode 100644
index 00000000000..6ae1d4a78e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pslldi.c
@@ -0,0 +1,152 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))  /* Never inlined or cloned into the caller.  */
+static void  /* NOTE(review): named test_psllwi, but the body exercises _m_pslldi.  */
+test_psllwi  (long long *ll1, unsigned int imm, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;  /* Reinterpret the 64-bit operand as __m64.  */
+  switch (imm)  /* Each case passes its shift count as a literal constant.  */
+    {
+    case 0:
+      *(__m64 *) r = _m_pslldi (t1, 0);
+      break;
+    case 1:
+      *(__m64 *) r = _m_pslldi (t1, 1);
+      break;
+    case 2:
+      *(__m64 *) r = _m_pslldi (t1, 2);
+      break;
+    case 3:
+      *(__m64 *) r = _m_pslldi (t1, 3);
+      break;
+    case 4:
+      *(__m64 *) r = _m_pslldi (t1, 4);
+      break;
+    case 5:
+      *(__m64 *) r = _m_pslldi (t1, 5);
+      break;
+    case 6:
+      *(__m64 *) r = _m_pslldi (t1, 6);
+      break;
+    case 7:
+      *(__m64 *) r = _m_pslldi (t1, 7);
+      break;
+    case 8:
+      *(__m64 *) r = _m_pslldi (t1, 8);
+      break;
+    case 9:
+      *(__m64 *) r = _m_pslldi (t1, 9);
+      break;
+    case 10:
+      *(__m64 *) r = _m_pslldi (t1, 10);
+      break;
+    case 11:
+      *(__m64 *) r = _m_pslldi (t1, 11);
+      break;
+    case 12:
+      *(__m64 *) r = _m_pslldi (t1, 12);
+      break;
+    case 13:
+      *(__m64 *) r = _m_pslldi (t1, 13);
+      break;
+    case 14:
+      *(__m64 *) r = _m_pslldi (t1, 14);
+      break;
+    case 15:
+      *(__m64 *) r = _m_pslldi (t1, 15);
+      break;
+    case 16:
+      *(__m64 *) r = _m_pslldi (t1, 16);
+      break;
+    case 17:
+      *(__m64 *) r = _m_pslldi (t1, 17);
+      break;
+    case 18:
+      *(__m64 *) r = _m_pslldi (t1, 18);
+      break;
+    case 19:
+      *(__m64 *) r = _m_pslldi (t1, 19);
+      break;
+    case 20:
+      *(__m64 *) r = _m_pslldi (t1, 20);
+      break;
+    case 21:
+      *(__m64 *) r = _m_pslldi (t1, 21);
+      break;
+    case 22:
+      *(__m64 *) r = _m_pslldi (t1, 22);
+      break;
+    case 23:
+      *(__m64 *) r = _m_pslldi (t1, 23);
+      break;
+    case 24:
+      *(__m64 *) r = _m_pslldi (t1, 24);
+      break;
+    case 25:
+      *(__m64 *) r = _m_pslldi (t1, 25);
+      break;
+    case 26:
+      *(__m64 *) r = _m_pslldi (t1, 26);
+      break;
+    case 27:
+      *(__m64 *) r = _m_pslldi (t1, 27);
+      break;
+    case 28:
+      *(__m64 *) r = _m_pslldi (t1, 28);
+      break;
+    case 29:
+      *(__m64 *) r = _m_pslldi (t1, 29);
+      break;
+    case 30:
+      *(__m64 *) r = _m_pslldi (t1, 30);
+      break;
+    case 31:
+      *(__m64 *) r = _m_pslldi (t1, 31);
+      break;
+    default:  /* Any imm above 31 is run as a shift by 32.  */
+      *(__m64 *) r = _m_pslldi (t1, 32);
+      break;
+    }
+}
+
+/* Reference model: both dwords of *src_p shifted left by imm, or zero
+   when imm is above 31.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+			long long *res_p)
+{
+  unsigned int *in = (unsigned int *) src_p;
+  unsigned int *out = (unsigned int *) res_p;
+  int lane;
+  if (imm > 31)
+    out[0] = out[1] = 0;
+  else
+    for (lane = 0; lane < 2; lane++)
+      out[lane] = in[lane] << imm;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Each operand, converted to unsigned int, is also its shift count.  */
+  for (idx = 0; idx < MMX_num_ops; idx++)
+    {
+      unsigned int amount = MMXops[idx];
+
+      test_psllwi (&MMXops[idx], amount, &got);
+      compute_correct_result (&MMXops[idx], amount, &want);
+      if (want != got)
+	mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psllq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllq.c
new file mode 100644
index 00000000000..0283ba0f329
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllq.c
@@ -0,0 +1,46 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllq  (long long *ll1, long long *ll2, long long *r)
+{
+  /* View the three 64-bit slots as __m64 and apply _m_psllq.  */
+  __m64 *lhs = (__m64 *) ll1, *rhs = (__m64 *) ll2, *out = (__m64 *) r;
+  *out = _m_psllq (*lhs, *rhs);
+}
+
+/* Reference model: *dst shifted left by *src, or zero when *src > 63.  */
+static void
+compute_correct_result (unsigned long long *dst,
+			unsigned long long *src,
+			unsigned long long *res)
+{
+  unsigned long long count = *src;
+
+  /* Shifting a 64-bit value by 64 or more is undefined in C.  */
+  *res = count <= 63 ? *dst << count : 0;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Feed consecutive operand pairs through the intrinsic and the C model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_psllq (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      if (want != got)
+	mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psllqi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllqi.c
new file mode 100644
index 00000000000..48d7e5f2e31
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllqi.c
@@ -0,0 +1,244 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))  /* Never inlined or cloned into the caller.  */
+static void  /* NOTE(review): named test_psllwi, but the body exercises _m_psllqi.  */
+test_psllwi  (long long *ll1, unsigned int imm, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;  /* Reinterpret the 64-bit operand as __m64.  */
+  switch (imm)  /* Each case passes its shift count as a literal constant.  */
+    {
+    case 0:
+      *(__m64 *) r = _m_psllqi (t1, 0);
+      break;
+    case 1:
+      *(__m64 *) r = _m_psllqi (t1, 1);
+      break;
+    case 2:
+      *(__m64 *) r = _m_psllqi (t1, 2);
+      break;
+    case 3:
+      *(__m64 *) r = _m_psllqi (t1, 3);
+      break;
+    case 4:
+      *(__m64 *) r = _m_psllqi (t1, 4);
+      break;
+    case 5:
+      *(__m64 *) r = _m_psllqi (t1, 5);
+      break;
+    case 6:
+      *(__m64 *) r = _m_psllqi (t1, 6);
+      break;
+    case 7:
+      *(__m64 *) r = _m_psllqi (t1, 7);
+      break;
+    case 8:
+      *(__m64 *) r = _m_psllqi (t1, 8);
+      break;
+    case 9:
+      *(__m64 *) r = _m_psllqi (t1, 9);
+      break;
+    case 10:
+      *(__m64 *) r = _m_psllqi (t1, 10);
+      break;
+    case 11:
+      *(__m64 *) r = _m_psllqi (t1, 11);
+      break;
+    case 12:
+      *(__m64 *) r = _m_psllqi (t1, 12);
+      break;
+    case 13:
+      *(__m64 *) r = _m_psllqi (t1, 13);
+      break;
+    case 14:
+      *(__m64 *) r = _m_psllqi (t1, 14);
+      break;
+    case 15:
+      *(__m64 *) r = _m_psllqi (t1, 15);
+      break;
+    case 16:
+      *(__m64 *) r = _m_psllqi (t1, 16);
+      break;
+    case 17:
+      *(__m64 *) r = _m_psllqi (t1, 17);
+      break;
+    case 18:
+      *(__m64 *) r = _m_psllqi (t1, 18);
+      break;
+    case 19:
+      *(__m64 *) r = _m_psllqi (t1, 19);
+      break;
+    case 20:
+      *(__m64 *) r = _m_psllqi (t1, 20);
+      break;
+    case 21:
+      *(__m64 *) r = _m_psllqi (t1, 21);
+      break;
+    case 22:
+      *(__m64 *) r = _m_psllqi (t1, 22);
+      break;
+    case 23:
+      *(__m64 *) r = _m_psllqi (t1, 23);
+      break;
+    case 24:
+      *(__m64 *) r = _m_psllqi (t1, 24);
+      break;
+    case 25:
+      *(__m64 *) r = _m_psllqi (t1, 25);
+      break;
+    case 26:
+      *(__m64 *) r = _m_psllqi (t1, 26);
+      break;
+    case 27:
+      *(__m64 *) r = _m_psllqi (t1, 27);
+      break;
+    case 28:
+      *(__m64 *) r = _m_psllqi (t1, 28);
+      break;
+    case 29:
+      *(__m64 *) r = _m_psllqi (t1, 29);
+      break;
+    case 30:
+      *(__m64 *) r = _m_psllqi (t1, 30);
+      break;
+    case 31:
+      *(__m64 *) r = _m_psllqi (t1, 31);
+      break;
+    case 32:
+      *(__m64 *) r = _m_psllqi (t1, 32);
+      break;
+    case 33:
+      *(__m64 *) r = _m_psllqi (t1, 33);
+      break;
+    case 34:
+      *(__m64 *) r = _m_psllqi (t1, 34);
+      break;
+    case 35:
+      *(__m64 *) r = _m_psllqi (t1, 35);
+      break;
+    case 36:
+      *(__m64 *) r = _m_psllqi (t1, 36);
+      break;
+    case 37:
+      *(__m64 *) r = _m_psllqi (t1, 37);
+      break;
+    case 38:
+      *(__m64 *) r = _m_psllqi (t1, 38);
+      break;
+    case 39:
+      *(__m64 *) r = _m_psllqi (t1, 39);
+      break;
+    case 40:
+      *(__m64 *) r = _m_psllqi (t1, 40);
+      break;
+    case 41:
+      *(__m64 *) r = _m_psllqi (t1, 41);
+      break;
+    case 42:
+      *(__m64 *) r = _m_psllqi (t1, 42);
+      break;
+    case 43:
+      *(__m64 *) r = _m_psllqi (t1, 43);
+      break;
+    case 44:
+      *(__m64 *) r = _m_psllqi (t1, 44);
+      break;
+    case 45:
+      *(__m64 *) r = _m_psllqi (t1, 45);
+      break;
+    case 46:
+      *(__m64 *) r = _m_psllqi (t1, 46);
+      break;
+    case 47:
+      *(__m64 *) r = _m_psllqi (t1, 47);
+      break;
+    case 48:
+      *(__m64 *) r = _m_psllqi (t1, 48);
+      break;
+    case 49:
+      *(__m64 *) r = _m_psllqi (t1, 49);
+      break;
+    case 50:
+      *(__m64 *) r = _m_psllqi (t1, 50);
+      break;
+    case 51:
+      *(__m64 *) r = _m_psllqi (t1, 51);
+      break;
+    case 52:
+      *(__m64 *) r = _m_psllqi (t1, 52);
+      break;
+    case 53:
+      *(__m64 *) r = _m_psllqi (t1, 53);
+      break;
+    case 54:
+      *(__m64 *) r = _m_psllqi (t1, 54);
+      break;
+    case 55:
+      *(__m64 *) r = _m_psllqi (t1, 55);
+      break;
+    case 56:
+      *(__m64 *) r = _m_psllqi (t1, 56);
+      break;
+    case 57:
+      *(__m64 *) r = _m_psllqi (t1, 57);
+      break;
+    case 58:
+      *(__m64 *) r = _m_psllqi (t1, 58);
+      break;
+    case 59:
+      *(__m64 *) r = _m_psllqi (t1, 59);
+      break;
+    case 60:
+      *(__m64 *) r = _m_psllqi (t1, 60);
+      break;
+    case 61:
+      *(__m64 *) r = _m_psllqi (t1, 61);
+      break;
+    case 62:
+      *(__m64 *) r = _m_psllqi (t1, 62);
+      break;
+    case 63:
+      *(__m64 *) r = _m_psllqi (t1, 63);
+      break;
+    default:  /* Any imm above 63 is run as a shift by 64.  */
+      *(__m64 *) r = _m_psllqi (t1, 64);
+      break;
+    }
+}
+
+/* Reference model: *src shifted left by imm, or zero when imm > 63.  */
+static void
+compute_correct_result (unsigned long long *src, unsigned int imm,
+			unsigned long long *res)
+{
+  /* Counts of 64 or more cannot be expressed as a C shift.  */
+  if (imm > 63)
+    res[0] = 0;
+  else
+    res[0] = src[0] << imm;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Each operand, converted to unsigned int, is also its shift count.  */
+  for (idx = 0; idx < MMX_num_ops; idx++)
+    {
+      unsigned int amount = MMXops[idx];
+
+      test_psllwi (&MMXops[idx], amount, &got);
+      compute_correct_result (&MMXops[idx], amount, &want);
+      if (want != got)
+	mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psllw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllw.c
new file mode 100644
index 00000000000..10d1b79bf26
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllw.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllw  (long long *ll1, long long *ll2, long long *r)
+{
+  /* View the three 64-bit slots as __m64 and apply _m_psllw.  */
+  __m64 *lhs = (__m64 *) ll1, *rhs = (__m64 *) ll2, *out = (__m64 *) r;
+  *out = _m_psllw (*lhs, *rhs);
+}
+
+/* Reference model: all four words shifted left by the count in *src_p,
+   or zero when that count is above 15.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  unsigned short *val = (unsigned short *) dst_p;
+  unsigned int *cnt = (unsigned int *) src_p;
+  unsigned short *out = (unsigned short *) res_p;
+  /* A non-zero second count word also puts the count out of range.  */
+  int in_range = cnt[1] == 0 && cnt[0] <= 15;
+  int lane;
+
+  for (lane = 0; lane < 4; lane++)
+    out[lane] = in_range ? val[lane] << cnt[0] : 0;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Feed consecutive operand pairs through the intrinsic and the C model.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_psllw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      if (want != got)
+	mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psllwi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllwi.c
new file mode 100644
index 00000000000..373fa5c146b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllwi.c
@@ -0,0 +1,104 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllwi  (long long *ll1, unsigned int imm, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  switch (imm)
+    {
+    case 0:
+      *(__m64 *) r = _m_psllwi (t1, 0);
+      break;
+    case 1:
+      *(__m64 *) r = _m_psllwi (t1, 1);
+      break;
+    case 2:
+      *(__m64 *) r = _m_psllwi (t1, 2);
+      break;
+    case 3:
+      *(__m64 *) r = _m_psllwi (t1, 3);
+      break;
+    case 4:
+      *(__m64 *) r = _m_psllwi (t1, 4);
+      break;
+    case 5:
+      *(__m64 *) r = _m_psllwi (t1, 5);
+      break;
+    case 6:
+      *(__m64 *) r = _m_psllwi (t1, 6);
+      break;
+    case 7:
+      *(__m64 *) r = _m_psllwi (t1, 7);
+      break;
+    case 8:
+      *(__m64 *) r = _m_psllwi (t1, 8);
+      break;
+    case 9:
+      *(__m64 *) r = _m_psllwi (t1, 9);
+      break;
+    case 10:
+      *(__m64 *) r = _m_psllwi (t1, 10);
+      break;
+    case 11:
+      *(__m64 *) r = _m_psllwi (t1, 11);
+      break;
+    case 12:
+      *(__m64 *) r = _m_psllwi (t1, 12);
+      break;
+    case 13:
+      *(__m64 *) r = _m_psllwi (t1, 13);
+      break;
+    case 14:
+      *(__m64 *) r = _m_psllwi (t1, 14);
+      break;
+    case 15:
+      *(__m64 *) r = _m_psllwi (t1, 15);
+      break;
+    default:
+      *(__m64 *) r = _m_psllwi (t1, 16);
+      break;
+    }
+}
+
+/* Reference model: shift each 16-bit lane of *src_p left by imm.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+			long long *res_p)
+{
+  const unsigned short *in = (const unsigned short *) src_p;
+  unsigned short *out = (unsigned short *) res_p;
+  /* Counts above 15 clear every lane.  */
+  const int clear = imm > 15;
+  int lane;
+
+  for (lane = 0; lane < 4; lane++)
+    /* The shift is done in int after promotion, then truncated to 16 bits.  */
+    out[lane] = clear ? 0 : in[lane] << imm;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  unsigned int shift;
+  int mismatches = 0;
+  int n;
+
+  /* Each table entry is the operand; converted to unsigned int it is the count.  */
+  for (n = 0; n < MMX_num_ops; n++)
+    {
+      shift = MMXops[n];
+      test_psllwi (&MMXops[n], shift, &got);
+      compute_correct_result (&MMXops[n], shift, &want);
+      if (want != got)
+	mismatches++;
+    }
+
+  if (mismatches != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrad.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrad.c
new file mode 100644
index 00000000000..a9d41c273cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrad.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrad  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_psrad (t1, t2);
+}
+
+/* Reference model: arithmetic right shift of each 32-bit lane of *dst_p.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  const int *in = (const int *) dst_p;
+  const unsigned int *cnt = (const unsigned int *) src_p;
+  int *out = (int *) res_p;
+  /* Out-of-range count (second 32-bit word set, or > 31) leaves only the sign.  */
+  const int fill = cnt[1] != 0 || cnt[0] > 31;
+  int lane;
+  for (lane = 0; lane < 2; lane++)
+    out[lane] = fill
+		? (in[lane] < 0 ? -1 : 0)
+		: in[lane] >> cnt[0];
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_psrad (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psradi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psradi.c
new file mode 100644
index 00000000000..8237250c48f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psradi.c
@@ -0,0 +1,152 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psradi  (long long *ll1, unsigned int imm, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  switch (imm)
+    {
+    case 0:
+      *(__m64 *) r = _m_psradi (t1, 0);
+      break;
+    case 1:
+      *(__m64 *) r = _m_psradi (t1, 1);
+      break;
+    case 2:
+      *(__m64 *) r = _m_psradi (t1, 2);
+      break;
+    case 3:
+      *(__m64 *) r = _m_psradi (t1, 3);
+      break;
+    case 4:
+      *(__m64 *) r = _m_psradi (t1, 4);
+      break;
+    case 5:
+      *(__m64 *) r = _m_psradi (t1, 5);
+      break;
+    case 6:
+      *(__m64 *) r = _m_psradi (t1, 6);
+      break;
+    case 7:
+      *(__m64 *) r = _m_psradi (t1, 7);
+      break;
+    case 8:
+      *(__m64 *) r = _m_psradi (t1, 8);
+      break;
+    case 9:
+      *(__m64 *) r = _m_psradi (t1, 9);
+      break;
+    case 10:
+      *(__m64 *) r = _m_psradi (t1, 10);
+      break;
+    case 11:
+      *(__m64 *) r = _m_psradi (t1, 11);
+      break;
+    case 12:
+      *(__m64 *) r = _m_psradi (t1, 12);
+      break;
+    case 13:
+      *(__m64 *) r = _m_psradi (t1, 13);
+      break;
+    case 14:
+      *(__m64 *) r = _m_psradi (t1, 14);
+      break;
+    case 15:
+      *(__m64 *) r = _m_psradi (t1, 15);
+      break;
+    case 16:
+      *(__m64 *) r = _m_psradi (t1, 16);
+      break;
+    case 17:
+      *(__m64 *) r = _m_psradi (t1, 17);
+      break;
+    case 18:
+      *(__m64 *) r = _m_psradi (t1, 18);
+      break;
+    case 19:
+      *(__m64 *) r = _m_psradi (t1, 19);
+      break;
+    case 20:
+      *(__m64 *) r = _m_psradi (t1, 20);
+      break;
+    case 21:
+      *(__m64 *) r = _m_psradi (t1, 21);
+      break;
+    case 22:
+      *(__m64 *) r = _m_psradi (t1, 22);
+      break;
+    case 23:
+      *(__m64 *) r = _m_psradi (t1, 23);
+      break;
+    case 24:
+      *(__m64 *) r = _m_psradi (t1, 24);
+      break;
+    case 25:
+      *(__m64 *) r = _m_psradi (t1, 25);
+      break;
+    case 26:
+      *(__m64 *) r = _m_psradi (t1, 26);
+      break;
+    case 27:
+      *(__m64 *) r = _m_psradi (t1, 27);
+      break;
+    case 28:
+      *(__m64 *) r = _m_psradi (t1, 28);
+      break;
+    case 29:
+      *(__m64 *) r = _m_psradi (t1, 29);
+      break;
+    case 30:
+      *(__m64 *) r = _m_psradi (t1, 30);
+      break;
+    case 31:
+      *(__m64 *) r = _m_psradi (t1, 31);
+      break;
+    default:
+      *(__m64 *) r = _m_psradi (t1, 32);
+      break;
+    }
+}
+
+/* Reference model: arithmetic right shift of each 32-bit lane of *src_p by imm.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+			long long *res_p)
+{
+  const int *in = (const int *) src_p;
+  int *out = (int *) res_p;
+  /* Counts above 31 leave only the sign: -1 for negative lanes, else 0.  */
+  const int fill = imm > 31;
+  int lane;
+  for (lane = 0; lane < 2; lane++)
+    out[lane] = fill
+		? (in[lane] < 0 ? -1 : 0)
+		: in[lane] >> imm;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  unsigned int shift;
+  int mismatches = 0;
+  int n;
+
+  /* Each table entry is the operand; converted to unsigned int it is the count.  */
+  for (n = 0; n < MMX_num_ops; n++)
+    {
+      shift = MMXops[n];
+      test_psradi (&MMXops[n], shift, &got);
+      compute_correct_result (&MMXops[n], shift, &want);
+      if (want != got)
+	mismatches++;
+    }
+
+  if (mismatches != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psraw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psraw.c
new file mode 100644
index 00000000000..3fed516b811
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psraw.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psraw  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_psraw (t1, t2);
+}
+
+/* Reference model: arithmetic right shift of each 16-bit lane of *dst_p.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  const short *in = (const short *) dst_p;
+  const unsigned int *cnt = (const unsigned int *) src_p;
+  short *out = (short *) res_p;
+  /* Out-of-range count (second 32-bit word set, or > 15) leaves only the sign.  */
+  const int fill = cnt[1] != 0 || cnt[0] > 15;
+  int lane;
+  for (lane = 0; lane < 4; lane++)
+    out[lane] = fill
+		? (in[lane] < 0 ? -1 : 0)
+		: in[lane] >> cnt[0];
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_psraw (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrawi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrawi.c
new file mode 100644
index 00000000000..1c8973db3db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrawi.c
@@ -0,0 +1,104 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrawi  (long long *ll1, unsigned int imm, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  switch (imm)
+    {
+    case 0:
+      *(__m64 *) r = _m_psrawi (t1, 0);
+      break;
+    case 1:
+      *(__m64 *) r = _m_psrawi (t1, 1);
+      break;
+    case 2:
+      *(__m64 *) r = _m_psrawi (t1, 2);
+      break;
+    case 3:
+      *(__m64 *) r = _m_psrawi (t1, 3);
+      break;
+    case 4:
+      *(__m64 *) r = _m_psrawi (t1, 4);
+      break;
+    case 5:
+      *(__m64 *) r = _m_psrawi (t1, 5);
+      break;
+    case 6:
+      *(__m64 *) r = _m_psrawi (t1, 6);
+      break;
+    case 7:
+      *(__m64 *) r = _m_psrawi (t1, 7);
+      break;
+    case 8:
+      *(__m64 *) r = _m_psrawi (t1, 8);
+      break;
+    case 9:
+      *(__m64 *) r = _m_psrawi (t1, 9);
+      break;
+    case 10:
+      *(__m64 *) r = _m_psrawi (t1, 10);
+      break;
+    case 11:
+      *(__m64 *) r = _m_psrawi (t1, 11);
+      break;
+    case 12:
+      *(__m64 *) r = _m_psrawi (t1, 12);
+      break;
+    case 13:
+      *(__m64 *) r = _m_psrawi (t1, 13);
+      break;
+    case 14:
+      *(__m64 *) r = _m_psrawi (t1, 14);
+      break;
+    case 15:
+      *(__m64 *) r = _m_psrawi (t1, 15);
+      break;
+    default:
+      *(__m64 *) r = _m_psrawi (t1, 16);
+      break;
+    }
+}
+
+/* Reference model: arithmetic right shift of each 16-bit lane of *src_p by imm.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+			long long *res_p)
+{
+  const short *in = (const short *) src_p;
+  short *out = (short *) res_p;
+  /* Counts above 15 leave only the sign: -1 for negative lanes, else 0.  */
+  const int fill = imm > 15;
+  int lane;
+  for (lane = 0; lane < 4; lane++)
+    out[lane] = fill
+		? (in[lane] < 0 ? -1 : 0)
+		: in[lane] >> imm;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  unsigned int shift;
+  int mismatches = 0;
+  int n;
+
+  /* Each table entry is the operand; converted to unsigned int it is the count.  */
+  for (n = 0; n < MMX_num_ops; n++)
+    {
+      shift = MMXops[n];
+      test_psrawi (&MMXops[n], shift, &got);
+      compute_correct_result (&MMXops[n], shift, &want);
+      if (want != got)
+	mismatches++;
+    }
+
+  if (mismatches != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrld.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrld.c
new file mode 100644
index 00000000000..b7c9565cb24
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrld.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrld  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_psrld (t1, t2);
+}
+
+/* Reference for psrld: zero-filling right shift, so the lanes are unsigned.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  unsigned int *dst = (unsigned int *) dst_p;
+  unsigned int *src = (unsigned int *) src_p;
+  unsigned int *res = (unsigned int *) res_p;
+  int i;
+  if (src[1] || src[0] > 31)	/* count out of range: clear both lanes */
+    for (i = 0; i < 2; i++)
+      res[i] = 0;
+  else
+    for (i = 0; i < 2; i++)
+      res[i] = dst[i] >> src[0];	/* unsigned >> never sign-extends */
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_psrld (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrldi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrldi.c
new file mode 100644
index 00000000000..6a150ee2eff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrldi.c
@@ -0,0 +1,152 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrldi  (long long *ll1, unsigned int imm, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  switch (imm)
+    {
+    case 0:
+      *(__m64 *) r = _m_psrldi (t1, 0);
+      break;
+    case 1:
+      *(__m64 *) r = _m_psrldi (t1, 1);
+      break;
+    case 2:
+      *(__m64 *) r = _m_psrldi (t1, 2);
+      break;
+    case 3:
+      *(__m64 *) r = _m_psrldi (t1, 3);
+      break;
+    case 4:
+      *(__m64 *) r = _m_psrldi (t1, 4);
+      break;
+    case 5:
+      *(__m64 *) r = _m_psrldi (t1, 5);
+      break;
+    case 6:
+      *(__m64 *) r = _m_psrldi (t1, 6);
+      break;
+    case 7:
+      *(__m64 *) r = _m_psrldi (t1, 7);
+      break;
+    case 8:
+      *(__m64 *) r = _m_psrldi (t1, 8);
+      break;
+    case 9:
+      *(__m64 *) r = _m_psrldi (t1, 9);
+      break;
+    case 10:
+      *(__m64 *) r = _m_psrldi (t1, 10);
+      break;
+    case 11:
+      *(__m64 *) r = _m_psrldi (t1, 11);
+      break;
+    case 12:
+      *(__m64 *) r = _m_psrldi (t1, 12);
+      break;
+    case 13:
+      *(__m64 *) r = _m_psrldi (t1, 13);
+      break;
+    case 14:
+      *(__m64 *) r = _m_psrldi (t1, 14);
+      break;
+    case 15:
+      *(__m64 *) r = _m_psrldi (t1, 15);
+      break;
+    case 16:
+      *(__m64 *) r = _m_psrldi (t1, 16);
+      break;
+    case 17:
+      *(__m64 *) r = _m_psrldi (t1, 17);
+      break;
+    case 18:
+      *(__m64 *) r = _m_psrldi (t1, 18);
+      break;
+    case 19:
+      *(__m64 *) r = _m_psrldi (t1, 19);
+      break;
+    case 20:
+      *(__m64 *) r = _m_psrldi (t1, 20);
+      break;
+    case 21:
+      *(__m64 *) r = _m_psrldi (t1, 21);
+      break;
+    case 22:
+      *(__m64 *) r = _m_psrldi (t1, 22);
+      break;
+    case 23:
+      *(__m64 *) r = _m_psrldi (t1, 23);
+      break;
+    case 24:
+      *(__m64 *) r = _m_psrldi (t1, 24);
+      break;
+    case 25:
+      *(__m64 *) r = _m_psrldi (t1, 25);
+      break;
+    case 26:
+      *(__m64 *) r = _m_psrldi (t1, 26);
+      break;
+    case 27:
+      *(__m64 *) r = _m_psrldi (t1, 27);
+      break;
+    case 28:
+      *(__m64 *) r = _m_psrldi (t1, 28);
+      break;
+    case 29:
+      *(__m64 *) r = _m_psrldi (t1, 29);
+      break;
+    case 30:
+      *(__m64 *) r = _m_psrldi (t1, 30);
+      break;
+    case 31:
+      *(__m64 *) r = _m_psrldi (t1, 31);
+      break;
+    default:
+      *(__m64 *) r = _m_psrldi (t1, 32);
+      break;
+    }
+}
+
+/* Reference for psrldi: zero-filling right shift, so the lanes are unsigned.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+			long long *res_p)
+{
+  unsigned int *src = (unsigned int *) src_p;
+  unsigned int *res = (unsigned int *) res_p;
+  int i;
+  if (imm > 31)	/* count out of range: clear both lanes */
+    for (i = 0; i < 2; i++)
+      res[i] = 0;
+  else
+    for (i = 0; i < 2; i++)
+      res[i] = src[i] >> imm;	/* unsigned >> never sign-extends */
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  unsigned int shift;
+  int mismatches = 0;
+  int n;
+
+  /* Each table entry is the operand; converted to unsigned int it is the count.  */
+  for (n = 0; n < MMX_num_ops; n++)
+    {
+      shift = MMXops[n];
+      test_psrldi (&MMXops[n], shift, &got);
+      compute_correct_result (&MMXops[n], shift, &want);
+      if (want != got)
+	mismatches++;
+    }
+
+  if (mismatches != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlq.c
new file mode 100644
index 00000000000..c9fa8b45671
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlq.c
@@ -0,0 +1,46 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrlq  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_psrlq (t1, t2);
+}
+
+/* Reference model: logical right shift of the 64-bit value *dst by *src.  */
+static void
+compute_correct_result (unsigned long long *dst,
+			unsigned long long *src,
+			unsigned long long *res)
+{
+  const unsigned long long count = *src;
+
+  /* Counts above 63 clear the result; the shift itself is then skipped.  */
+  *res = count > 63 ? 0 : *dst >> count;
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_psrlq (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlqi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlqi.c
new file mode 100644
index 00000000000..bdbecd6ab6b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlqi.c
@@ -0,0 +1,244 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllwi  (long long *ll1, unsigned int imm, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  switch (imm)
+    {
+    case 0:
+      *(__m64 *) r = _m_psrlqi (t1, 0);
+      break;
+    case 1:
+      *(__m64 *) r = _m_psrlqi (t1, 1);
+      break;
+    case 2:
+      *(__m64 *) r = _m_psrlqi (t1, 2);
+      break;
+    case 3:
+      *(__m64 *) r = _m_psrlqi (t1, 3);
+      break;
+    case 4:
+      *(__m64 *) r = _m_psrlqi (t1, 4);
+      break;
+    case 5:
+      *(__m64 *) r = _m_psrlqi (t1, 5);
+      break;
+    case 6:
+      *(__m64 *) r = _m_psrlqi (t1, 6);
+      break;
+    case 7:
+      *(__m64 *) r = _m_psrlqi (t1, 7);
+      break;
+    case 8:
+      *(__m64 *) r = _m_psrlqi (t1, 8);
+      break;
+    case 9:
+      *(__m64 *) r = _m_psrlqi (t1, 9);
+      break;
+    case 10:
+      *(__m64 *) r = _m_psrlqi (t1, 10);
+      break;
+    case 11:
+      *(__m64 *) r = _m_psrlqi (t1, 11);
+      break;
+    case 12:
+      *(__m64 *) r = _m_psrlqi (t1, 12);
+      break;
+    case 13:
+      *(__m64 *) r = _m_psrlqi (t1, 13);
+      break;
+    case 14:
+      *(__m64 *) r = _m_psrlqi (t1, 14);
+      break;
+    case 15:
+      *(__m64 *) r = _m_psrlqi (t1, 15);
+      break;
+    case 16:
+      *(__m64 *) r = _m_psrlqi (t1, 16);
+      break;
+    case 17:
+      *(__m64 *) r = _m_psrlqi (t1, 17);
+      break;
+    case 18:
+      *(__m64 *) r = _m_psrlqi (t1, 18);
+      break;
+    case 19:
+      *(__m64 *) r = _m_psrlqi (t1, 19);
+      break;
+    case 20:
+      *(__m64 *) r = _m_psrlqi (t1, 20);
+      break;
+    case 21:
+      *(__m64 *) r = _m_psrlqi (t1, 21);
+      break;
+    case 22:
+      *(__m64 *) r = _m_psrlqi (t1, 22);
+      break;
+    case 23:
+      *(__m64 *) r = _m_psrlqi (t1, 23);
+      break;
+    case 24:
+      *(__m64 *) r = _m_psrlqi (t1, 24);
+      break;
+    case 25:
+      *(__m64 *) r = _m_psrlqi (t1, 25);
+      break;
+    case 26:
+      *(__m64 *) r = _m_psrlqi (t1, 26);
+      break;
+    case 27:
+      *(__m64 *) r = _m_psrlqi (t1, 27);
+      break;
+    case 28:
+      *(__m64 *) r = _m_psrlqi (t1, 28);
+      break;
+    case 29:
+      *(__m64 *) r = _m_psrlqi (t1, 29);
+      break;
+    case 30:
+      *(__m64 *) r = _m_psrlqi (t1, 30);
+      break;
+    case 31:
+      *(__m64 *) r = _m_psrlqi (t1, 31);
+      break;
+    case 32:
+      *(__m64 *) r = _m_psrlqi (t1, 32);
+      break;
+    case 33:
+      *(__m64 *) r = _m_psrlqi (t1, 33);
+      break;
+    case 34:
+      *(__m64 *) r = _m_psrlqi (t1, 34);
+      break;
+    case 35:
+      *(__m64 *) r = _m_psrlqi (t1, 35);
+      break;
+    case 36:
+      *(__m64 *) r = _m_psrlqi (t1, 36);
+      break;
+    case 37:
+      *(__m64 *) r = _m_psrlqi (t1, 37);
+      break;
+    case 38:
+      *(__m64 *) r = _m_psrlqi (t1, 38);
+      break;
+    case 39:
+      *(__m64 *) r = _m_psrlqi (t1, 39);
+      break;
+    case 40:
+      *(__m64 *) r = _m_psrlqi (t1, 40);
+      break;
+    case 41:
+      *(__m64 *) r = _m_psrlqi (t1, 41);
+      break;
+    case 42:
+      *(__m64 *) r = _m_psrlqi (t1, 42);
+      break;
+    case 43:
+      *(__m64 *) r = _m_psrlqi (t1, 43);
+      break;
+    case 44:
+      *(__m64 *) r = _m_psrlqi (t1, 44);
+      break;
+    case 45:
+      *(__m64 *) r = _m_psrlqi (t1, 45);
+      break;
+    case 46:
+      *(__m64 *) r = _m_psrlqi (t1, 46);
+      break;
+    case 47:
+      *(__m64 *) r = _m_psrlqi (t1, 47);
+      break;
+    case 48:
+      *(__m64 *) r = _m_psrlqi (t1, 48);
+      break;
+    case 49:
+      *(__m64 *) r = _m_psrlqi (t1, 49);
+      break;
+    case 50:
+      *(__m64 *) r = _m_psrlqi (t1, 50);
+      break;
+    case 51:
+      *(__m64 *) r = _m_psrlqi (t1, 51);
+      break;
+    case 52:
+      *(__m64 *) r = _m_psrlqi (t1, 52);
+      break;
+    case 53:
+      *(__m64 *) r = _m_psrlqi (t1, 53);
+      break;
+    case 54:
+      *(__m64 *) r = _m_psrlqi (t1, 54);
+      break;
+    case 55:
+      *(__m64 *) r = _m_psrlqi (t1, 55);
+      break;
+    case 56:
+      *(__m64 *) r = _m_psrlqi (t1, 56);
+      break;
+    case 57:
+      *(__m64 *) r = _m_psrlqi (t1, 57);
+      break;
+    case 58:
+      *(__m64 *) r = _m_psrlqi (t1, 58);
+      break;
+    case 59:
+      *(__m64 *) r = _m_psrlqi (t1, 59);
+      break;
+    case 60:
+      *(__m64 *) r = _m_psrlqi (t1, 60);
+      break;
+    case 61:
+      *(__m64 *) r = _m_psrlqi (t1, 61);
+      break;
+    case 62:
+      *(__m64 *) r = _m_psrlqi (t1, 62);
+      break;
+    case 63:
+      *(__m64 *) r = _m_psrlqi (t1, 63);
+      break;
+    default:
+      *(__m64 *) r = _m_psrlqi (t1, 64);
+      break;
+    }
+}
+
+/* Reference for psrlqi: logical right shift of the 64-bit value *src by imm.  */
+static void
+compute_correct_result (unsigned long long *src, unsigned int imm,
+			unsigned long long *res)
+{
+  /* Counts above 63 clear the result; otherwise shift the unsigned value.  */
+  if (imm > 63)
+    res[0] = 0;
+  else
+    res[0] = src[0] >> imm;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  unsigned int shift;
+  int mismatches = 0;
+  int n;
+
+  /* NOTE: test_psllwi in this file wraps _m_psrlqi despite its name.  */
+  for (n = 0; n < MMX_num_ops; n++)
+    {
+      shift = MMXops[n];
+      test_psllwi (&MMXops[n], shift, &got);
+      compute_correct_result (&MMXops[n], shift, &want);
+      if (want != got)
+	mismatches++;
+    }
+
+  if (mismatches != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlw.c
new file mode 100644
index 00000000000..6382448b1a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlw.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrlw  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_psrlw (t1, t2);
+}
+
+/* Reference for psrlw: zero-filling right shift, so the lanes are unsigned.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  unsigned short *dst = (unsigned short *) dst_p;
+  unsigned int *src = (unsigned int *) src_p;
+  unsigned short *res = (unsigned short *) res_p;
+  int i;
+  if (src[1] || src[0] > 15)	/* count out of range: clear all four lanes */
+    for (i = 0; i < 4; i++)
+      res[i] = 0;
+  else
+    for (i = 0; i < 4; i++)
+      res[i] = dst[i] >> src[0];	/* unsigned lane promotes without sign bits */
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_psrlw (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlwi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlwi.c
new file mode 100644
index 00000000000..98c6df35e5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlwi.c
@@ -0,0 +1,104 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrlwi  (long long *ll1, unsigned int imm, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  switch (imm)
+    {
+    case 0:
+      *(__m64 *) r = _m_psrlwi (t1, 0);
+      break;
+    case 1:
+      *(__m64 *) r = _m_psrlwi (t1, 1);
+      break;
+    case 2:
+      *(__m64 *) r = _m_psrlwi (t1, 2);
+      break;
+    case 3:
+      *(__m64 *) r = _m_psrlwi (t1, 3);
+      break;
+    case 4:
+      *(__m64 *) r = _m_psrlwi (t1, 4);
+      break;
+    case 5:
+      *(__m64 *) r = _m_psrlwi (t1, 5);
+      break;
+    case 6:
+      *(__m64 *) r = _m_psrlwi (t1, 6);
+      break;
+    case 7:
+      *(__m64 *) r = _m_psrlwi (t1, 7);
+      break;
+    case 8:
+      *(__m64 *) r = _m_psrlwi (t1, 8);
+      break;
+    case 9:
+      *(__m64 *) r = _m_psrlwi (t1, 9);
+      break;
+    case 10:
+      *(__m64 *) r = _m_psrlwi (t1, 10);
+      break;
+    case 11:
+      *(__m64 *) r = _m_psrlwi (t1, 11);
+      break;
+    case 12:
+      *(__m64 *) r = _m_psrlwi (t1, 12);
+      break;
+    case 13:
+      *(__m64 *) r = _m_psrlwi (t1, 13);
+      break;
+    case 14:
+      *(__m64 *) r = _m_psrlwi (t1, 14);
+      break;
+    case 15:
+      *(__m64 *) r = _m_psrlwi (t1, 15);
+      break;
+    default:
+      *(__m64 *) r = _m_psrlwi (t1, 16);
+      break;
+    }
+}
+
+/* Reference for psrlwi: zero-filling right shift, so the lanes are unsigned.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+			long long *res_p)
+{
+  unsigned short *src = (unsigned short *) src_p;
+  unsigned short *res = (unsigned short *) res_p;
+  int i;
+  if (imm > 15)	/* count out of range: clear all four lanes */
+    for (i = 0; i < 4; i++)
+      res[i] = 0;
+  else
+    for (i = 0; i < 4; i++)
+      res[i] = src[i] >> imm;	/* unsigned lane promotes without sign bits */
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  unsigned int shift;
+  int mismatches = 0;
+  int n;
+
+  /* Each table entry is the operand; converted to unsigned int it is the count.  */
+  for (n = 0; n < MMX_num_ops; n++)
+    {
+      shift = MMXops[n];
+      test_psrlwi (&MMXops[n], shift, &got);
+      compute_correct_result (&MMXops[n], shift, &want);
+      if (want != got)
+	mismatches++;
+    }
+
+  if (mismatches != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubb.c
new file mode 100644
index 00000000000..b3637353879
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubb.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubb  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_psubb (t1, t2);
+}
+
+/* Reference model: per-byte difference *dst_p - *src_p, each stored as a char.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  const char *minuend = (const char *) dst_p;
+  const char *subtrahend = (const char *) src_p;
+  char *out = (char *) res_p;
+  int lane;
+  for (lane = 0; lane != 8; ++lane)
+    out[lane] = minuend[lane] - subtrahend[lane];
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_psubb (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubd.c
new file mode 100644
index 00000000000..b091d7f590f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubd.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubd  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_psubd (t1, t2);
+}
+
+/* Reference model: per-dword difference *dst_p - *src_p over two int lanes.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  const int *minuend = (const int *) dst_p;
+  const int *subtrahend = (const int *) src_p;
+  int *out = (int *) res_p;
+  int lane;
+  for (lane = 0; lane != 2; ++lane)
+    out[lane] = minuend[lane] - subtrahend[lane];
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_psubd (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubq.c
new file mode 100644
index 00000000000..767bf8ea303
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubq.c
@@ -0,0 +1,42 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubq  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _mm_sub_si64 (t1, t2);
+}
+
+/* Reference model: single 64-bit difference, *res_p = *dst_p - *src_p.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  *res_p = *dst_p - *src_p;
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_psubq (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusb.c
new file mode 100644
index 00000000000..29a5f708e12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusb.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubusb  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_psubusb (t1, t2);
+}
+
+/* Reference model: per-byte unsigned difference passed through saturate_ub.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  const unsigned char *minuend = (const unsigned char *) dst_p;
+  const unsigned char *subtrahend = (const unsigned char *) src_p;
+  unsigned char *out = (unsigned char *) res_p;
+  int lane;
+  for (lane = 0; lane != 8; ++lane)
+    out[lane] = saturate_ub (minuend[lane] - subtrahend[lane]);
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_psubusb (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusw.c
new file mode 100644
index 00000000000..279051f7303
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusw.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubusw  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_psubusw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  unsigned short *dst = (unsigned short *) dst_p;
+  unsigned short *src = (unsigned short *) src_p;
+  unsigned short *res = (unsigned short *) res_p;
+  int i;
+  for (i = 0; i < 4; i++)
+    res[i] = saturate_uw (dst[i] - src[i]);
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_psubusw (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubw.c
new file mode 100644
index 00000000000..dde5fce50ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubw.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubw  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_psubw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  short *dst = (short *) dst_p;
+  short *src = (short *) src_p;
+  short *res = (short *) res_p;
+  int i;
+  for (i = 0; i < 4; i++)
+    res[i] = dst[i] - src[i];
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_psubw (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhbw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhbw.c
new file mode 100644
index 00000000000..5059d74d6c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhbw.c
@@ -0,0 +1,52 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpckhbw  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_punpckhbw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  char *dst = (char *) dst_p;
+  char *src = (char *) src_p;
+  char *res = (char *) res_p;
+  res[0] = dst[4];
+  res[1] = src[4];
+  res[2] = dst[5];
+  res[3] = src[5];
+  res[4] = dst[6];
+  res[5] = src[6];
+  res[6] = dst[7];
+  res[7] = src[7];
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_punpckhbw (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhdq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhdq.c
new file mode 100644
index 00000000000..9c4690dee0b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhdq.c
@@ -0,0 +1,46 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpckhdq  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_punpckhdq (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  int *dst = (int *) dst_p;
+  int *src = (int *) src_p;
+  int *res = (int *) res_p;
+  res[0] = dst[1];
+  res[1] = src[1];
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_punpckhdq (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhwd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhwd.c
new file mode 100644
index 00000000000..7525a2bba63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhwd.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpckhwd  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_punpckhwd (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  short *dst = (short *) dst_p;
+  short *src = (short *) src_p;
+  short *res = (short *) res_p;
+  res[0] = dst[2];
+  res[1] = src[2];
+  res[2] = dst[3];
+  res[3] = src[3];
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_punpckhwd (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklbw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklbw.c
new file mode 100644
index 00000000000..14bdc433ed3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklbw.c
@@ -0,0 +1,52 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpcklbw  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_punpcklbw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  char *dst = (char *) dst_p;
+  char *src = (char *) src_p;
+  char *res = (char *) res_p;
+  res[0] = dst[0];
+  res[1] = src[0];
+  res[2] = dst[1];
+  res[3] = src[1];
+  res[4] = dst[2];
+  res[5] = src[2];
+  res[6] = dst[3];
+  res[7] = src[3];
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_punpcklbw (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckldq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckldq.c
new file mode 100644
index 00000000000..1d8a932ba7c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckldq.c
@@ -0,0 +1,46 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpckldq  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_punpckldq (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  int *dst = (int *) dst_p;
+  int *src = (int *) src_p;
+  int *res = (int *) res_p;
+  res[0] = dst[0];
+  res[1] = src[0];
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_punpckldq (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklwd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklwd.c
new file mode 100644
index 00000000000..6b2a9d56a89
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklwd.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpcklwd  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_punpcklwd (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+			long long *res_p)
+{
+  short *dst = (short *) dst_p;
+  short *src = (short *) src_p;
+  short *res = (short *) res_p;
+  res[0] = dst[0];
+  res[1] = src[0];
+  res[2] = dst[1];
+  res[3] = src[1];
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_punpcklwd (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c
new file mode 100644
index 00000000000..7858c2f6856
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c
@@ -0,0 +1,43 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pxor  (long long *ll1, long long *ll2, long long *r)
+{
+  __m64 t1 = *(__m64 *) ll1;
+  __m64 t2 = *(__m64 *) ll2;
+  *(__m64 *) r = _m_pxor (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (unsigned long long *dst,
+			unsigned long long *src,
+			unsigned long long *res)
+{
+  res[0] = dst[0] ^ src[0];
+}
+
+static void
+sse2_test (void)
+{
+  int i;
+  long long r, ck;
+  int fail = 0;
+
+  /* Run the MMX tests */
+  for (i = 0; i < MMX_num_ops; i += 2)
+    {
+      test_pxor (&MMXops[i], &MMXops[i + 1], &r);
+      compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+      if (ck != r)
+	fail++;
+    }
+
+  if (fail != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx.c b/gcc/testsuite/gcc.target/i386/sse2-mmx.c
index fb226a8e8f3..338cb9da289 100644
--- a/gcc/testsuite/gcc.target/i386/sse2-mmx.c
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx.c
@@ -4,7 +4,6 @@
 
 #include "sse2-check.h"
 
-#include <mmintrin.h>
 
 #define N 4
 
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 20/43] i386: Emulate MMX mmx_umulv4hi3_highpart with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (24 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 05/43] i386: Emulate MMX mulv4hi3 with SSE H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 38/43] i386: Add tests for MMX intrinsic emulations " H.J. Lu
                   ` (17 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX mmx_umulv4hi3_highpart with SSE.  Only SSE register source
operand is allowed.

	PR target/89021
	* config/i386/mmx.md (*mmx_umulv4hi3_highpart): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 7759e3e1082..c758aac72e5 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -821,20 +821,24 @@
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
 (define_insn "*mmx_umulv4hi3_highpart"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
 	(truncate:V4HI
 	  (lshiftrt:V4SI
 	    (mult:V4SI
 	      (zero_extend:V4SI
-		(match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+		(match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yy"))
 	      (zero_extend:V4SI
-		(match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+		(match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy")))
 	  (const_int 16))))]
   "(TARGET_SSE || TARGET_3DNOW_A)
    && ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmulhuw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "@
+   pmulhuw\t{%2, %0|%0, %2}
+   pmulhuw\t{%2, %0|%0, %2}
+   vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_pmaddwd"
   [(set (match_operand:V2SI 0 "register_operand")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 22/43] i386: Emulate MMX mmx_uavgv8qi3 with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (20 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 26/43] i386: Emulate MMX umulv1siv1di3 with SSE2 H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 30/43] i386: Emulate MMX ssse3_pmulhrswv4hi3 " H.J. Lu
                   ` (21 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX mmx_uavgv8qi3 with SSE.  Only SSE register source operand is
allowed.

	PR target/89021
	* config/i386/mmx.md (*mmx_uavgv8qi3): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index c758aac72e5..17776c66d90 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1679,15 +1679,15 @@
   "ix86_fixup_binary_operands_no_copy (PLUS, V8QImode, operands);")
 
 (define_insn "*mmx_uavgv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+  [(set (match_operand:V8QI 0 "register_operand" "=y,Yx,Yy")
 	(truncate:V8QI
 	  (lshiftrt:V8HI
 	    (plus:V8HI
 	      (plus:V8HI
 		(zero_extend:V8HI
-		  (match_operand:V8QI 1 "nonimmediate_operand" "%0"))
+		  (match_operand:V8QI 1 "nonimmediate_operand" "%0,0,Yy"))
 		(zero_extend:V8HI
-		  (match_operand:V8QI 2 "nonimmediate_operand" "ym")))
+		  (match_operand:V8QI 2 "nonimmediate_operand" "ym,Yx,Yy")))
 	      (const_vector:V8HI [(const_int 1) (const_int 1)
 				  (const_int 1) (const_int 1)
 				  (const_int 1) (const_int 1)
@@ -1698,19 +1698,22 @@
 {
   /* These two instructions have the same operation, but their encoding
      is different.  Prefer the one that is de facto standard.  */
-  if (TARGET_SSE || TARGET_3DNOW_A)
+  if (TARGET_MMX_WITH_SSE && TARGET_AVX)
+    return "vpavgb\t{%2, %1, %0|%0, %1, %2}";
+  else if (TARGET_SSE || TARGET_3DNOW_A)
     return "pavgb\t{%2, %0|%0, %2}";
   else
     return "pavgusb\t{%2, %0|%0, %2}";
 }
-  [(set_attr "type" "mmxshft")
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseiadd,sseiadd")
    (set (attr "prefix_extra")
      (if_then_else
        (not (ior (match_test "TARGET_SSE")
 		 (match_test "TARGET_3DNOW_A")))
        (const_string "1")
        (const_string "*")))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_uavgv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 29/43] i386: Emulate MMX ssse3_pmaddubsw with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (14 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 23/43] i386: Emulate MMX mmx_uavgv4hi3 " H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 13:25 ` [PATCH 24/43] i386: Emulate MMX mmx_psadbw " H.J. Lu
                   ` (27 subsequent siblings)
  43 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX ssse3_pmaddubsw with SSE.  Only SSE register source operand
is allowed.

	PR target/89021
	* config/i386/sse.md (ssse3_pmaddubsw): Add SSE emulation.
---
 gcc/config/i386/sse.md | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 029f33a7000..b0ded2008f1 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15666,17 +15666,17 @@
    (set_attr "mode" "TI")])
 
 (define_insn "ssse3_pmaddubsw"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
 	(ss_plus:V4HI
 	  (mult:V4HI
 	    (zero_extend:V4HI
 	      (vec_select:V4QI
-		(match_operand:V8QI 1 "register_operand" "0")
+		(match_operand:V8QI 1 "register_operand" "0,0,Yy")
 		(parallel [(const_int 0) (const_int 2)
 			   (const_int 4) (const_int 6)])))
 	    (sign_extend:V4HI
 	      (vec_select:V4QI
-		(match_operand:V8QI 2 "nonimmediate_operand" "ym")
+		(match_operand:V8QI 2 "nonimmediate_operand" "ym,Yx,Yy")
 		(parallel [(const_int 0) (const_int 2)
 			   (const_int 4) (const_int 6)]))))
 	  (mult:V4HI
@@ -15689,12 +15689,16 @@
 		(parallel [(const_int 1) (const_int 3)
 			   (const_int 5) (const_int 7)]))))))]
   "TARGET_SSSE3"
-  "pmaddubsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
+  "@
+   pmaddubsw\t{%2, %0|%0, %2}
+   pmaddubsw\t{%2, %0|%0, %2}
+   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseiadd")
    (set_attr "atom_unit" "simul")
    (set_attr "prefix_extra" "1")
    (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_mode_iterator PMULHRSW
   [V4HI V8HI (V16HI "TARGET_AVX2")])
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (29 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 32/43] i386: Emulate MMX ssse3_psign<mode>3 " H.J. Lu
@ 2019-02-09 13:25 ` H.J. Lu
  2019-02-09 14:09   ` Uros Bizjak
  2019-02-09 13:25 ` [PATCH 31/43] i386: Emulate MMX pshufb with SSE version H.J. Lu
                   ` (12 subsequent siblings)
  43 siblings, 1 reply; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 13:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

In 64-bit mode, SSE2 can be used to emulate MMX instructions without
3DNOW.  We can use SSE2 to support 64-bit vectors.

	PR target/89021
	* config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
	* config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector
	modes for TARGET_MMX_WITH_SSE.
	(SSE_REG_MODE_P): Likewise.
---
 gcc/config/i386/i386.h | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 83b025e0cf5..c1df3ec3326 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -585,6 +585,11 @@ extern unsigned char ix86_arch_features[X86_ARCH_LAST];
 
 #define TARGET_FISTTP		(TARGET_SSE3 && TARGET_80387)
 
+/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
+   FIXME: All 3DNOW patterns need to be updated with SSE emulation.  */
+#define TARGET_MMX_WITH_SSE \
+  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
+
 extern unsigned char x86_prefetch_sse;
 #define TARGET_PREFETCH_SSE	x86_prefetch_sse
 
@@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
    || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode	\
    || (MODE) == TFmode || (MODE) == V1TImode)
 
+/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
+   want to include 8-byte vector modes, like V2SFmode, but not DImode
+   nor SImode.  */
 #define VALID_SSE2_REG_MODE(MODE)					\
   ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode	\
-   || (MODE) == V2DImode || (MODE) == DFmode)
+   || (MODE) == V2DImode || (MODE) == DFmode				\
+   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode	\
+			       || (MODE) == V4HImode			\
+			       || (MODE) == V2SImode			\
+			       || (MODE) == V2SFmode)))
 
 #define VALID_SSE_REG_MODE(MODE)					\
   ((MODE) == V1TImode || (MODE) == TImode				\
@@ -1188,7 +1200,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
    || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode	\
    || (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode	\
    || (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode	\
-   || (MODE) == V16SFmode)
+   || (MODE) == V16SFmode						\
+   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode	\
+			       || (MODE) == V4HImode			\
+			       || (MODE) == V2SImode			\
+			       || (MODE) == V2SFmode)))
 
 #define X87_FLOAT_MODE_P(MODE)	\
   (TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode))
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 00/43] V2: Emulate MMX intrinsics with SSE
  2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
                   ` (42 preceding siblings ...)
  2019-02-09 13:25 ` [PATCH 19/43] i386: Emulate MMX mmx_pmovmskb " H.J. Lu
@ 2019-02-09 13:53 ` Uros Bizjak
  43 siblings, 0 replies; 60+ messages in thread
From: Uros Bizjak @ 2019-02-09 13:53 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches

On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> On x86-64, since __m64 is returned and passed in XMM registers, we can
> emulate MMX intrinsics with SSE instructions. To support it, we added
>
>  #define TARGET_MMX_WITH_SSE \
>   (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
>
> SSE emulation is disabled for 3DNOW since 3DNOW patterns haven't been
> updated with SSE emulation.
>
> ;; Define instruction set of MMX instructions
> (define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx" (const_string
> "base"))
>
>          (eq_attr "mmx_isa" "native")
>            (symbol_ref "!TARGET_MMX_WITH_SSE")
>          (eq_attr "mmx_isa" "x64")
>            (symbol_ref "TARGET_MMX_WITH_SSE")
>          (eq_attr "mmx_isa" "x64_avx")
>            (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
>          (eq_attr "mmx_isa" "x64_noavx")
>            (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
>
> (define_register_constraint "Yx" "TARGET_MMX_WITH_SSE ? SSE_REGS : NO_REGS"
>  "@internal Any SSE register if MMX is disabled in 64-bit mode.")
>
> (define_register_constraint "Yy"
>  "TARGET_MMX_WITH_SSE ? (TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ?
> SSE_REGS : NO_REGS) : NO_REGS"
>  "@internal Any EVEX encodable SSE register for AVX512VL target, otherwise
> any SSE register if MMX is disabled in 64-bit mode.")

I don't think we need the above two new constraints; with mmx_isa
attribute, we can simply use x and Yv constraints. We need to use
mmx_isa to correctly switch between register sets, so we are sure that
TARGET_MMX_WITH_SSE is true when mmx_isa is !native.

Uros.

> We added SSE emulation to MMX patterns and disabled MMX alternatives with
> TARGET_MMX_WITH_SSE.
>
> Most of MMX instructions have equivalent SSE versions and results of some
> SSE versions need to be reshuffled to the right order for MMX.  There are
> a couple of tricky cases:
>
> 1. MMX maskmovq and SSE2 maskmovdqu aren't equivalent.  We emulate MMX
> maskmovq with SSE2 maskmovdqu by zeroing out the upper 64 bits of the
> mask operand and handle unmapped bits 64:127 at memory address by
> adjusting source and mask operands together with memory address.
>
> 2. MMX movntq is emulated with SSE2 DImode movnti, which is available
> in 64-bit mode.
>
> 3. MMX pshufb takes a 3-bit index while SSE pshufb takes a 4-bit index.
> SSE emulation must clear bit 3 (the fourth index bit) in the shuffle control mask.
>
> 4. To emulate MMX cvtpi2ps with SSE2 cvtdq2ps, we must properly preserve
> the upper 64 bits of destination XMM register.
>
> Tests are also added to check each SSE emulation of MMX intrinsics.
>
> With SSE emulation in 64-bit mode, 8-byte vectorizer is enabled with SSE2.
>
> There are no regressions on i686 and x86-64.  For x86-64, GCC is also
> tested with
>
> --with-arch=native --with-cpu=native
>
> on AVX2 and AVX512F machines.
>
> H.J. Lu (43):
>   i386: Allow 64-bit vector modes in SSE registers
>   i386: Emulate MMX packsswb/packssdw/packuswb with SSE2
>   i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
>   i386: Emulate MMX plusminus/sat_plusminus with SSE
>   i386: Emulate MMX mulv4hi3 with SSE
>   i386: Emulate MMX smulv4hi3_highpart with SSE
>   i386: Emulate MMX mmx_pmaddwd with SSE
>   i386: Emulate MMX ashr<mode>3/<shift_insn><mode>3 with SSE
>   i386: Emulate MMX <any_logic><mode>3 with SSE
>   i386: Emulate MMX mmx_andnot<mode>3 with SSE
>   i386: Emulate MMX mmx_eq/mmx_gt<mode>3 with SSE
>   i386: Emulate MMX vec_dupv2si with SSE
>   i386: Emulate MMX pshufw with SSE
>   i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE
>   i386: Emulate MMX sse_cvtpi2ps with SSE
>   i386: Emulate MMX mmx_pextrw with SSE
>   i386: Emulate MMX mmx_pinsrw with SSE
>   i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE
>   i386: Emulate MMX mmx_pmovmskb with SSE
>   i386: Emulate MMX mmx_umulv4hi3_highpart with SSE
>   i386: Emulate MMX maskmovq with SSE2 maskmovdqu
>   i386: Emulate MMX mmx_uavgv8qi3 with SSE
>   i386: Emulate MMX mmx_uavgv4hi3 with SSE
>   i386: Emulate MMX mmx_psadbw with SSE
>   i386: Emulate MMX movntq with SSE2 movntidi
>   i386: Emulate MMX umulv1siv1di3 with SSE2
>   i386: Emulate MMX ssse3_ph<plusminus_mnemonic>wv4hi3 with SSE
>   i386: Emulate MMX ssse3_ph<plusminus_mnemonic>dv2si3 with SSE
>   i386: Emulate MMX ssse3_pmaddubsw with SSE
>   i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE
>   i386: Emulate MMX pshufb with SSE version
>   i386: Emulate MMX ssse3_psign<mode>3 with SSE
>   i386: Emulate MMX ssse3_palignrdi with SSE
>   i386: Emulate MMX abs<mode>2 with SSE
>   i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE
>   i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE
>   i386: Allow MMX intrinsic emulation with SSE
>   i386: Add tests for MMX intrinsic emulations with SSE
>   i386: Also enable SSSE3 __m64 tests in 64-bit mode
>   i386: Enable 8-byte vectorizer for TARGET_MMX_WITH_SSE
>   i386: Implement V2SF add/sub/mul with SSE
>   i386: Implement V2SF <-> V2SI conversions with SSE
>   i386: Implement V2SF comparisons with SSE
>
>  gcc/config/i386/constraints.md                |  10 +
>  gcc/config/i386/i386-builtin.def              | 126 +--
>  gcc/config/i386/i386-protos.h                 |   4 +
>  gcc/config/i386/i386.c                        | 186 +++-
>  gcc/config/i386/i386.h                        |  20 +-
>  gcc/config/i386/i386.md                       |  15 +-
>  gcc/config/i386/mmintrin.h                    |  10 +-
>  gcc/config/i386/mmx.md                        | 909 +++++++++++++-----
>  gcc/config/i386/sse.md                        | 440 +++++++--
>  gcc/config/i386/xmmintrin.h                   |  61 ++
>  gcc/testsuite/gcc.dg/tree-ssa/pr84512.c       |   2 +-
>  gcc/testsuite/gcc.target/i386/mmx-vals.h      |  77 ++
>  gcc/testsuite/gcc.target/i386/pr82483-1.c     |   2 +-
>  gcc/testsuite/gcc.target/i386/pr82483-2.c     |   2 +-
>  gcc/testsuite/gcc.target/i386/pr89028-1.c     |  10 +
>  gcc/testsuite/gcc.target/i386/pr89028-10.c    |  39 +
>  gcc/testsuite/gcc.target/i386/pr89028-11.c    |  39 +
>  gcc/testsuite/gcc.target/i386/pr89028-12.c    |  39 +
>  gcc/testsuite/gcc.target/i386/pr89028-13.c    |  39 +
>  gcc/testsuite/gcc.target/i386/pr89028-2.c     |  11 +
>  gcc/testsuite/gcc.target/i386/pr89028-3.c     |  14 +
>  gcc/testsuite/gcc.target/i386/pr89028-4.c     |  14 +
>  gcc/testsuite/gcc.target/i386/pr89028-5.c     |  11 +
>  gcc/testsuite/gcc.target/i386/pr89028-6.c     |  14 +
>  gcc/testsuite/gcc.target/i386/pr89028-7.c     |  14 +
>  gcc/testsuite/gcc.target/i386/pr89028-8.c     |  12 +
>  gcc/testsuite/gcc.target/i386/pr89028-9.c     |  12 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-10.c   |  42 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-11.c   |  39 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-12.c   |  41 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-13.c   |  40 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-14.c   |  30 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-15.c   |  35 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-16.c   |  39 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-17.c   |  50 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-18.c   |  13 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-19.c   |  11 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-2.c    |  12 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-20.c   |  11 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-21.c   |  13 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-3.c    |  12 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-4.c    |   4 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-5.c    |  12 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-6.c    |  12 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-7.c    |  12 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-8.c    |   4 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-9.c    |  79 ++
>  .../gcc.target/i386/sse2-mmx-cvtpi2ps.c       |  42 +
>  .../gcc.target/i386/sse2-mmx-cvtps2pi.c       |  35 +
>  .../gcc.target/i386/sse2-mmx-cvttps2pi.c      |  35 +
>  .../gcc.target/i386/sse2-mmx-maskmovq.c       |  98 ++
>  .../gcc.target/i386/sse2-mmx-packssdw.c       |  51 +
>  .../gcc.target/i386/sse2-mmx-packsswb.c       |  51 +
>  .../gcc.target/i386/sse2-mmx-packuswb.c       |  51 +
>  .../gcc.target/i386/sse2-mmx-paddb.c          |  47 +
>  .../gcc.target/i386/sse2-mmx-paddd.c          |  47 +
>  .../gcc.target/i386/sse2-mmx-paddq.c          |  42 +
>  .../gcc.target/i386/sse2-mmx-paddsb.c         |  47 +
>  .../gcc.target/i386/sse2-mmx-paddsw.c         |  47 +
>  .../gcc.target/i386/sse2-mmx-paddusb.c        |  47 +
>  .../gcc.target/i386/sse2-mmx-paddusw.c        |  47 +
>  .../gcc.target/i386/sse2-mmx-paddw.c          |  47 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c |  43 +
>  .../gcc.target/i386/sse2-mmx-pandn.c          |  43 +
>  .../gcc.target/i386/sse2-mmx-pavgb.c          |  51 +
>  .../gcc.target/i386/sse2-mmx-pavgw.c          |  51 +
>  .../gcc.target/i386/sse2-mmx-pcmpeqb.c        |  47 +
>  .../gcc.target/i386/sse2-mmx-pcmpeqd.c        |  47 +
>  .../gcc.target/i386/sse2-mmx-pcmpeqw.c        |  47 +
>  .../gcc.target/i386/sse2-mmx-pcmpgtb.c        |  47 +
>  .../gcc.target/i386/sse2-mmx-pcmpgtd.c        |  47 +
>  .../gcc.target/i386/sse2-mmx-pcmpgtw.c        |  47 +
>  .../gcc.target/i386/sse2-mmx-pextrw.c         |  58 ++
>  .../gcc.target/i386/sse2-mmx-pinsrw.c         |  60 ++
>  .../gcc.target/i386/sse2-mmx-pmaddwd.c        |  46 +
>  .../gcc.target/i386/sse2-mmx-pmaxsw.c         |  47 +
>  .../gcc.target/i386/sse2-mmx-pmaxub.c         |  47 +
>  .../gcc.target/i386/sse2-mmx-pminsw.c         |  47 +
>  .../gcc.target/i386/sse2-mmx-pminub.c         |  47 +
>  .../gcc.target/i386/sse2-mmx-pmovmskb.c       |  45 +
>  .../gcc.target/i386/sse2-mmx-pmulhuw.c        |  50 +
>  .../gcc.target/i386/sse2-mmx-pmulhw.c         |  52 +
>  .../gcc.target/i386/sse2-mmx-pmullw.c         |  51 +
>  .../gcc.target/i386/sse2-mmx-pmuludq.c        |  46 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-por.c  |  43 +
>  .../gcc.target/i386/sse2-mmx-psadbw.c         |  57 ++
>  .../gcc.target/i386/sse2-mmx-pshufw.c         | 247 +++++
>  .../gcc.target/i386/sse2-mmx-pslld.c          |  51 +
>  .../gcc.target/i386/sse2-mmx-pslldi.c         | 152 +++
>  .../gcc.target/i386/sse2-mmx-psllq.c          |  46 +
>  .../gcc.target/i386/sse2-mmx-psllqi.c         | 244 +++++
>  .../gcc.target/i386/sse2-mmx-psllw.c          |  51 +
>  .../gcc.target/i386/sse2-mmx-psllwi.c         | 104 ++
>  .../gcc.target/i386/sse2-mmx-psrad.c          |  51 +
>  .../gcc.target/i386/sse2-mmx-psradi.c         | 152 +++
>  .../gcc.target/i386/sse2-mmx-psraw.c          |  51 +
>  .../gcc.target/i386/sse2-mmx-psrawi.c         | 104 ++
>  .../gcc.target/i386/sse2-mmx-psrld.c          |  51 +
>  .../gcc.target/i386/sse2-mmx-psrldi.c         | 152 +++
>  .../gcc.target/i386/sse2-mmx-psrlq.c          |  46 +
>  .../gcc.target/i386/sse2-mmx-psrlqi.c         | 244 +++++
>  .../gcc.target/i386/sse2-mmx-psrlw.c          |  51 +
>  .../gcc.target/i386/sse2-mmx-psrlwi.c         | 104 ++
>  .../gcc.target/i386/sse2-mmx-psubb.c          |  47 +
>  .../gcc.target/i386/sse2-mmx-psubd.c          |  47 +
>  .../gcc.target/i386/sse2-mmx-psubq.c          |  42 +
>  .../gcc.target/i386/sse2-mmx-psubusb.c        |  47 +
>  .../gcc.target/i386/sse2-mmx-psubusw.c        |  47 +
>  .../gcc.target/i386/sse2-mmx-psubw.c          |  47 +
>  .../gcc.target/i386/sse2-mmx-punpckhbw.c      |  52 +
>  .../gcc.target/i386/sse2-mmx-punpckhdq.c      |  46 +
>  .../gcc.target/i386/sse2-mmx-punpckhwd.c      |  48 +
>  .../gcc.target/i386/sse2-mmx-punpcklbw.c      |  52 +
>  .../gcc.target/i386/sse2-mmx-punpckldq.c      |  46 +
>  .../gcc.target/i386/sse2-mmx-punpcklwd.c      |  48 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c |  43 +
>  gcc/testsuite/gcc.target/i386/sse2-mmx.c      |   1 -
>  gcc/testsuite/gcc.target/i386/ssse3-pabsb.c   |   4 +-
>  gcc/testsuite/gcc.target/i386/ssse3-pabsd.c   |   4 +-
>  gcc/testsuite/gcc.target/i386/ssse3-pabsw.c   |   4 +-
>  gcc/testsuite/gcc.target/i386/ssse3-palignr.c |   6 +-
>  gcc/testsuite/gcc.target/i386/ssse3-phaddd.c  |   4 +-
>  gcc/testsuite/gcc.target/i386/ssse3-phaddsw.c |   4 +-
>  gcc/testsuite/gcc.target/i386/ssse3-phaddw.c  |   4 +-
>  gcc/testsuite/gcc.target/i386/ssse3-phsubd.c  |   4 +-
>  gcc/testsuite/gcc.target/i386/ssse3-phsubsw.c |   4 +-
>  gcc/testsuite/gcc.target/i386/ssse3-phsubw.c  |   4 +-
>  .../gcc.target/i386/ssse3-pmaddubsw.c         |   4 +-
>  .../gcc.target/i386/ssse3-pmulhrsw.c          |   4 +-
>  gcc/testsuite/gcc.target/i386/ssse3-pshufb.c  |   6 +-
>  gcc/testsuite/gcc.target/i386/ssse3-psignb.c  |   4 +-
>  gcc/testsuite/gcc.target/i386/ssse3-psignd.c  |   4 +-
>  gcc/testsuite/gcc.target/i386/ssse3-psignw.c  |   4 +-
>  133 files changed, 6675 insertions(+), 450 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/mmx-vals.h
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-10.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-11.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-12.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-13.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-6.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-7.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-8.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-9.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-10.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-11.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-12.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-13.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-14.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-15.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-16.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-17.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-18.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-19.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-20.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-21.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-6.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-7.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-8.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-9.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-cvtpi2ps.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-cvtps2pi.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-cvttps2pi.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-maskmovq.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-packssdw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-packsswb.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-packuswb.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddb.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddd.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddq.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddsb.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddsw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddusb.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddusw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pandn.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pavgb.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pavgw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqb.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqd.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtb.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtd.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmaddwd.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxsw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxub.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pminsw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pminub.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmovmskb.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhuw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmullw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmuludq.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-por.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psadbw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pshufw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pslld.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pslldi.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllq.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllqi.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllwi.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrad.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psradi.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psraw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrawi.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrld.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrldi.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlq.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlqi.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlwi.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubb.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubd.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubq.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubusb.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubusw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhbw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhdq.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhwd.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklbw.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckldq.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklwd.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c
>
> --
> 2.20.1
>
>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers
  2019-02-09 13:25 ` [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers H.J. Lu
@ 2019-02-09 14:09   ` Uros Bizjak
  2019-02-09 14:32     ` H.J. Lu
  0 siblings, 1 reply; 60+ messages in thread
From: Uros Bizjak @ 2019-02-09 14:09 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches

On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> In 64-bit mode, SSE2 can be used to emulate MMX instructions without
> 3DNOW.  We can use SSE2 to support 64-bit vectors.
>
> 	PR target/89021
> 	* config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
> 	* config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector
> 	modes for TARGET_MMX_WITH_SSE.
> 	(SSE_REG_MODE_P): Likewise.
> ---
>  gcc/config/i386/i386.h | 20 ++++++++++++++++++--
>  1 file changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 83b025e0cf5..c1df3ec3326 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -585,6 +585,11 @@ extern unsigned char
> ix86_arch_features[X86_ARCH_LAST];
>
>  #define TARGET_FISTTP		(TARGET_SSE3 && TARGET_80387)
>
> +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
> +   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.  */
> +#define TARGET_MMX_WITH_SSE \
> +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
> +
>  extern unsigned char x86_prefetch_sse;
>  #define TARGET_PREFETCH_SSE	x86_prefetch_sse
>
> @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int argc,
> const char **argv);
>     || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode	\
>     || (MODE) == TFmode || (MODE) == V1TImode)
>
> +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
> +   want to include 8-byte vector modes, like V2SFmode, but not DImode
> +   nor SImode.  */

This is strange, since we already allow all MMX modes in SSE
registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we
return:

return ((TARGET_AVX
             && VALID_AVX256_REG_OR_OI_MODE (mode))
            || VALID_SSE_REG_MODE (mode)
            || VALID_SSE2_REG_MODE (mode)
            || VALID_MMX_REG_MODE (mode)
            || VALID_MMX_REG_MODE_3DNOW (mode));

I'd expect that changed VALID_SSE2_REG_MODE affects only
ix86_vector_mode_supported_p when MMX is disabled and perhaps
ix86_set_reg_reg_cost cost function.

Are there any concrete issues when allowing all MMX (including 3DNOW?)
modes in VALID_SSE2_REG_MODE?

Uros.

>  #define VALID_SSE2_REG_MODE(MODE)					\
>    ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode	\
> -   || (MODE) == V2DImode || (MODE) == DFmode)
> +   || (MODE) == V2DImode || (MODE) == DFmode				\
> +   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode	\
> +			       || (MODE) == V4HImode			\
> +			       || (MODE) == V2SImode			\
> +			       || (MODE) == V2SFmode)))
>
>  #define VALID_SSE_REG_MODE(MODE)					\
>    ((MODE) == V1TImode || (MODE) == TImode				\
> @@ -1188,7 +1200,11 @@ extern const char *host_detect_local_cpu (int argc,
> const char **argv);
>     || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode	\
>     || (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode	\
>     || (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode	\
> -   || (MODE) == V16SFmode)
> +   || (MODE) == V16SFmode						\
> +   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode	\
> +			       || (MODE) == V4HImode			\
> +			       || (MODE) == V2SImode			\
> +			       || (MODE) == V2SFmode)))
>
>  #define X87_FLOAT_MODE_P(MODE)	\
>    (TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) ==
> XFmode))
> --
> 2.20.1
>
>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 04/43] i386: Emulate MMX plusminus/sat_plusminus with SSE
  2019-02-09 13:24 ` [PATCH 04/43] i386: Emulate MMX plusminus/sat_plusminus " H.J. Lu
@ 2019-02-09 14:19   ` Uros Bizjak
  0 siblings, 0 replies; 60+ messages in thread
From: Uros Bizjak @ 2019-02-09 14:19 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches

On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> Emulate MMX plusminus/sat_plusminus with SSE.  Only SSE register source
> operand is allowed.
>
> 2019-02-08  H.J. Lu  <hongjiu.lu@intel.com>
> 	    Uros Bizjak  <ubizjak@gmail.com>
>
> 	PR target/89021
> 	* config/i386/mmx.md (MMXMODEI8): Require TARGET_SSE2 for V1DI.
> 	(<plusminus_insn><mode>3): New.
> 	(*mmx_<plusminus_insn><mode>3): Changed to define_insn_and_split
> 	to support SSE emulation.
> 	(*mmx_<plusminus_insn><mode>3): Likewise.
> 	(mmx_<plusminus_insn><mode>3): Also allow TARGET_MMX_WITH_SSE.
> ---
>  gcc/config/i386/mmx.md | 51 +++++++++++++++++++++++++++++-------------
>  1 file changed, 35 insertions(+), 16 deletions(-)
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 1d5ed83e7b2..01a71aa128b 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -45,7 +45,7 @@
>
>  ;; 8 byte integral modes handled by MMX (and by extension, SSE)
>  (define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
> -(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI])
> +(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
>
>  ;; All 8-byte vector modes handled by MMX
>  (define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
> @@ -698,34 +698,53 @@
>    "TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode)"
>    "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
>
> +(define_expand "<plusminus_insn><mode>3"
> +  [(set (match_operand:MMXMODEI 0 "register_operand")
> +	(plusminus:MMXMODEI
> +	  (match_operand:MMXMODEI 1 "nonimmediate_operand")
> +	  (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
> +  "TARGET_MMX_WITH_SSE"
> +  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
> +
>  (define_insn "*mmx_<plusminus_insn><mode>3"
> -  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
> +  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,Yx,Yy")
>          (plusminus:MMXMODEI8
> -	  (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0")
> -	  (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
> -  "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode))
> +	  (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0,0,Yy")
> +	  (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym,Yx,Yy")))]
> +  "(TARGET_MMX
> +    || TARGET_MMX_WITH_SSE
> +    || (TARGET_SSE2 && <MODE>mode == V1DImode))

You don't need V1DImode bypass. This was wrong before the patch and
would break for -msse2 -mno-mmx, since the pattern uses MMX registers.

On a related note, all SSE2 mmx patterns (also in sse.md) should
depend on TARGET_MMX, since they currently use MMX registers. Before
your patch series, this didn't trigger problems since 8-byte vector
modes were rarely used, but with new autovectorizer opportunities,
some of these problems can and will trigger. Also note that we
currently enable MMX for SSE2 builtins to mitigate this problem.

Uros.

>     && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
> -  "p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}"
> -  [(set_attr "type" "mmxadd")
> -   (set_attr "mode" "DI")])
> +  "@
> +   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
> +   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
> +   vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> +   (set_attr "type" "mmxadd,sseadd,sseadd")
> +   (set_attr "mode" "DI,TI,TI")])
>
>  (define_expand "mmx_<plusminus_insn><mode>3"
>    [(set (match_operand:MMXMODE12 0 "register_operand")
>  	(sat_plusminus:MMXMODE12
>  	  (match_operand:MMXMODE12 1 "nonimmediate_operand")
>  	  (match_operand:MMXMODE12 2 "nonimmediate_operand")))]
> -  "TARGET_MMX"
> +  "TARGET_MMX || TARGET_MMX_WITH_SSE"
>    "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
>
>  (define_insn "*mmx_<plusminus_insn><mode>3"
> -  [(set (match_operand:MMXMODE12 0 "register_operand" "=y")
> +  [(set (match_operand:MMXMODE12 0 "register_operand" "=y,Yx,Yy")
>          (sat_plusminus:MMXMODE12
> -	  (match_operand:MMXMODE12 1 "nonimmediate_operand" "<comm>0")
> -	  (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))]
> -  "TARGET_MMX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
> -  "p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}"
> -  [(set_attr "type" "mmxadd")
> -   (set_attr "mode" "DI")])
> +	  (match_operand:MMXMODE12 1 "nonimmediate_operand" "<comm>0,0,Yy")
> +	  (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym,Yx,Yy")))]
> +  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
> +   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
> +  "@
> +   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
> +   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
> +   vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> +   (set_attr "type" "mmxadd,sseadd,sseadd")
> +   (set_attr "mode" "DI,TI,TI")])
>
>  (define_expand "mmx_mulv4hi3"
>    [(set (match_operand:V4HI 0 "register_operand")
> --
> 2.20.1
>
>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 14/43] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE
  2019-02-09 13:24 ` [PATCH 14/43] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi " H.J. Lu
@ 2019-02-09 14:28   ` Uros Bizjak
  0 siblings, 0 replies; 60+ messages in thread
From: Uros Bizjak @ 2019-02-09 14:28 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches

On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE.
>
> 	PR target/89021
> 	* config/i386/mmx.md (sse_cvtps2pi): Add SSE emulation.
> 	(sse_cvttps2pi): Likewise.
> ---
>  gcc/config/i386/sse.md | 26 ++++++++++++++++----------
>  1 file changed, 16 insertions(+), 10 deletions(-)
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 7d2c0367911..4321c5c46db 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -4668,26 +4668,32 @@
>     (set_attr "mode" "V4SF")])
>
>  (define_insn "sse_cvtps2pi"
> -  [(set (match_operand:V2SI 0 "register_operand" "=y")
> +  [(set (match_operand:V2SI 0 "register_operand" "=y,Yy")
>  	(vec_select:V2SI
> -	  (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
> +	  (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm,YyBm")]
>  		       UNSPEC_FIX_NOTRUNC)
>  	  (parallel [(const_int 0) (const_int 1)])))]
>    "TARGET_SSE"

Patterns that use MMX registers should depend on
(TARGET_MMX || TARGET_MMX_WITH_SSE).

Since the above pattern depends on MMX registers, the condition should read:

(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE.

to disable the pattern for -msse -mno-mmx on 32bit target.

All patterns that use MMX registers (especially those in sse.md)
should be reviewed for the above change.

Uros.

> -  "cvtps2pi\t{%1, %0|%0, %q1}"
> -  [(set_attr "type" "ssecvt")
> -   (set_attr "unit" "mmx")
> +  "@
> +   cvtps2pi\t{%1, %0|%0, %q1}
> +   %vcvtps2dq\t{%1, %0|%0, %1}"
> +  [(set_attr "mmx_isa" "native,x64")
> +   (set_attr "type" "ssecvt")
> +   (set_attr "unit" "mmx,*")
>     (set_attr "mode" "DI")])
>
>  (define_insn "sse_cvttps2pi"
> -  [(set (match_operand:V2SI 0 "register_operand" "=y")
> +  [(set (match_operand:V2SI 0 "register_operand" "=y,Yy")
>  	(vec_select:V2SI
> -	  (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
> +	  (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm,YyBm"))
>  	  (parallel [(const_int 0) (const_int 1)])))]
>    "TARGET_SSE"
> -  "cvttps2pi\t{%1, %0|%0, %q1}"
> -  [(set_attr "type" "ssecvt")
> -   (set_attr "unit" "mmx")
> +  "@
> +   cvttps2pi\t{%1, %0|%0, %q1}
> +   %vcvttps2dq\t{%1, %0|%0, %1}"
> +  [(set_attr "mmx_isa" "native,x64")
> +   (set_attr "type" "ssecvt")
> +   (set_attr "unit" "mmx,*")
>     (set_attr "prefix_rep" "0")
>     (set_attr "mode" "SF")])
>
> --
> 2.20.1
>
>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers
  2019-02-09 14:09   ` Uros Bizjak
@ 2019-02-09 14:32     ` H.J. Lu
  2019-02-09 15:03       ` Uros Bizjak
  0 siblings, 1 reply; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 14:32 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: GCC Patches

On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without
> > 3DNOW.  We can use SSE2 to support 64-bit vectors.
> >
> >       PR target/89021
> >       * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
> >       * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector
> >       modes for TARGET_MMX_WITH_SSE.
> >       (SSE_REG_MODE_P): Likewise.
> > ---
> >  gcc/config/i386/i386.h | 20 ++++++++++++++++++--
> >  1 file changed, 18 insertions(+), 2 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> > index 83b025e0cf5..c1df3ec3326 100644
> > --- a/gcc/config/i386/i386.h
> > +++ b/gcc/config/i386/i386.h
> > @@ -585,6 +585,11 @@ extern unsigned char
> > ix86_arch_features[X86_ARCH_LAST];
> >
> >  #define TARGET_FISTTP                (TARGET_SSE3 && TARGET_80387)
> >
> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
> > +   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.  */
> > +#define TARGET_MMX_WITH_SSE \
> > +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
> > +
> >  extern unsigned char x86_prefetch_sse;
> >  #define TARGET_PREFETCH_SSE  x86_prefetch_sse
> >
> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int argc,
> > const char **argv);
> >     || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \
> >     || (MODE) == TFmode || (MODE) == V1TImode)
> >
> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
> > +   want to include 8-byte vector modes, like V2SFmode, but not DImode
> > +   nor SImode.  */
>
> This is strange, since we already allow all MMX modes in SSE
> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we
> return:
>
> return ((TARGET_AVX
>              && VALID_AVX256_REG_OR_OI_MODE (mode))
>             || VALID_SSE_REG_MODE (mode)
>             || VALID_SSE2_REG_MODE (mode)
>             || VALID_MMX_REG_MODE (mode)
>             || VALID_MMX_REG_MODE_3DNOW (mode));
>
> I'd expect that changed VALID_SSE2_REG_MODE affects only
> ix86_vector_mode_supported_p when MMX is disabled and perhaps
> ix86_set_reg_reg_cost cost function.
>
> Are there any concrete issues when allowing all MMX (including 3DNOW?)
> modes in VALID_SSE2_REG_MODE?

The problem is with DImode and SImode.  All other vector modes, including
V2SF, are OK.  With DImode and SImode, I got the following regressions:

FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias
cannot be created; target is discardable"
FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[ \t]*\n
FAIL: go test misc/cgo/testcarchive

The gcc.dg/pr39323-3.c failure is due to

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261

and

/* Decide whether a variable of mode MODE should be 128 bit aligned.  */
#define ALIGN_MODE_128(MODE) \
 ((MODE) == XFmode || SSE_REG_MODE_P (MODE))

SSE_REG_MODE_P and VALID_SSE2_REG_MODE are used in many different
places.  The i386 backend may not be prepared to deal with DImode and
SImode in SSE_REG_MODE_P or VALID_SSE2_REG_MODE.

> Uros.
>
> >  #define VALID_SSE2_REG_MODE(MODE)                                    \
> >    ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode   \
> > -   || (MODE) == V2DImode || (MODE) == DFmode)
> > +   || (MODE) == V2DImode || (MODE) == DFmode                         \
> > +   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode      \
> > +                            || (MODE) == V4HImode                    \
> > +                            || (MODE) == V2SImode                    \
> > +                            || (MODE) == V2SFmode)))
> >
> >  #define VALID_SSE_REG_MODE(MODE)                                     \
> >    ((MODE) == V1TImode || (MODE) == TImode                            \
> > @@ -1188,7 +1200,11 @@ extern const char *host_detect_local_cpu (int argc,
> > const char **argv);
> >     || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode \
> >     || (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode        \
> >     || (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode       \
> > -   || (MODE) == V16SFmode)
> > +   || (MODE) == V16SFmode                                            \
> > +   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode      \
> > +                            || (MODE) == V4HImode                    \
> > +                            || (MODE) == V2SImode                    \
> > +                            || (MODE) == V2SFmode)))
> >
> >  #define X87_FLOAT_MODE_P(MODE)       \
> >    (TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) ==
> > XFmode))
> > --
> > 2.20.1
> >
> >



-- 
H.J.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 37/43] i386: Allow MMX intrinsic emulation with SSE
  2019-02-09 13:25 ` [PATCH 37/43] i386: Allow MMX intrinsic emulation " H.J. Lu
@ 2019-02-09 14:43   ` Uros Bizjak
  0 siblings, 0 replies; 60+ messages in thread
From: Uros Bizjak @ 2019-02-09 14:43 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches

On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> Allow MMX intrinsic emulation with SSE/SSE2/SSSE3.  For pr82483-1.c and
> pr82483-2.c, "-mssse3 -mno-mmx" no longer ICEs in 64-bit mode since MMX
> intrinsics can be emulated with SSE.
>
> gcc/
>
> 	PR target/89021
> 	* config/i386/i386-builtin.def: Enable MMX intrinsics with
> 	SSE/SSE2/SSSE3.
> 	* config/i386/i386.c (bdesc_tm): Likewise.
> 	(ix86_init_mmx_sse_builtins): Likewise.
> 	(ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX
> 	intrinsics in 64-bit mode without MMX.
> 	* config/i386/mmintrin.h: Don't require MMX in 64-bit mode.
>
> gcc/testsuite/
>
> 	PR target/89021
> 	* gcc.target/i386/pr82483-1.c: Error only on ia32.
> 	* gcc.target/i386/pr82483-2.c: Likewise.
> ---
>  gcc/config/i386/i386-builtin.def          | 126 +++++++++++-----------
>  gcc/config/i386/i386.c                    |  45 +++++---
>  gcc/config/i386/mmintrin.h                |  10 +-
>  gcc/testsuite/gcc.target/i386/pr82483-1.c |   2 +-
>  gcc/testsuite/gcc.target/i386/pr82483-2.c |   2 +-
>  5 files changed, 107 insertions(+), 78 deletions(-)

Please note we have the following gems in i386.c, ix86_option_override_internal:

4168       /* Enable by default the SSE and MMX builtins.  Do allow the user to
4169          explicitly disable any of these.  In particular, disabling SSE and
4170          MMX for kernel code is extremely useful.  */
4171       if (!ix86_arch_specified)
4172       opts->x_ix86_isa_flags
4173         |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
4174              | TARGET_SUBTARGET64_ISA_DEFAULT)
4175             & ~opts->x_ix86_isa_flags_explicit);


4219   /* Turn on MMX builtins for -msse.  */
4220   if (TARGET_SSE_P (opts->x_ix86_isa_flags))
4221     opts->x_ix86_isa_flags
4222       |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;

These should probably involve TARGET_MMX_WITH_SSE now. At least we
don't need to silently enable MMX anymore.

Uros.

> diff --git a/gcc/config/i386/i386-builtin.def
> b/gcc/config/i386/i386-builtin.def
> index 88005f4687f..10a9d631f29 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -100,7 +100,7 @@ BDESC (0, 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw",
> IX86_BUILTIN_FNSTSW, UNKN
>  BDESC (0, 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX,
> UNKNOWN, (int) VOID_FTYPE_VOID)
>
>  /* MMX */
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms",
> IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_emms,
> "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
>
>  /* 3DNow! */
>  BDESC (OPTION_MASK_ISA_3DNOW, 0, CODE_FOR_mmx_femms,
> "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
> @@ -442,68 +442,68 @@ BDESC (0, 0, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi",
> IX86_BUILTIN_RORQI, UNKNO
>  BDESC (0, 0, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI,
> UNKNOWN, (int) UINT16_FTYPE_UINT16_INT)
>
>  /* MMX */
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv8qi3,
> "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int)
> V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv4hi3,
> "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv2si3,
> "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int)
> V2SI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv8qi3,
> "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int)
> V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv4hi3,
> "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv2si3,
> "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int)
> V2SI_FTYPE_V2SI_V2SI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv8qi3,
> "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int)
> V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv4hi3,
> "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv8qi3,
> "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int)
> V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv4hi3,
> "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv8qi3,
> "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int)
> V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv4hi3,
> "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv8qi3,
> "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int)
> V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv4hi3,
> "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_mulv4hi3,
> "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_smulv4hi3_highpart,
> "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andv2si3,
> "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int)
> V2SI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andnotv2si3,
> "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int)
> V2SI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por",
> IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_xorv2si3,
> "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int)
> V2SI_FTYPE_V2SI_V2SI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_eqv8qi3,
> "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int)
> V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_eqv4hi3,
> "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_eqv2si3,
> "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int)
> V2SI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_gtv8qi3,
> "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int)
> V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_gtv4hi3,
> "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_gtv2si3,
> "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int)
> V2SI_FTYPE_V2SI_V2SI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckhbw,
> "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int)
> V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckhwd,
> "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckhdq,
> "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int)
> V2SI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpcklbw,
> "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int)
> V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpcklwd,
> "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckldq,
> "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int)
> V2SI_FTYPE_V2SI_V2SI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_packsswb,
> "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int)
> V8QI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_packssdw,
> "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int)
> V4HI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_packuswb,
> "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int)
> V8QI_FTYPE_V4HI_V4HI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_pmaddwd,
> "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int)
> V2SI_FTYPE_V4HI_V4HI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv4hi3,
> "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv2si3,
> "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int)
> V2SI_FTYPE_V2SI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv1di3,
> "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int)
> V1DI_FTYPE_V1DI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv4hi3,
> "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv2si3,
> "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int)
> V2SI_FTYPE_V2SI_V2SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv1di3,
> "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int)
> V1DI_FTYPE_V1DI_V1DI_COUNT)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv4hi3,
> "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv2si3,
> "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int)
> V2SI_FTYPE_V2SI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv1di3,
> "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int)
> V1DI_FTYPE_V1DI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv4hi3,
> "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv2si3,
> "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int)
> V2SI_FTYPE_V2SI_V2SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv1di3,
> "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int)
> V1DI_FTYPE_V1DI_V1DI_COUNT)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv4hi3,
> "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv2si3,
> "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int)
> V2SI_FTYPE_V2SI_SI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv4hi3,
> "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI_COUNT)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv2si3,
> "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int)
> V2SI_FTYPE_V2SI_V2SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN,
> (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN,
> (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN,
> (int) V2SI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN,
> (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN,
> (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN,
> (int) V2SI_FTYPE_V2SI_V2SI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB,
> UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW,
> UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB,
> UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW,
> UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB,
> UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW,
> UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB,
> UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW,
> UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW,
> UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw",
> IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN,
> (int) V2SI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN,
> UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN,
> (int) V2SI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN,
> (int) V2SI_FTYPE_V2SI_V2SI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_eqv8qi3,
> "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int)
> V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_eqv4hi3,
> "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_eqv2si3,
> "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int)
> V2SI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_gtv8qi3,
> "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int)
> V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_gtv4hi3,
> "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_gtv2si3,
> "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int)
> V2SI_FTYPE_V2SI_V2SI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW,
> UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD,
> UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ,
> UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW,
> UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD,
> UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ,
> UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB,
> UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW,
> UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB,
> UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_pmaddwd,
> "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int)
> V2SI_FTYPE_V4HI_V4HI)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI,
> UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI,
> UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI,
> UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN,
> (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN,
> (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN,
> (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI,
> UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI,
> UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI,
> UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN,
> (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN,
> (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN,
> (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
> +
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI,
> UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI,
> UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN,
> (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN,
> (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
>
>  /* 3DNow! */
>  BDESC (OPTION_MASK_ISA_3DNOW, 0, CODE_FOR_mmx_pf2id,
> "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF)
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 3770bb882d4..e45284ce1a2 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -30810,13 +30810,13 @@ static const struct builtin_description
> bdesc_##kind[] =		    \
>     we're lazy.  Add casts to make them fit.  */
>  static const struct builtin_description bdesc_tm[] =
>  {
> -  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum
> ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
> -  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64",
> (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN,
> VOID_FTYPE_PV2SI_V2SI },
> -  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64",
> (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN,
> VOID_FTYPE_PV2SI_V2SI },
> -  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum
> ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
> -  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64",
> (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI
> },
> -  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64",
> (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI
> },
> -  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64",
> (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI
> },
> +  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing,
> "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN,
> VOID_FTYPE_PV2SI_V2SI },
> +  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing,
> "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64,
> UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
> +  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing,
> "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64,
> UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
> +  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing,
> "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN,
> V2SI_FTYPE_PCV2SI },
> +  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing,
> "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64,
> UNKNOWN, V2SI_FTYPE_PCV2SI },
> +  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing,
> "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64,
> UNKNOWN, V2SI_FTYPE_PCV2SI },
> +  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing,
> "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64,
> UNKNOWN, V2SI_FTYPE_PCV2SI },
>
>    { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum
> ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
>    { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM128",
> (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN,
> VOID_FTYPE_PV4SF_V4SF },
> @@ -30834,7 +30834,7 @@ static const struct builtin_description bdesc_tm[]
> =
>    { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM256",
> (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF
> },
>    { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM256",
> (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF
> },
>
> -  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum
> ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
> +  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing,
> "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN,
> VOID_FTYPE_PCVOID },
>    { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum
> ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
>    { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum
> ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
>  };
> @@ -31509,14 +31509,17 @@ ix86_init_mmx_sse_builtins (void)
>  	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
>
>    /* MMX access to the vec_init patterns.  */
> -  def_builtin_const (OPTION_MASK_ISA_MMX, 0,
> "__builtin_ia32_vec_init_v2si",
> +  def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> +		     "__builtin_ia32_vec_init_v2si",
>  		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
>
> -  def_builtin_const (OPTION_MASK_ISA_MMX, 0,
> "__builtin_ia32_vec_init_v4hi",
> +  def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> +		     "__builtin_ia32_vec_init_v4hi",
>  		     V4HI_FTYPE_HI_HI_HI_HI,
>  		     IX86_BUILTIN_VEC_INIT_V4HI);
>
> -  def_builtin_const (OPTION_MASK_ISA_MMX, 0,
> "__builtin_ia32_vec_init_v8qi",
> +  def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> +		     "__builtin_ia32_vec_init_v8qi",
>  		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
>  		     IX86_BUILTIN_VEC_INIT_V8QI);
>
> @@ -31538,7 +31541,8 @@ ix86_init_mmx_sse_builtins (void)
>  		     "__builtin_ia32_vec_ext_v4hi",
>  		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
>
> -  def_builtin_const (OPTION_MASK_ISA_MMX, 0,
> "__builtin_ia32_vec_ext_v2si",
> +  def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
> +		     "__builtin_ia32_vec_ext_v2si",
>  		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
>
>    def_builtin_const (OPTION_MASK_ISA_SSE2, 0,
> "__builtin_ia32_vec_ext_v16qi",
> @@ -36671,6 +36675,23 @@ ix86_expand_builtin (tree exp, rtx target, rtx
> subtarget,
>         == (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4))
>        && (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0)
>      isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4);
> +  /* Use SSE/SSE2/SSSE3 to emulate MMX intrinsics in 64-bit mode when
> +     MMX is disabled.  */
> +  if (TARGET_MMX_WITH_SSE)
> +    {
> +      if (((bisa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX))
> +	   == (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX))
> +	  && (isa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX)) != 0)
> +	isa |= (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX);
> +      if (((bisa & (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX))
> +	   == (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX))
> +	  && (isa & (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX)) != 0)
> +	isa |= (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX);
> +      if (((bisa & (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX))
> +	   == (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX))
> +	  && (isa & (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX)) != 0)
> +	isa |= (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX);
> +    }
>    if ((bisa & isa) != bisa || (bisa2 & isa2) != bisa2)
>      {
>        char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL,
> diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h
> index 238b3df3121..7b613658111 100644
> --- a/gcc/config/i386/mmintrin.h
> +++ b/gcc/config/i386/mmintrin.h
> @@ -30,7 +30,7 @@
>  #if defined __x86_64__ && !defined __SSE__ || !defined __MMX__
>  #pragma GCC push_options
>  #ifdef __x86_64__
> -#pragma GCC target("sse,mmx")
> +#pragma GCC target("sse2")
>  #else
>  #pragma GCC target("mmx")
>  #endif
> @@ -315,7 +315,11 @@ _m_paddd (__m64 __m1, __m64 __m2)
>  /* Add the 64-bit values in M1 to the 64-bit values in M2.  */
>  #ifndef __SSE2__
>  #pragma GCC push_options
> +#ifdef __x86_64__
> +#pragma GCC target("sse2")
> +#else
>  #pragma GCC target("sse2,mmx")
> +#endif
>  #define __DISABLE_SSE2__
>  #endif /* __SSE2__ */
>
> @@ -427,7 +431,11 @@ _m_psubd (__m64 __m1, __m64 __m2)
>  /* Add the 64-bit values in M1 to the 64-bit values in M2.  */
>  #ifndef __SSE2__
>  #pragma GCC push_options
> +#ifdef __x86_64__
> +#pragma GCC target("sse2")
> +#else
>  #pragma GCC target("sse2,mmx")
> +#endif
>  #define __DISABLE_SSE2__
>  #endif /* __SSE2__ */
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr82483-1.c
> b/gcc/testsuite/gcc.target/i386/pr82483-1.c
> index 59a59dc8dfe..b2028d8dc5e 100644
> --- a/gcc/testsuite/gcc.target/i386/pr82483-1.c
> +++ b/gcc/testsuite/gcc.target/i386/pr82483-1.c
> @@ -1,7 +1,7 @@
>  /* PR target/82483 */
>  /* { dg-do compile } */
>  /* { dg-options "-mssse3 -mno-mmx -Wno-psabi" } */
> -/* { dg-error "needs isa option" "" { target *-*-* } 0 } */
> +/* { dg-error "needs isa option" "" { target ia32 } 0 } */
>
>  #include <x86intrin.h>
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr82483-2.c
> b/gcc/testsuite/gcc.target/i386/pr82483-2.c
> index 305ddbd6c64..c92de405cb3 100644
> --- a/gcc/testsuite/gcc.target/i386/pr82483-2.c
> +++ b/gcc/testsuite/gcc.target/i386/pr82483-2.c
> @@ -1,7 +1,7 @@
>  /* PR target/82483 */
>  /* { dg-do compile } */
>  /* { dg-options "-mssse3 -mno-mmx -Wno-psabi" } */
> -/* { dg-error "needs isa option" "" { target *-*-* } 0 } */
> +/* { dg-error "needs isa option" "" { target ia32 } 0 } */
>
>  #include <x86intrin.h>
>
> --
> 2.20.1
>
>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers
  2019-02-09 14:32     ` H.J. Lu
@ 2019-02-09 15:03       ` Uros Bizjak
  2019-02-09 15:08         ` H.J. Lu
  0 siblings, 1 reply; 60+ messages in thread
From: Uros Bizjak @ 2019-02-09 15:03 UTC (permalink / raw)
  To: H.J. Lu; +Cc: GCC Patches

On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>>
>> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
>> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without
>> > 3DNOW.  We can use SSE2 to support 64-bit vectors.
>> >
>> >       PR target/89021
>> >       * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
>> >       * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector
>> >       modes for TARGET_MMX_WITH_SSE.
>> >       (SSE_REG_MODE_P): Likewise.
>> > ---
>> >  gcc/config/i386/i386.h | 20 ++++++++++++++++++--
>> >  1 file changed, 18 insertions(+), 2 deletions(-)
>> >
>> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
>> > index 83b025e0cf5..c1df3ec3326 100644
>> > --- a/gcc/config/i386/i386.h
>> > +++ b/gcc/config/i386/i386.h
>> > @@ -585,6 +585,11 @@ extern unsigned char
>> > ix86_arch_features[X86_ARCH_LAST];
>> >
>> >  #define TARGET_FISTTP                (TARGET_SSE3 && TARGET_80387)
>> >
>> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
>> > +   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.
>> > */
>> > +#define TARGET_MMX_WITH_SSE \
>> > +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
>> > +
>> >  extern unsigned char x86_prefetch_sse;
>> >  #define TARGET_PREFETCH_SSE  x86_prefetch_sse
>> >
>> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int
>> > argc,
>> > const char **argv);
>> >     || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \
>> >     || (MODE) == TFmode || (MODE) == V1TImode)
>> >
>> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
>> > +   want to include 8-byte vector modes, like V2SFmode, but not DImode
>> > +   nor SImode.  */
>>
>> This is strange, since we already allow all MMX modes in SSE
>> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we
>> return:
>>
>> return ((TARGET_AVX
>>              && VALID_AVX256_REG_OR_OI_MODE (mode))
>>             || VALID_SSE_REG_MODE (mode)
>>             || VALID_SSE2_REG_MODE (mode)
>>             || VALID_MMX_REG_MODE (mode)
>>             || VALID_MMX_REG_MODE_3DNOW (mode));
>>
>> I'd expect that changed VALID_SSE2_REG_MODE affects only
>> ix86_vector_mode_supported_p when MMX is disabled and perhaps
>> ix86_set_reg_reg_cost cost function.
>>
>> Are there any concrete issues when allowing all MMX (including 3DNOW?)
>> modes in VALID_SSE2_REG_MODE?
>
> The problem is with DImode and SImode.  All other vector modes, including
> V2SF, are OK.  With DImode and SImode, I got the following regressions:
>
> FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias
> cannot be created; target is discardable"
> FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[ \t]*\n
> FAIL: go test misc/cgo/testcarchive
>
>  gcc.dg/pr39323-3.c  is due to
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261
>
> and
>
> /* Decide whether a variable of mode MODE should be 128 bit aligned.  */
> #define ALIGN_MODE_128(MODE) \
>  ((MODE) == XFmode || SSE_REG_MODE_P (MODE))

Hm, this is a bit worrying, we don't want to introduce ABI
incompatibilities w.r.t. alignment. We still need to be ABI compatible
for MMX values and emit unaligned loads/stores when necessary.

> SSE_REG_MODE_P and VALID_SSE2_REG_MODE are used in many different
> places.  The i386 backend may not be prepared to deal with them in
> SSE_REG_MODE_P or VALID_SSE2_REG_MODE.

I think we have to review the usage of these two changed defines to
prevent any ABI issues or other hidden issues.

Uros.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers
  2019-02-09 15:03       ` Uros Bizjak
@ 2019-02-09 15:08         ` H.J. Lu
  2019-02-09 18:27           ` Uros Bizjak
  0 siblings, 1 reply; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 15:08 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: GCC Patches

On Sat, Feb 9, 2019 at 7:03 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> > On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> >>
> >> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> >> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without
> >> > 3DNOW.  We can use SSE2 to support 64-bit vectors.
> >> >
> >> >       PR target/89021
> >> >       * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
> >> >       * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector
> >> >       modes for TARGET_MMX_WITH_SSE.
> >> >       (SSE_REG_MODE_P): Likewise.
> >> > ---
> >> >  gcc/config/i386/i386.h | 20 ++++++++++++++++++--
> >> >  1 file changed, 18 insertions(+), 2 deletions(-)
> >> >
> >> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> >> > index 83b025e0cf5..c1df3ec3326 100644
> >> > --- a/gcc/config/i386/i386.h
> >> > +++ b/gcc/config/i386/i386.h
> >> > @@ -585,6 +585,11 @@ extern unsigned char
> >> > ix86_arch_features[X86_ARCH_LAST];
> >> >
> >> >  #define TARGET_FISTTP                (TARGET_SSE3 && TARGET_80387)
> >> >
> >> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
> >> > +   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.
> >> > */
> >> > +#define TARGET_MMX_WITH_SSE \
> >> > +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
> >> > +
> >> >  extern unsigned char x86_prefetch_sse;
> >> >  #define TARGET_PREFETCH_SSE  x86_prefetch_sse
> >> >
> >> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int
> >> > argc,
> >> > const char **argv);
> >> >     || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \
> >> >     || (MODE) == TFmode || (MODE) == V1TImode)
> >> >
> >> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
> >> > +   want to include 8-byte vector modes, like V2SFmode, but not DImode
> >> > +   nor SImode.  */
> >>
> >> This is strange, since we already allow all MMX modes in SSE
> >> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we
> >> return:
> >>
> >> return ((TARGET_AVX
> >>              && VALID_AVX256_REG_OR_OI_MODE (mode))
> >>             || VALID_SSE_REG_MODE (mode)
> >>             || VALID_SSE2_REG_MODE (mode)
> >>             || VALID_MMX_REG_MODE (mode)
> >>             || VALID_MMX_REG_MODE_3DNOW (mode));
> >>
> >> I'd expect that changed VALID_SSE2_REG_MODE affects only
> >> ix86_vector_mode_supported_p when MMX is disabled and perhaps
> >> ix86_set_reg_reg_cost cost function.
> >>
> >> Are there any concrete issues when allowing all MMX (including 3DNOW?)
> >> modes in VALID_SSE2_REG_MODE?
> >
> > The problem is with DImode and SImode.  All other vector modes,  including
> > V2SF is OK.  With DImode and SImode, I got following regressions:
> >
> > FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias
> > cannot be created; target is discardable"
> > FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[ \t]*\n
> > FAIL: go test misc/cgo/testcarchive
> >
> >  gcc.dg/pr39323-3.c  is due to
> >
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261
> >
> > and
> >
> > /* Decide whether a variable of mode MODE should be 128 bit aligned.  */
> > #define ALIGN_MODE_128(MODE) \
> >  ((MODE) == XFmode || SSE_REG_MODE_P (MODE))
>
> Hm, this is a bit worrying, we don't want to introduce ABI
> incompatibilites w.r.t. alignment. We still need to be ABI compatible
> for MMX values and emit unaligned loads/stores when necessary.

We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE.
And I don't think we should put DI and SI in them.

> > SSE_REG_MODE_P and VALID_SSE2_REG_MODE are used in many different
> > places.   i386 backend may not be prepared to deal them in SSE_REG_MODE_P
> > nor VALID_SSE2_REG_MODE.
>
> I think we have to review the usage of these two changed defines to
> prevent any ABI issues or other hidden issues.
>

Absolutely.

-- 
H.J.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers
  2019-02-09 15:08         ` H.J. Lu
@ 2019-02-09 18:27           ` Uros Bizjak
  2019-02-09 18:32             ` H.J. Lu
  0 siblings, 1 reply; 60+ messages in thread
From: Uros Bizjak @ 2019-02-09 18:27 UTC (permalink / raw)
  To: H.J. Lu; +Cc: GCC Patches

On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Sat, Feb 9, 2019 at 7:03 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>>
>> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
>> > On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>> >>
>> >> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
>> >> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without
>> >> > 3DNOW.  We can use SSE2 to support 64-bit vectors.
>> >> >
>> >> >       PR target/89021
>> >> >       * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
>> >> >       * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit
>> >> > vector
>> >> >       modes for TARGET_MMX_WITH_SSE.
>> >> >       (SSE_REG_MODE_P): Likewise.
>> >> > ---
>> >> >  gcc/config/i386/i386.h | 20 ++++++++++++++++++--
>> >> >  1 file changed, 18 insertions(+), 2 deletions(-)
>> >> >
>> >> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
>> >> > index 83b025e0cf5..c1df3ec3326 100644
>> >> > --- a/gcc/config/i386/i386.h
>> >> > +++ b/gcc/config/i386/i386.h
>> >> > @@ -585,6 +585,11 @@ extern unsigned char
>> >> > ix86_arch_features[X86_ARCH_LAST];
>> >> >
>> >> >  #define TARGET_FISTTP                (TARGET_SSE3 && TARGET_80387)
>> >> >
>> >> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
>> >> > +   FIXME: All 3DNOW patterns needs to be updated with SSE
>> >> > emulation.
>> >> > */
>> >> > +#define TARGET_MMX_WITH_SSE \
>> >> > +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
>> >> > +
>> >> >  extern unsigned char x86_prefetch_sse;
>> >> >  #define TARGET_PREFETCH_SSE  x86_prefetch_sse
>> >> >
>> >> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int
>> >> > argc,
>> >> > const char **argv);
>> >> >     || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode
>> >> > \
>> >> >     || (MODE) == TFmode || (MODE) == V1TImode)
>> >> >
>> >> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since
>> >> > we
>> >> > +   want to include 8-byte vector modes, like V2SFmode, but not
>> >> > DImode
>> >> > +   nor SImode.  */
>> >>
>> >> This is strange, since we already allow all MMX modes in SSE
>> >> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we
>> >> return:
>> >>
>> >> return ((TARGET_AVX
>> >>              && VALID_AVX256_REG_OR_OI_MODE (mode))
>> >>             || VALID_SSE_REG_MODE (mode)
>> >>             || VALID_SSE2_REG_MODE (mode)
>> >>             || VALID_MMX_REG_MODE (mode)
>> >>             || VALID_MMX_REG_MODE_3DNOW (mode));
>> >>
>> >> I'd expect that changed VALID_SSE2_REG_MODE affects only
>> >> ix86_vector_mode_supported_p when MMX is disabled and perhaps
>> >> ix86_set_reg_reg_cost cost function.
>> >>
>> >> Are there any concrete issues when allowing all MMX (including 3DNOW?)
>> >> modes in VALID_SSE2_REG_MODE?
>> >
>> > The problem is with DImode and SImode.  All other vector modes,
>> > including
>> > V2SF is OK.  With DImode and SImode, I got following regressions:
>> >
>> > FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias
>> > cannot be created; target is discardable"
>> > FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[
>> > \t]*\n
>> > FAIL: go test misc/cgo/testcarchive
>> >
>> >  gcc.dg/pr39323-3.c  is due to
>> >
>> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261
>> >
>> > and
>> >
>> > /* Decide whether a variable of mode MODE should be 128 bit aligned.
>> > */
>> > #define ALIGN_MODE_128(MODE) \
>> >  ((MODE) == XFmode || SSE_REG_MODE_P (MODE))
>>
>> Hm, this is a bit worrying, we don't want to introduce ABI
>> incompatibilites w.r.t. alignment. We still need to be ABI compatible
>> for MMX values and emit unaligned loads/stores when necessary.
>
> We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE.
> And I don't think we should put DI and SI in them.

Perhaps we should leave SSE_REG_MODE_P and VALID_SSE2_REG_MODE as they
are and amend usage sites with e.g. (TARGET_MMX_WITH_SSE &&
VALID_MMX_REG_MODE (...))? This is much more fine-grained compared to
a big-hammer approach of changing widely used defines like
SSE_REG_MODE_P and VALID_SSE2_REG_MODE. As an example,
ix86_hard_regno_mode_ok already includes all MMX modes for SSE_REG_P,
while the mentioned ALIGN_MODE_128 would be wrong when SSE_REG_MODE_P is
changed.

Uros.


>
>> > SSE_REG_MODE_P and VALID_SSE2_REG_MODE are used in many different
>> > places.   i386 backend may not be prepared to deal them in
>> > SSE_REG_MODE_P
>> > nor VALID_SSE2_REG_MODE.
>>
>> I think we have to review the usage of these two changed defines to
>> prevent any ABI issues or other hidden issues.
>>
>
> Absolutely.
>
> --
> H.J.
>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers
  2019-02-09 18:27           ` Uros Bizjak
@ 2019-02-09 18:32             ` H.J. Lu
  2019-02-09 18:42               ` Uros Bizjak
  0 siblings, 1 reply; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 18:32 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: GCC Patches

On Sat, Feb 9, 2019 at 10:27 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> > On Sat, Feb 9, 2019 at 7:03 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> >>
> >> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> >> > On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> >> >>
> >> >> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> >> >> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without
> >> >> > 3DNOW.  We can use SSE2 to support 64-bit vectors.
> >> >> >
> >> >> >       PR target/89021
> >> >> >       * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
> >> >> >       * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit
> >> >> > vector
> >> >> >       modes for TARGET_MMX_WITH_SSE.
> >> >> >       (SSE_REG_MODE_P): Likewise.
> >> >> > ---
> >> >> >  gcc/config/i386/i386.h | 20 ++++++++++++++++++--
> >> >> >  1 file changed, 18 insertions(+), 2 deletions(-)
> >> >> >
> >> >> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> >> >> > index 83b025e0cf5..c1df3ec3326 100644
> >> >> > --- a/gcc/config/i386/i386.h
> >> >> > +++ b/gcc/config/i386/i386.h
> >> >> > @@ -585,6 +585,11 @@ extern unsigned char
> >> >> > ix86_arch_features[X86_ARCH_LAST];
> >> >> >
> >> >> >  #define TARGET_FISTTP                (TARGET_SSE3 && TARGET_80387)
> >> >> >
> >> >> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
> >> >> > +   FIXME: All 3DNOW patterns needs to be updated with SSE
> >> >> > emulation.
> >> >> > */
> >> >> > +#define TARGET_MMX_WITH_SSE \
> >> >> > +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
> >> >> > +
> >> >> >  extern unsigned char x86_prefetch_sse;
> >> >> >  #define TARGET_PREFETCH_SSE  x86_prefetch_sse
> >> >> >
> >> >> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int
> >> >> > argc,
> >> >> > const char **argv);
> >> >> >     || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode
> >> >> > \
> >> >> >     || (MODE) == TFmode || (MODE) == V1TImode)
> >> >> >
> >> >> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since
> >> >> > we
> >> >> > +   want to include 8-byte vector modes, like V2SFmode, but not
> >> >> > DImode
> >> >> > +   nor SImode.  */
> >> >>
> >> >> This is strange, since we already allow all MMX modes in SSE
> >> >> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we
> >> >> return:
> >> >>
> >> >> return ((TARGET_AVX
> >> >>              && VALID_AVX256_REG_OR_OI_MODE (mode))
> >> >>             || VALID_SSE_REG_MODE (mode)
> >> >>             || VALID_SSE2_REG_MODE (mode)
> >> >>             || VALID_MMX_REG_MODE (mode)
> >> >>             || VALID_MMX_REG_MODE_3DNOW (mode));
> >> >>
> >> >> I'd expect that changed VALID_SSE2_REG_MODE affects only
> >> >> ix86_vector_mode_supported_p when MMX is disabled and perhaps
> >> >> ix86_set_reg_reg_cost cost function.
> >> >>
> >> >> Are there any concrete issues when allowing all MMX (including 3DNOW?)
> >> >> modes in VALID_SSE2_REG_MODE?
> >> >
> >> > The problem is with DImode and SImode.  All other vector modes,
> >> > including
> >> > V2SF is OK.  With DImode and SImode, I got following regressions:
> >> >
> >> > FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias
> >> > cannot be created; target is discardable"
> >> > FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[
> >> > \t]*\n
> >> > FAIL: go test misc/cgo/testcarchive
> >> >
> >> >  gcc.dg/pr39323-3.c  is due to
> >> >
> >> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261
> >> >
> >> > and
> >> >
> >> > /* Decide whether a variable of mode MODE should be 128 bit aligned.
> >> > */
> >> > #define ALIGN_MODE_128(MODE) \
> >> >  ((MODE) == XFmode || SSE_REG_MODE_P (MODE))
> >>
> >> Hm, this is a bit worrying, we don't want to introduce ABI
> >> incompatibilites w.r.t. alignment. We still need to be ABI compatible
> >> for MMX values and emit unaligned loads/stores when necessary.
> >
> > We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE.
> > And I don't think we should put DI and SI in them.
>
> Perhaps we should leave SSE_REG_MODE_P and VALID_SSE2_REG_MODE as they
> are and ammend usage sites with e.g. (TARGET_MMX_WITH_SSE &&
> VALID_MMX_REG_MODE (...))? This is much more fine-grained comparing to

Not VALID_MMX_REG_MODE since it includes SI/DI, but not V2SF.
We only want 8-byte vector modes here.

> a big-hammer approach of changing wide-used defines like
> SSE_REG_MODE_P and VALID_SSE2_REG_MODE. As an example,
> ix86_hard_regno_mode_ok already includes all MMX modes for SSE_REG_P,
> while mentioned ALIGN_MODE_128 would be wrong when SSE_REG_MODE_P is
> changed.

I will give it a try.

-- 
H.J.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers
  2019-02-09 18:32             ` H.J. Lu
@ 2019-02-09 18:42               ` Uros Bizjak
  2019-02-09 18:53                 ` H.J. Lu
  0 siblings, 1 reply; 60+ messages in thread
From: Uros Bizjak @ 2019-02-09 18:42 UTC (permalink / raw)
  To: H.J. Lu; +Cc: GCC Patches

On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
>> >> Hm, this is a bit worrying, we don't want to introduce ABI
>> >> incompatibilites w.r.t. alignment. We still need to be ABI compatible
>> >> for MMX values and emit unaligned loads/stores when necessary.
>> >
>> > We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE.
>> > And I don't think we should put DI and SI in them.
>>
>> Perhaps we should leave SSE_REG_MODE_P and VALID_SSE2_REG_MODE as they
>> are and ammend usage sites with e.g. (TARGET_MMX_WITH_SSE &&
>> VALID_MMX_REG_MODE (...))? This is much more fine-grained comparing to
>
> Not VALID_MMX_REG_MODE since it includes SI/DI, but not V2SF.
> We only want 8-byte vector modes here.

Well, I'm not forcing VALID_MMX_REG_MODE here, it is just an example;
the important part is in the addition of (TARGET_MMX_WITH_SSE &&
some_modes). Surely, we don't want to align SImode to 128 bits in
ALIGN_MODE_128.

Uros.

>> a big-hammer approach of changing wide-used defines like
>> SSE_REG_MODE_P and VALID_SSE2_REG_MODE. As an example,
>> ix86_hard_regno_mode_ok already includes all MMX modes for SSE_REG_P,
>> while mentioned ALIGN_MODE_128 would be wrong when SSE_REG_MODE_P is
>> changed.
>
> I will give it a try.
>
> --
> H.J.
>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers
  2019-02-09 18:42               ` Uros Bizjak
@ 2019-02-09 18:53                 ` H.J. Lu
  0 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-09 18:53 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 1077 bytes --]

On Sat, Feb 9, 2019 at 10:41 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> >> >> Hm, this is a bit worrying, we don't want to introduce ABI
> >> >> incompatibilites w.r.t. alignment. We still need to be ABI compatible
> >> >> for MMX values and emit unaligned loads/stores when necessary.
> >> >
> >> > We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE.
> >> > And I don't think we should put DI and SI in them.
> >>
> >> Perhaps we should leave SSE_REG_MODE_P and VALID_SSE2_REG_MODE as they
> >> are and ammend usage sites with e.g. (TARGET_MMX_WITH_SSE &&
> >> VALID_MMX_REG_MODE (...))? This is much more fine-grained comparing to
> >
> > Not VALID_MMX_REG_MODE since it includes SI/DI, but not V2SF.
> > We only want 8-byte vector modes here.
>
> Well, I'm not forcing VALID_MMX_REG_MODE here, it is just an example;
> the important part is in the addition of (TARGET_MMX_WITH_SSE &&
> some_modes). Surely, we don't want to align SImode to 128 bits in
> ALIGN_MODE_128.
>

I am testing this.

-- 
H.J.

[-- Attachment #2: 0001-i386-Allow-64-bit-vector-modes-in-SSE-registers.patch --]
[-- Type: text/x-patch, Size: 3077 bytes --]

From 1a3a4c4d2e133d99c6671788a8475efe39804dbb Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Thu, 24 Jan 2019 08:27:41 -0800
Subject: [PATCH] i386: Allow 64-bit vector modes in SSE registers

In 64-bit mode, SSE2 can be used to emulate MMX instructions without
3DNOW.  We can use SSE2 to support 64-bit vectors.

	PR target/89021
	* config/i386/i386.c (ix86_set_reg_reg_cost): Also support
	VALID_MMX_WITH_SSE_REG_MODE.
	(ix86_vector_mode_supported_p): Likewise.
	* config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
	(TARGET_MMX_WITH_SSE_P): Likewise.
	(VALID_MMX_WITH_SSE_REG_MODE): Likewise.
---
 gcc/config/i386/i386.c |  3 +++
 gcc/config/i386/i386.h | 14 ++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 12bc7926f86..ba02c26c8b2 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -40235,6 +40235,7 @@ ix86_set_reg_reg_cost (machine_mode mode)
 	  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
 	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
 	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
+	  || (TARGET_MMX_WITH_SSE && VALID_MMX_WITH_SSE_REG_MODE (mode))
 	  || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
 	units = GET_MODE_SIZE (mode);
     }
@@ -44057,6 +44058,8 @@ ix86_vector_mode_supported_p (machine_mode mode)
     return true;
   if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
     return true;
+  if (TARGET_MMX_WITH_SSE && VALID_MMX_WITH_SSE_REG_MODE (mode))
+    return true;
   if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
     return true;
   if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 83b025e0cf5..f75fd426293 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -201,6 +201,13 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_16BIT	TARGET_CODE16
 #define TARGET_16BIT_P(x)	TARGET_CODE16_P(x)
 
+/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
+   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.  */
+#define TARGET_MMX_WITH_SSE \
+  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
+#define TARGET_MMX_WITH_SSE_P(x) \
+  (TARGET_64BIT_P (x) && TARGET_SSE2_P (x) && !TARGET_3DNOW_P (x))
+
 #include "config/vxworks-dummy.h"
 
 #include "config/i386/i386-opts.h"
@@ -1143,6 +1150,13 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
    || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode	\
    || (MODE) == TFmode || (MODE) == V1TImode)
 
+/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
+   want to include 8-byte vector modes, like V2SFmode, but not DImode
+   nor SImode.  */
+#define VALID_MMX_WITH_SSE_REG_MODE(MODE)				\
+  ((MODE) == V1DImode || (MODE) == V8QImode || (MODE) == V4HImode	\
+   || (MODE) == V2SImode || (MODE) == V2SFmode)
+
 #define VALID_SSE2_REG_MODE(MODE)					\
   ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode	\
    || (MODE) == V2DImode || (MODE) == DFmode)
-- 
2.20.1


^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 05/43] i386: Emulate MMX mulv4hi3 with SSE
  2019-02-09 13:25 ` [PATCH 05/43] i386: Emulate MMX mulv4hi3 with SSE H.J. Lu
@ 2019-02-10 10:17   ` Uros Bizjak
  0 siblings, 0 replies; 60+ messages in thread
From: Uros Bizjak @ 2019-02-10 10:17 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches

On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> Emulate MMX mulv4hi3 with SSE.  Only SSE register source operand is
> allowed.
>
> 	PR target/89021
> 	* config/i386/mmx.md (mulv4hi3): New.
> 	(*mmx_mulv4hi3): Also allow TARGET_MMX_WITH_SSE.  Add SSE
> 	support.

OK.

Uros.

> ---
>  gcc/config/i386/mmx.md | 26 +++++++++++++++++++-------
>  1 file changed, 19 insertions(+), 7 deletions(-)
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 01a71aa128b..2712a86ea3c 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -753,14 +753,26 @@
>    "TARGET_MMX"
>    "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
>
> +(define_expand "mulv4hi3"
> +  [(set (match_operand:V4HI 0 "register_operand")
> +        (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand")
> +		   (match_operand:V4HI 2 "nonimmediate_operand")))]
> +  "TARGET_MMX_WITH_SSE"
> +  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
> +
>  (define_insn "*mmx_mulv4hi3"
> -  [(set (match_operand:V4HI 0 "register_operand" "=y")
> -        (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
> -		   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
> -  "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
> -  "pmullw\t{%2, %0|%0, %2}"
> -  [(set_attr "type" "mmxmul")
> -   (set_attr "mode" "DI")])
> +  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
> +        (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yy")
> +		   (match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy")))]
> +  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
> +   && ix86_binary_operator_ok (MULT, V4HImode, operands)"
> +  "@
> +   pmullw\t{%2, %0|%0, %2}
> +   pmullw\t{%2, %0|%0, %2}
> +   vpmullw\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> +   (set_attr "type" "mmxmul,ssemul,ssemul")
> +   (set_attr "mode" "DI,TI,TI")])
>
>  (define_expand "mmx_smulv4hi3_highpart"
>    [(set (match_operand:V4HI 0 "register_operand")
> --
> 2.20.1
>
>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 09/43] i386: Emulate MMX <any_logic><mode>3 with SSE
  2019-02-09 13:24 ` [PATCH 09/43] i386: Emulate MMX <any_logic><mode>3 " H.J. Lu
@ 2019-02-10 10:27   ` Uros Bizjak
  0 siblings, 0 replies; 60+ messages in thread
From: Uros Bizjak @ 2019-02-10 10:27 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches

On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> Emulate MMX <any_logic><mode>3 with SSE.  Only SSE register source
> operand is allowed.
>
> 	PR target/89021
> 	* config/i386/mmx.md (any_logic:<code><mode>3): New.
> 	(any_logic:*mmx_<code><mode>3): Also allow TARGET_MMX_WITH_SSE.
> 	Add SSE support.

OK.

Uros.

> ---
>  gcc/config/i386/mmx.md | 27 ++++++++++++++++++++-------
>  1 file changed, 20 insertions(+), 7 deletions(-)
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 69c66e968b5..fae2e43af24 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -1110,15 +1110,28 @@
>    "TARGET_MMX"
>    "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
>
> +(define_expand "<code><mode>3"
> +  [(set (match_operand:MMXMODEI 0 "register_operand")
> +	(any_logic:MMXMODEI
> +	  (match_operand:MMXMODEI 1 "nonimmediate_operand")
> +	  (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
> +  "TARGET_MMX_WITH_SSE"
> +  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
> +
>  (define_insn "*mmx_<code><mode>3"
> -  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
> +  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yx,Yy")
>          (any_logic:MMXMODEI
> -	  (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
> -	  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
> -  "TARGET_MMX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
> -  "p<logic>\t{%2, %0|%0, %2}"
> -  [(set_attr "type" "mmxadd")
> -   (set_attr "mode" "DI")])
> +	  (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0,0,Yy")
> +	  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,Yx,Yy")))]
> +  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
> +   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
> +  "@
> +   p<logic>\t{%2, %0|%0, %2}
> +   p<logic>\t{%2, %0|%0, %2}
> +   vp<logic>\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> +   (set_attr "type" "mmxadd,sselog,sselog")
> +   (set_attr "mode" "DI,TI,TI")])
>
>  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
>  ;;
> --
> 2.20.1
>
>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 10/43] i386: Emulate MMX mmx_andnot<mode>3 with SSE
  2019-02-09 13:25 ` [PATCH 10/43] i386: Emulate MMX mmx_andnot<mode>3 with SSE H.J. Lu
@ 2019-02-10 10:28   ` Uros Bizjak
  0 siblings, 0 replies; 60+ messages in thread
From: Uros Bizjak @ 2019-02-10 10:28 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches

On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> Emulate MMX mmx_andnot<mode>3 with SSE.  Only SSE register source operand
> is allowed.
>
> 	PR target/89021
> 	* config/i386/mmx.md (mmx_andnot<mode>3): Also allow
> 	TARGET_MMX_WITH_SSE.  Add SSE support.

OK.

Uros.

> ---
>  gcc/config/i386/mmx.md | 18 +++++++++++-------
>  1 file changed, 11 insertions(+), 7 deletions(-)
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index fae2e43af24..1e235bfcde4 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -1093,14 +1093,18 @@
>  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
>
>  (define_insn "mmx_andnot<mode>3"
> -  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
> +  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yx,Yy")
>  	(and:MMXMODEI
> -	  (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0"))
> -	  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
> -  "TARGET_MMX"
> -  "pandn\t{%2, %0|%0, %2}"
> -  [(set_attr "type" "mmxadd")
> -   (set_attr "mode" "DI")])
> +	  (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,Yy"))
> +	  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,Yx,Yy")))]
> +  "TARGET_MMX || TARGET_MMX_WITH_SSE"
> +  "@
> +   pandn\t{%2, %0|%0, %2}
> +   pandn\t{%2, %0|%0, %2}
> +   vpandn\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> +   (set_attr "type" "mmxadd,sselog,sselog")
> +   (set_attr "mode" "DI,TI,TI")])
>
>  (define_expand "mmx_<code><mode>3"
>    [(set (match_operand:MMXMODEI 0 "register_operand")
> --
> 2.20.1
>
>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 10/43] i386: Emulate MMX mmx_andnot<mode>3 with SSE
  2019-02-10  0:19 [PATCH 00/43] V3: " H.J. Lu
@ 2019-02-10  0:20 ` H.J. Lu
  0 siblings, 0 replies; 60+ messages in thread
From: H.J. Lu @ 2019-02-10  0:20 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Emulate MMX mmx_andnot<mode>3 with SSE.  Only SSE register source operand
is allowed.

	PR target/89021
	* config/i386/mmx.md (mmx_andnot<mode>3): Also allow
	TARGET_MMX_WITH_SSE.  Add SSE support.
---
 gcc/config/i386/mmx.md | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index b7cbe2155b6..8945ece2a03 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1093,14 +1093,18 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (define_insn "mmx_andnot<mode>3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
 	(and:MMXMODEI
-	  (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0"))
-	  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pandn\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+	  (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv"))
+	  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,x,Yv")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   pandn\t{%2, %0|%0, %2}
+   pandn\t{%2, %0|%0, %2}
+   vpandn\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_<code><mode>3"
   [(set (match_operand:MMXMODEI 0 "register_operand")
-- 
2.20.1

^ permalink raw reply	[flat|nested] 60+ messages in thread

end of thread, other threads:[~2019-02-10 10:28 UTC | newest]

Thread overview: 60+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics with SSE H.J. Lu
2019-02-09 13:23 ` [PATCH 08/43] i386: Emulate MMX ashr<mode>3/<shift_insn><mode>3 " H.J. Lu
2019-02-09 13:24 ` [PATCH 17/43] i386: Emulate MMX mmx_pinsrw " H.J. Lu
2019-02-09 13:24 ` [PATCH 03/43] i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX H.J. Lu
2019-02-09 13:24 ` [PATCH 07/43] i386: Emulate MMX mmx_pmaddwd with SSE H.J. Lu
2019-02-09 13:24 ` [PATCH 09/43] i386: Emulate MMX <any_logic><mode>3 " H.J. Lu
2019-02-10 10:27   ` Uros Bizjak
2019-02-09 13:24 ` [PATCH 04/43] i386: Emulate MMX plusminus/sat_plusminus " H.J. Lu
2019-02-09 14:19   ` Uros Bizjak
2019-02-09 13:24 ` [PATCH 02/43] i386: Emulate MMX packsswb/packssdw/packuswb with SSE2 H.J. Lu
2019-02-09 13:24 ` [PATCH 15/43] i386: Emulate MMX sse_cvtpi2ps with SSE H.J. Lu
2019-02-09 13:24 ` [PATCH 06/43] i386: Emulate MMX smulv4hi3_highpart " H.J. Lu
2019-02-09 13:24 ` [PATCH 14/43] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi " H.J. Lu
2019-02-09 14:28   ` Uros Bizjak
2019-02-09 13:24 ` [PATCH 11/43] i386: Emulate MMX mmx_eq/mmx_gt<mode>3 " H.J. Lu
2019-02-09 13:24 ` [PATCH 12/43] i386: Emulate MMX vec_dupv2si " H.J. Lu
2019-02-09 13:25 ` [PATCH 34/43] i386: Emulate MMX abs<mode>2 " H.J. Lu
2019-02-09 13:25 ` [PATCH 43/43] i386: Implement V2SF comparisons " H.J. Lu
2019-02-09 13:25 ` [PATCH 23/43] i386: Emulate MMX mmx_uavgv4hi3 " H.J. Lu
2019-02-09 13:25 ` [PATCH 29/43] i386: Emulate MMX ssse3_pmaddubsw " H.J. Lu
2019-02-09 13:25 ` [PATCH 24/43] i386: Emulate MMX mmx_psadbw " H.J. Lu
2019-02-09 13:25 ` [PATCH 40/43] i386: Enable 8-byte vectorizer for TARGET_MMX_WITH_SSE H.J. Lu
2019-02-09 13:25 ` [PATCH 16/43] i386: Emulate MMX mmx_pextrw with SSE H.J. Lu
2019-02-09 13:25 ` [PATCH 13/43] i386: Emulate MMX pshufw " H.J. Lu
2019-02-09 13:25 ` [PATCH 26/43] i386: Emulate MMX umulv1siv1di3 with SSE2 H.J. Lu
2019-02-09 13:25 ` [PATCH 22/43] i386: Emulate MMX mmx_uavgv8qi3 with SSE H.J. Lu
2019-02-09 13:25 ` [PATCH 30/43] i386: Emulate MMX ssse3_pmulhrswv4hi3 " H.J. Lu
2019-02-09 13:25 ` [PATCH 25/43] i386: Emulate MMX movntq with SSE2 movntidi H.J. Lu
2019-02-09 13:25 ` [PATCH 05/43] i386: Emulate MMX mulv4hi3 with SSE H.J. Lu
2019-02-10 10:17   ` Uros Bizjak
2019-02-09 13:25 ` [PATCH 20/43] i386: Emulate MMX mmx_umulv4hi3_highpart " H.J. Lu
2019-02-09 13:25 ` [PATCH 38/43] i386: Add tests for MMX intrinsic emulations " H.J. Lu
2019-02-09 13:25 ` [PATCH 33/43] i386: Emulate MMX ssse3_palignrdi " H.J. Lu
2019-02-09 13:25 ` [PATCH 27/43] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>wv4hi3 " H.J. Lu
2019-02-09 13:25 ` [PATCH 32/43] i386: Emulate MMX ssse3_psign<mode>3 " H.J. Lu
2019-02-09 13:25 ` [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers H.J. Lu
2019-02-09 14:09   ` Uros Bizjak
2019-02-09 14:32     ` H.J. Lu
2019-02-09 15:03       ` Uros Bizjak
2019-02-09 15:08         ` H.J. Lu
2019-02-09 18:27           ` Uros Bizjak
2019-02-09 18:32             ` H.J. Lu
2019-02-09 18:42               ` Uros Bizjak
2019-02-09 18:53                 ` H.J. Lu
2019-02-09 13:25 ` [PATCH 31/43] i386: Emulate MMX pshufb with SSE version H.J. Lu
2019-02-09 13:25 ` [PATCH 36/43] i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE H.J. Lu
2019-02-09 13:25 ` [PATCH 10/43] i386: Emulate MMX mmx_andnot<mode>3 with SSE H.J. Lu
2019-02-10 10:28   ` Uros Bizjak
2019-02-09 13:25 ` [PATCH 41/43] i386: Implement V2SF add/sub/mul with SSE H.J. Lu
2019-02-09 13:25 ` [PATCH 39/43] i386: Also enable SSSE3 __m64 tests in 64-bit mode H.J. Lu
2019-02-09 13:25 ` [PATCH 42/43] i386: Implement V2SF <-> V2SI conversions with SSE H.J. Lu
2019-02-09 13:25 ` [PATCH 21/43] i386: Emulate MMX maskmovq with SSE2 maskmovdqu H.J. Lu
2019-02-09 13:25 ` [PATCH 35/43] i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE H.J. Lu
2019-02-09 13:25 ` [PATCH 28/43] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>dv2si3 with SSE H.J. Lu
2019-02-09 13:25 ` [PATCH 18/43] i386: Emulate MMX V4HI smaxmin/V8QI umaxmin " H.J. Lu
2019-02-09 13:25 ` [PATCH 37/43] i386: Allow MMX intrinsic emulation " H.J. Lu
2019-02-09 14:43   ` Uros Bizjak
2019-02-09 13:25 ` [PATCH 19/43] i386: Emulate MMX mmx_pmovmskb " H.J. Lu
2019-02-09 13:53 ` [PATCH 00/43] V2: Emulate MMX intrinsics " Uros Bizjak
2019-02-10  0:19 [PATCH 00/43] V3: " H.J. Lu
2019-02-10  0:20 ` [PATCH 10/43] i386: Emulate MMX mmx_andnot<mode>3 " H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).