public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: "H.J. Lu" <hjl.tools@gmail.com>
To: gcc-patches@gcc.gnu.org
Cc: Uros Bizjak <ubizjak@gmail.com>
Subject: [PATCH 03/41] i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
Date: Sat, 16 Feb 2019 22:40:00 -0000	[thread overview]
Message-ID: <20190216224032.4889-4-hjl.tools@gmail.com> (raw)
In-Reply-To: <20190216224032.4889-1-hjl.tools@gmail.com>

Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX.  For MMX punpckhXX,
move bits 64:127 to bits 0:63 in SSE register.  Only SSE register source
operand is allowed.

	PR target/89021
	* config/i386/i386-protos.h (ix86_split_mmx_punpck): New
	prototype.
	* config/i386/i386.c (ix86_split_mmx_punpck): New function.
	* config/i386/mmx.m (mmx_punpckhbw): Changed to
	define_insn_and_split to support SSE emulation.
	(mmx_punpcklbw): Likewise.
	(mmx_punpckhwd): Likewise.
	(mmx_punpcklwd): Likewise.
	(mmx_punpckhdq): Likewise.
	(mmx_punpckldq): Likewise.
---
 gcc/config/i386/i386-protos.h |   1 +
 gcc/config/i386/i386.c        |  77 +++++++++++++++++++
 gcc/config/i386/mmx.md        | 138 ++++++++++++++++++++++------------
 3 files changed, 168 insertions(+), 48 deletions(-)

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index a53b48438ec..37581837a32 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -204,6 +204,7 @@ extern rtx ix86_split_stack_guard (void);
 
 extern void ix86_move_vector_high_sse_to_mmx (rtx);
 extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+extern void ix86_split_mmx_punpck (rtx[], bool);
 
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d31b69d9a82..a76c17beece 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -20275,6 +20275,83 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
   ix86_move_vector_high_sse_to_mmx (op0);
 }
 
+/* Split MMX punpcklXX/punpckhXX with SSE punpcklXX.  */
+
+void
+ix86_split_mmx_punpck (rtx operands[], bool high_p)
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+  machine_mode mode = GET_MODE (op0);
+  rtx mask;
+  /* The corresponding SSE mode.  */
+  machine_mode sse_mode, double_sse_mode;
+
+  switch (mode)
+    {
+    case E_V8QImode:
+      sse_mode = V16QImode;
+      double_sse_mode = V32QImode;
+      mask = gen_rtx_PARALLEL (VOIDmode,
+			       gen_rtvec (16,
+					  GEN_INT (0), GEN_INT (16),
+					  GEN_INT (1), GEN_INT (17),
+					  GEN_INT (2), GEN_INT (18),
+					  GEN_INT (3), GEN_INT (19),
+					  GEN_INT (4), GEN_INT (20),
+					  GEN_INT (5), GEN_INT (21),
+					  GEN_INT (6), GEN_INT (22),
+					  GEN_INT (7), GEN_INT (23)));
+      break;
+
+    case E_V4HImode:
+      sse_mode = V8HImode;
+      double_sse_mode = V16HImode;
+      mask = gen_rtx_PARALLEL (VOIDmode,
+			       gen_rtvec (8,
+					  GEN_INT (0), GEN_INT (8),
+					  GEN_INT (1), GEN_INT (9),
+					  GEN_INT (2), GEN_INT (10),
+					  GEN_INT (3), GEN_INT (11)));
+      break;
+
+    case E_V2SImode:
+      sse_mode = V4SImode;
+      double_sse_mode = V8SImode;
+      mask = gen_rtx_PARALLEL (VOIDmode,
+			       gen_rtvec (4,
+					  GEN_INT (0), GEN_INT (4),
+					  GEN_INT (1), GEN_INT (5)));
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  /* Generate SSE punpcklXX.  */
+  rtx dest = lowpart_subreg (sse_mode, op0, GET_MODE (op0));
+  op1 = lowpart_subreg (sse_mode, op1, GET_MODE (op1));
+  op2 = lowpart_subreg (sse_mode, op2, GET_MODE (op2));
+
+  op1 = gen_rtx_VEC_CONCAT (double_sse_mode, op1, op2);
+  op2 = gen_rtx_VEC_SELECT (sse_mode, op1, mask);
+  rtx insn = gen_rtx_SET (dest, op2);
+  emit_insn (insn);
+
+  if (high_p)
+    {
+      /* Move bits 64:127 to bits 0:63.  */
+      mask = gen_rtx_PARALLEL (VOIDmode,
+			       gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+					  GEN_INT (0), GEN_INT (0)));
+      dest = lowpart_subreg (V4SImode, dest, GET_MODE (dest));
+      op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+      insn = gen_rtx_SET (dest, op1);
+      emit_insn (insn);
+    }
+}
+
 /* Helper function of ix86_fixup_binary_operands to canonicalize
    operand order.  Returns true if the operands should be swapped.  */
 
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 63a390923b6..0aa793395fb 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1064,87 +1064,129 @@
    (set_attr "type" "mmxshft,sselog,sselog")
    (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpckhbw"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhbw"
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
 	(vec_select:V8QI
 	  (vec_concat:V16QI
-	    (match_operand:V8QI 1 "register_operand" "0")
-	    (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+	    (match_operand:V8QI 1 "register_operand" "0,0,Yv")
+	    (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))
           (parallel [(const_int 4) (const_int 12)
                      (const_int 5) (const_int 13)
                      (const_int 6) (const_int 14)
                      (const_int 7) (const_int 15)])))]
-  "TARGET_MMX"
-  "punpckhbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckhbw\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+  "ix86_split_mmx_punpck (operands, true); DONE;"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpcklbw"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpcklbw"
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
 	(vec_select:V8QI
 	  (vec_concat:V16QI
-	    (match_operand:V8QI 1 "register_operand" "0")
-	    (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+	    (match_operand:V8QI 1 "register_operand" "0,0,Yv")
+	    (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))
           (parallel [(const_int 0) (const_int 8)
                      (const_int 1) (const_int 9)
                      (const_int 2) (const_int 10)
                      (const_int 3) (const_int 11)])))]
-  "TARGET_MMX"
-  "punpcklbw\t{%2, %0|%0, %k2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpcklbw\t{%2, %0|%0, %k2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+  "ix86_split_mmx_punpck (operands, false); DONE;"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpckhwd"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhwd"
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
 	(vec_select:V4HI
 	  (vec_concat:V8HI
-	    (match_operand:V4HI 1 "register_operand" "0")
-	    (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+	    (match_operand:V4HI 1 "register_operand" "0,0,Yv")
+	    (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
           (parallel [(const_int 2) (const_int 6)
                      (const_int 3) (const_int 7)])))]
-  "TARGET_MMX"
-  "punpckhwd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckhwd\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+  "ix86_split_mmx_punpck (operands, true); DONE;"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpcklwd"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpcklwd"
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
 	(vec_select:V4HI
 	  (vec_concat:V8HI
-	    (match_operand:V4HI 1 "register_operand" "0")
-	    (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+	    (match_operand:V4HI 1 "register_operand" "0,0,Yv")
+	    (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
           (parallel [(const_int 0) (const_int 4)
                      (const_int 1) (const_int 5)])))]
-  "TARGET_MMX"
-  "punpcklwd\t{%2, %0|%0, %k2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpcklwd\t{%2, %0|%0, %k2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+  "ix86_split_mmx_punpck (operands, false); DONE;"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpckhdq"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhdq"
+  [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
 	(vec_select:V2SI
 	  (vec_concat:V4SI
-	    (match_operand:V2SI 1 "register_operand" "0")
-	    (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
+	    (match_operand:V2SI 1 "register_operand" "0,0,Yv")
+	    (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
 	  (parallel [(const_int 1)
 		     (const_int 3)])))]
-  "TARGET_MMX"
-  "punpckhdq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckhdq\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+  "ix86_split_mmx_punpck (operands, true); DONE;"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpckldq"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckldq"
+  [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
 	(vec_select:V2SI
 	  (vec_concat:V4SI
-	    (match_operand:V2SI 1 "register_operand" "0")
-	    (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
+	    (match_operand:V2SI 1 "register_operand" "0,0,Yv")
+	    (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
 	  (parallel [(const_int 0)
 		     (const_int 2)])))]
-  "TARGET_MMX"
-  "punpckldq\t{%2, %0|%0, %k2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckldq\t{%2, %0|%0, %k2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+  "ix86_split_mmx_punpck (operands, false); DONE;"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_pinsrw"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1

  reply	other threads:[~2019-02-16 22:40 UTC|newest]

Thread overview: 66+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-16 22:41 [PATCH 00/41] V8: Emulate MMX intrinsics with SSE H.J. Lu
2019-02-16 22:40 ` H.J. Lu [this message]
2019-02-16 22:40 ` [PATCH 08/41] i386: Emulate MMX ashr<mode>3/<shift_insn><mode>3 " H.J. Lu
2019-02-16 22:40 ` [PATCH 01/41] i386: Allow MMX register modes in SSE registers H.J. Lu
2019-02-16 22:40 ` [PATCH 11/41] i386: Emulate MMX mmx_eq/mmx_gt<mode>3 with SSE H.J. Lu
2019-02-16 22:40 ` [PATCH 12/41] i386: Emulate MMX vec_dupv2si " H.J. Lu
2019-02-16 22:40 ` [PATCH 04/41] i386: Emulate MMX plusminus/sat_plusminus " H.J. Lu
2019-02-16 22:40 ` [PATCH 06/41] i386: Emulate MMX smulv4hi3_highpart " H.J. Lu
2019-02-16 22:40 ` [PATCH 15/41] i386: Emulate MMX sse_cvtpi2ps " H.J. Lu
2019-02-16 22:40 ` [PATCH 09/41] i386: Emulate MMX <any_logic><mode>3 " H.J. Lu
2019-02-16 22:41 ` [PATCH 16/41] i386: Emulate MMX mmx_pextrw " H.J. Lu
2019-02-16 22:41 ` [PATCH 14/41] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi " H.J. Lu
2019-02-16 22:41 ` [PATCH 31/41] i386: Emulate MMX ssse3_pmulhrswv4hi3 " H.J. Lu
2019-02-16 22:41 ` [PATCH 02/41] i386: Emulate MMX packsswb/packssdw/packuswb with SSE2 H.J. Lu
2019-02-16 22:41 ` [PATCH 17/41] i386: Emulate MMX mmx_pinsrw with SSE H.J. Lu
2019-02-16 22:41 ` [PATCH 05/41] i386: Emulate MMX mulv4hi3 " H.J. Lu
2019-02-16 22:41 ` [PATCH 25/41] i386: Emulate MMX movntq with SSE2 movntidi H.J. Lu
2019-02-16 22:41 ` [PATCH 10/41] i386: Emulate MMX mmx_andnot<mode>3 with SSE H.J. Lu
2019-02-16 22:41 ` [PATCH 13/41] i386: Emulate MMX pshufw " H.J. Lu
2019-02-16 22:41 ` [PATCH 33/41] i386: Emulate MMX ssse3_psign<mode>3 " H.J. Lu
2019-02-16 22:41 ` [PATCH 27/41] i386: Make _mm_empty () as NOP for TARGET_MMX_WITH_SSE H.J. Lu
2019-02-17  9:45   ` Uros Bizjak
2019-02-17 13:38     ` H.J. Lu
2019-02-16 22:41 ` [PATCH 28/41] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>wv4hi3 with SSE H.J. Lu
2019-02-16 22:41 ` [PATCH 07/41] i386: Emulate MMX mmx_pmaddwd " H.J. Lu
2019-02-16 22:41 ` [PATCH 40/41] i386: Enable TM MMX intrinsics with SSE2 H.J. Lu
2019-02-16 22:42 ` [PATCH 41/41] i386: Add tests for MMX intrinsic emulations with SSE H.J. Lu
2019-02-16 22:46 ` [PATCH 20/41] i386: Emulate MMX mmx_umulv4hi3_highpart " H.J. Lu
2019-02-16 22:46 ` [PATCH 19/41] i386: Emulate MMX mmx_pmovmskb " H.J. Lu
2019-02-16 22:47 ` [PATCH 38/41] i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE H.J. Lu
2019-02-17 16:25   ` Uros Bizjak
2019-02-17 17:03     ` H.J. Lu
2019-02-17 17:09       ` Uros Bizjak
2019-02-17 17:15         ` H.J. Lu
2019-02-17 17:22           ` Uros Bizjak
2019-02-17 17:28             ` H.J. Lu
2019-02-17 18:50               ` Uros Bizjak
2019-02-17 20:49                 ` H.J. Lu
2019-02-16 22:47 ` [PATCH 36/41] Prevent allocation of MMX registers " H.J. Lu
2019-02-18 12:56   ` Uros Bizjak
2019-02-16 22:47 ` [PATCH 23/41] i386: Emulate MMX mmx_uavgv4hi3 with SSE H.J. Lu
2019-02-16 22:47 ` [PATCH 18/41] i386: Emulate MMX V4HI smaxmin/V8QI umaxmin " H.J. Lu
2019-02-16 22:47 ` [PATCH 30/41] i386: Emulate MMX ssse3_pmaddubsw " H.J. Lu
2019-02-16 22:47 ` [PATCH 37/41] i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE H.J. Lu
2019-02-16 22:47 ` [PATCH 39/41] i386: Allow MMX intrinsic emulation with SSE H.J. Lu
2019-02-16 22:47 ` [PATCH 24/41] i386: Emulate MMX mmx_psadbw " H.J. Lu
2019-02-16 22:47 ` [PATCH 22/41] i386: Emulate MMX mmx_uavgv8qi3 " H.J. Lu
2019-02-16 22:47 ` [PATCH 35/41] i386: Emulate MMX abs<mode>2 " H.J. Lu
2019-02-16 22:47 ` [PATCH 26/41] i386: Emulate MMX umulv1siv1di3 with SSE2 H.J. Lu
2019-02-16 22:47 ` [PATCH 32/41] i386: Emulate MMX pshufb with SSE version H.J. Lu
2019-02-16 22:47 ` [PATCH 21/41] i386: Emulate MMX maskmovq with SSE2 maskmovdqu H.J. Lu
2019-02-16 22:47 ` [PATCH 34/41] i386: Emulate MMX ssse3_palignrdi with SSE H.J. Lu
2019-02-16 22:47 ` [PATCH 29/41] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>dv2si3 " H.J. Lu
2019-02-17 10:33 ` [PATCH 00/41] V8: Emulate MMX intrinsics " Uros Bizjak
2019-02-17 13:42   ` H.J. Lu
2019-02-17 15:54     ` Uros Bizjak
2019-02-17 15:57       ` Uros Bizjak
2019-02-17 17:10         ` H.J. Lu
2019-02-17 17:27           ` Uros Bizjak
2019-02-17 17:37             ` H.J. Lu
2019-02-17 18:49               ` Uros Bizjak
2019-02-17 20:47                 ` H.J. Lu
2019-02-18 14:22                   ` H.J. Lu
2019-02-18 14:37                     ` Uros Bizjak
2019-02-18 14:48                       ` H.J. Lu
2019-02-18 18:39 [PATCH 00/41] V9: " H.J. Lu
2019-02-18 18:38 ` [PATCH 03/41] i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX H.J. Lu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190216224032.4889-4-hjl.tools@gmail.com \
    --to=hjl.tools@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=ubizjak@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).