public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: "H.J. Lu" <hjl.tools@gmail.com>
To: gcc-patches@gcc.gnu.org
Cc: Uros Bizjak <ubizjak@gmail.com>
Subject: [PATCH 21/43] i386: Emulate MMX maskmovq with SSE2 maskmovdqu
Date: Sun, 10 Feb 2019 00:23:00 -0000	[thread overview]
Message-ID: <20190210001947.27278-22-hjl.tools@gmail.com> (raw)
In-Reply-To: <20190210001947.27278-1-hjl.tools@gmail.com>

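Emulate MMX maskmovq with SSE2 maskmovdqu in 64-bit mode by zero-extending
the source and mask operands to 128 bits.  Since maskmovdqu accesses 16
bytes instead of 8, bits 64:127 at the memory address may be unmapped.
Handle this by moving the memory address back by its misalignment (at
most 8 bytes) and shifting the source and mask operands left by the same
amount.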

	PR target/89021
	* config/i386/xmmintrin.h: Emulate MMX maskmovq with SSE2
	maskmovdqu in 64-bit mode.
---
 gcc/config/i386/xmmintrin.h | 61 +++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 58284378514..e797795f127 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -1165,7 +1165,68 @@ _m_pshufw (__m64 __A, int const __N)
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
 {
+#ifdef __x86_64__
+  /* Emulate MMX maskmovq with SSE2 maskmovdqu and handle unmapped bits
+     64:127 at address __P.  */
+  typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+  typedef char __v16qi __attribute__ ((__vector_size__ (16)));
+  /* Zero-extend __A and __N to 128 bits.  */
+  __v2di __A128 = __extension__ (__v2di) { ((__v1di) __A)[0], 0 };
+  __v2di __N128 = __extension__ (__v2di) { ((__v1di) __N)[0], 0 };
+
+  /* Misalignment of __P within 16 bytes; reserved name avoids user macros.  */
+  __SIZE_TYPE__ __offset = ((__SIZE_TYPE__) __P) & 0xf;
+  if (__offset)
+    {
+      /* If the misalignment of __P > 8, subtract __P by 8 bytes.
+	 Otherwise, subtract __P by the misalignment.  */
+      if (__offset > 8)
+	__offset = 8;
+      __P = (char *) (((__SIZE_TYPE__) __P) - __offset);
+
+      /* Shift __A128 and __N128 left by the adjustment; count is in bits.  */
+      switch (__offset)
+	{
+	case 1:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 8);
+	  break;
+	case 2:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 2 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 2 * 8);
+	  break;
+	case 3:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 3 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 3 * 8);
+	  break;
+	case 4:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 4 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 4 * 8);
+	  break;
+	case 5:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 5 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 5 * 8);
+	  break;
+	case 6:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 6 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 6 * 8);
+	  break;
+	case 7:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 7 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 7 * 8);
+	  break;
+	case 8:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 8 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 8 * 8);
+	  break;
+	default:
+	  break;
+	}
+    }
+  __builtin_ia32_maskmovdqu ((__v16qi)__A128, (__v16qi)__N128, __P);
+#else
   __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
+#endif
 }
 
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-- 
2.20.1

  parent reply	other threads:[~2019-02-10  0:23 UTC|newest]

Thread overview: 87+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-10  0:19 [PATCH 00/43] V3: Emulate MMX intrinsics with SSE H.J. Lu
2019-02-10  0:19 ` [PATCH 02/43] i386: Emulate MMX packsswb/packssdw/packuswb with SSE2 H.J. Lu
2019-02-10  9:56   ` Uros Bizjak
2019-02-10 10:04     ` Uros Bizjak
2019-02-10  0:19 ` [PATCH 04/43] i386: Emulate MMX plusminus/sat_plusminus with SSE H.J. Lu
2019-02-10 10:12   ` Uros Bizjak
2019-02-10  0:19 ` [PATCH 03/43] i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX H.J. Lu
2019-02-10 10:07   ` Uros Bizjak
2019-02-10  0:19 ` [PATCH 05/43] i386: Emulate MMX mulv4hi3 with SSE H.J. Lu
2019-02-10  0:20 ` [PATCH 14/43] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi " H.J. Lu
2019-02-10 10:48   ` Uros Bizjak
2019-02-11 19:08     ` H.J. Lu
2019-02-11 19:52       ` Uros Bizjak
2019-02-10  0:20 ` [PATCH 10/43] i386: Emulate MMX mmx_andnot<mode>3 " H.J. Lu
2019-02-10  0:20 ` [PATCH 42/43] i386: Implement V2SF <-> V2SI conversions " H.J. Lu
2019-02-10  0:20 ` [PATCH 27/43] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>wv4hi3 " H.J. Lu
2019-02-10 12:23   ` Uros Bizjak
2019-02-10  0:20 ` [PATCH 35/43] i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE H.J. Lu
2019-02-10 12:34   ` Uros Bizjak
2019-02-10  0:20 ` [PATCH 20/43] i386: Emulate MMX mmx_umulv4hi3_highpart with SSE H.J. Lu
2019-02-10 12:12   ` Uros Bizjak
2019-02-10  0:20 ` [PATCH 33/43] i386: Emulate MMX ssse3_palignrdi " H.J. Lu
2019-02-10  0:20 ` [PATCH 36/43] i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE H.J. Lu
2019-02-10  0:20 ` [PATCH 06/43] i386: Emulate MMX smulv4hi3_highpart with SSE H.J. Lu
2019-02-10 10:18   ` Uros Bizjak
2019-02-10  0:20 ` [PATCH 43/43] i386: Implement V2SF comparisons " H.J. Lu
2019-02-10  0:20 ` [PATCH 08/43] i386: Emulate MMX ashr<mode>3/<shift_insn><mode>3 " H.J. Lu
2019-02-10 10:26   ` Uros Bizjak
2019-02-10 20:38     ` H.J. Lu
2019-02-10 20:49       ` Uros Bizjak
2019-02-10  0:20 ` [PATCH 19/43] i386: Emulate MMX mmx_pmovmskb " H.J. Lu
2019-02-10 12:11   ` Uros Bizjak
2019-02-10  0:20 ` [PATCH 07/43] i386: Emulate MMX mmx_pmaddwd " H.J. Lu
2019-02-10 10:21   ` Uros Bizjak
2019-02-10  0:20 ` [PATCH 12/43] i386: Emulate MMX vec_dupv2si " H.J. Lu
2019-02-10 10:36   ` Uros Bizjak
2019-02-10 21:01     ` H.J. Lu
2019-02-10 21:46       ` Uros Bizjak
2019-02-10 21:49         ` Uros Bizjak
2019-02-11  1:04           ` H.J. Lu
2019-02-11  7:25             ` Uros Bizjak
2019-02-11 12:27               ` H.J. Lu
2019-02-11 12:51                 ` Uros Bizjak
2019-02-11 13:12                   ` H.J. Lu
2019-02-10  0:20 ` [PATCH 09/43] i386: Emulate MMX <any_logic><mode>3 " H.J. Lu
2019-02-10  0:20 ` [PATCH 18/43] i386: Emulate MMX V4HI smaxmin/V8QI umaxmin " H.J. Lu
2019-02-10 11:36   ` Uros Bizjak
2019-02-10  0:20 ` [PATCH 11/43] i386: Emulate MMX mmx_eq/mmx_gt<mode>3 " H.J. Lu
2019-02-10 10:33   ` Uros Bizjak
2019-02-10  0:20 ` [PATCH 15/43] i386: Emulate MMX sse_cvtpi2ps " H.J. Lu
2019-02-10 10:56   ` Uros Bizjak
2019-02-10  0:20 ` [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers H.J. Lu
2019-02-10  9:43   ` Uros Bizjak
2019-02-10  0:20 ` [PATCH 34/43] i386: Emulate MMX abs<mode>2 with SSE H.J. Lu
2019-02-10 12:32   ` Uros Bizjak
2019-02-10  0:20 ` [PATCH 13/43] i386: Emulate MMX pshufw " H.J. Lu
2019-02-10 11:16   ` Uros Bizjak
2019-02-11 18:09     ` H.J. Lu
2019-02-11 19:36       ` Uros Bizjak
2019-02-10  0:20 ` [PATCH 28/43] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>dv2si3 " H.J. Lu
2019-02-10 12:24   ` Uros Bizjak
2019-02-10  0:23 ` [PATCH 29/43] i386: Emulate MMX ssse3_pmaddubsw " H.J. Lu
2019-02-10 12:26   ` Uros Bizjak
2019-02-10  0:23 ` [PATCH 16/43] i386: Emulate MMX mmx_pextrw " H.J. Lu
2019-02-10 11:20   ` Uros Bizjak
2019-02-10  0:23 ` [PATCH 23/43] i386: Emulate MMX mmx_uavgv4hi3 " H.J. Lu
2019-02-10  0:23 ` [PATCH 26/43] i386: Emulate MMX umulv1siv1di3 with SSE2 H.J. Lu
2019-02-10 12:17   ` Uros Bizjak
2019-02-10  0:23 ` [PATCH 40/43] i386: Enable 8-byte vectorizer for TARGET_MMX_WITH_SSE H.J. Lu
2019-02-10  0:23 ` [PATCH 22/43] i386: Emulate MMX mmx_uavgv8qi3 with SSE H.J. Lu
2019-02-10 12:19   ` Uros Bizjak
2019-02-10  0:23 ` [PATCH 37/43] i386: Allow MMX intrinsic emulation " H.J. Lu
2019-02-10  0:23 ` [PATCH 24/43] i386: Emulate MMX mmx_psadbw " H.J. Lu
2019-02-10  0:23 ` [PATCH 41/43] i386: Implement V2SF add/sub/mul " H.J. Lu
2019-02-10 12:42   ` Uros Bizjak
2019-02-10  0:23 ` [PATCH 17/43] i386: Emulate MMX mmx_pinsrw " H.J. Lu
2019-02-10 11:44   ` Uros Bizjak
2019-02-10  0:23 ` [PATCH 25/43] i386: Emulate MMX movntq with SSE2 movntidi H.J. Lu
2019-02-10  0:23 ` [PATCH 32/43] i386: Emulate MMX ssse3_psign<mode>3 with SSE H.J. Lu
2019-02-10 12:29   ` Uros Bizjak
2019-02-10  0:23 ` H.J. Lu [this message]
2019-02-10  0:24 ` [PATCH 31/43] i386: Emulate MMX pshufb with SSE version H.J. Lu
2019-02-10  0:24 ` [PATCH 39/43] i386: Also enable SSSE3 __m64 tests in 64-bit mode H.J. Lu
2019-02-10  0:24 ` [PATCH 30/43] i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE H.J. Lu
2019-02-10 12:27   ` Uros Bizjak
2019-02-10  0:24 ` [PATCH 38/43] i386: Add tests for MMX intrinsic emulations " H.J. Lu
  -- strict thread matches above, loose matches on Subject: below --
2019-02-09 13:24 [PATCH 00/43] V2: Emulate MMX intrinsics " H.J. Lu
2019-02-09 13:25 ` [PATCH 21/43] i386: Emulate MMX maskmovq with SSE2 maskmovdqu H.J. Lu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190210001947.27278-22-hjl.tools@gmail.com \
    --to=hjl.tools@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=ubizjak@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).