public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] x86: Cast to unsigned short first for _mm_extract_pi16
@ 2021-01-01 13:34 H.J. Lu
  2021-01-05  3:41 ` Jeff Law
  0 siblings, 1 reply; 3+ messages in thread
From: H.J. Lu @ 2021-01-01 13:34 UTC (permalink / raw)
  To: gcc-patches

_mm_extract_pi16 is the intrinsic for pextrw, whose result should be
zero-extended, not sign-extended.

gcc/

	PR target/98495
	* config/i386/xmmintrin.h (_mm_extract_pi16): Cast to unsigned
	short first.

gcc/testsuite/

	PR target/98495
	* gcc.target/i386/pr98495-1.c: New test.
	* gcc.target/i386/pr98495-2.c: New test.
	* gcc.target/i386/pr98495-3.c: New test.
	* gcc.target/i386/pr98495-4.c: New test.
	* gcc.target/i386/pr98495-5.c: New test.
---
 gcc/config/i386/xmmintrin.h               |  4 ++--
 gcc/testsuite/gcc.target/i386/pr98495-1.c | 10 ++++++++++
 gcc/testsuite/gcc.target/i386/pr98495-2.c | 11 +++++++++++
 gcc/testsuite/gcc.target/i386/pr98495-3.c |  8 ++++++++
 gcc/testsuite/gcc.target/i386/pr98495-4.c |  8 ++++++++
 gcc/testsuite/gcc.target/i386/pr98495-5.c |  9 +++++++++
 6 files changed, 48 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr98495-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr98495-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr98495-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr98495-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr98495-5.c

diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index c6f1c012630..9e27717d0c7 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -1022,7 +1022,7 @@ _mm_move_ss (__m128 __A, __m128 __B)
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_extract_pi16 (__m64 const __A, int const __N)
 {
-  return __builtin_ia32_vec_ext_v4hi ((__v4hi)__A, __N);
+  return (unsigned short) __builtin_ia32_vec_ext_v4hi ((__v4hi)__A, __N);
 }
 
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1032,7 +1032,7 @@ _m_pextrw (__m64 const __A, int const __N)
 }
 #else
 #define _mm_extract_pi16(A, N)	\
-  ((int) __builtin_ia32_vec_ext_v4hi ((__v4hi)(__m64)(A), (int)(N)))
+  ((int) (unsigned short) __builtin_ia32_vec_ext_v4hi ((__v4hi)(__m64)(A), (int)(N)))
 
 #define _m_pextrw(A, N) _mm_extract_pi16(A, N)
 #endif
diff --git a/gcc/testsuite/gcc.target/i386/pr98495-1.c b/gcc/testsuite/gcc.target/i386/pr98495-1.c
new file mode 100644
index 00000000000..df24e63d182
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98495-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mmmx -msse2 -mtune=generic" } */
+/* { dg-final { scan-assembler-not "movswl" } } */
+/* { dg-final { scan-assembler-not "movzwl" } } */
+/* { dg-final { scan-assembler-not "cwtl" } } */
+/* { dg-final { scan-assembler "pextrw" } } */
+
+#include <xmmintrin.h>
+
+unsigned int foo16(__m64 x) { return _mm_extract_pi16(x, 3); }
diff --git a/gcc/testsuite/gcc.target/i386/pr98495-2.c b/gcc/testsuite/gcc.target/i386/pr98495-2.c
new file mode 100644
index 00000000000..5fd001eefed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98495-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mmmx -msse2 -mtune=generic" } */
+/* { dg-final { scan-assembler-not "movswl" } } */
+/* { dg-final { scan-assembler-not "movzwl" } } */
+/* { dg-final { scan-assembler-not "cwtl" } } */
+/* { dg-final { scan-assembler-not "cltq" } } */
+/* { dg-final { scan-assembler "pextrw" } } */
+
+#include <xmmintrin.h>
+
+unsigned long long int foo16(__m64 x) { return _mm_extract_pi16(x, 3); }
diff --git a/gcc/testsuite/gcc.target/i386/pr98495-3.c b/gcc/testsuite/gcc.target/i386/pr98495-3.c
new file mode 100644
index 00000000000..458b470cbd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98495-3.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mmmx -msse2 -mtune=generic" } */
+/* { dg-final { scan-assembler "(movswl|cwtl)" } } */
+/* { dg-final { scan-assembler "pextrw" } } */
+
+#include <xmmintrin.h>
+
+int foo16(__m64 x) { return (short) _mm_extract_pi16(x, 3); }
diff --git a/gcc/testsuite/gcc.target/i386/pr98495-4.c b/gcc/testsuite/gcc.target/i386/pr98495-4.c
new file mode 100644
index 00000000000..16d3cbadb03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98495-4.c
@@ -0,0 +1,8 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mmmx -msse2 -mtune=generic" } */
+/* { dg-final { scan-assembler "movswq" } } */
+/* { dg-final { scan-assembler "pextrw" } } */
+
+#include <xmmintrin.h>
+
+long long int foo16(__m64 x) { return (short) _mm_extract_pi16(x, 3); }
diff --git a/gcc/testsuite/gcc.target/i386/pr98495-5.c b/gcc/testsuite/gcc.target/i386/pr98495-5.c
new file mode 100644
index 00000000000..a62b42d8eb3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98495-5.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -mmmx -msse2 -mtune=generic" } */
+/* { dg-final { scan-assembler-not "movswl" } } */
+/* { dg-final { scan-assembler-not "cwtl" } } */
+/* { dg-final { scan-assembler "pextrw" } } */
+
+#include <xmmintrin.h>
+
+unsigned int foo16(__m64 x) { return _mm_extract_pi16(x, 3); }
-- 
2.29.2


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] x86: Cast to unsigned short first for _mm_extract_pi16
  2021-01-01 13:34 [PATCH] x86: Cast to unsigned short first for _mm_extract_pi16 H.J. Lu
@ 2021-01-05  3:41 ` Jeff Law
  2021-01-05 19:03   ` [PATCH] x86: Use unsigned short to compute pextrw result H.J. Lu
  0 siblings, 1 reply; 3+ messages in thread
From: Jeff Law @ 2021-01-05  3:41 UTC (permalink / raw)
  To: H.J. Lu, gcc-patches



On 1/1/21 6:34 AM, H.J. Lu via Gcc-patches wrote:
> _mm_extract_pi16 is intrinsic for pextrw, which should be zero-extended,
> not sign-extended.
>
> gcc/
>
> 	PR target/98495
> 	* config/i386/xmmintrin.h (_mm_extract_pi16): Cast to unsigned
> 	short first.
I'd tend to prefer masking with 0xffff rather than relying on the size
of a particular type being what we need.  But this header is limited to
just x86, and it doesn't look like there's any variance in the size of a
short across the x86 platforms.

So, OK.
jeff


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH] x86: Use unsigned short to compute pextrw result
  2021-01-05  3:41 ` Jeff Law
@ 2021-01-05 19:03   ` H.J. Lu
  0 siblings, 0 replies; 3+ messages in thread
From: H.J. Lu @ 2021-01-05 19:03 UTC (permalink / raw)
  To: Jeff Law; +Cc: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 813 bytes --]

On Mon, Jan 4, 2021 at 7:41 PM Jeff Law <law@redhat.com> wrote:
>
>
>
> On 1/1/21 6:34 AM, H.J. Lu via Gcc-patches wrote:
> > _mm_extract_pi16 is intrinsic for pextrw, which should be zero-extended,
> > not sign-extended.
> >
> > gcc/
> >
> >       PR target/98495
> >       * config/i386/xmmintrin.h (_mm_extract_pi16): Cast to unsigned
> >       short first.
> I'd tend to prefer masking with 0xffff  rather than relying on the size
> of a particular type being what we need.  But this header is limited to
> just x86 and it doesn't look like there's any variance in the size of a
> short, across the x86 platforms.
>
> So, OK.
> jeff
>

I am checking in this patch to use unsigned short to compute the
zero-extended pextrw result.  This fixes:

FAIL: gcc.target/i386/sse2-mmx-pextrw.c execution test

-- 
H.J.

[-- Attachment #2: 0001-x86-Use-unsigned-short-to-compute-pextrw-result.patch --]
[-- Type: text/x-patch, Size: 1059 bytes --]

From 4b3d73a439caffd82eba0a64ee43bae5d5e07de9 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Tue, 5 Jan 2021 10:57:20 -0800
Subject: [PATCH] x86: Use unsigned short to compute pextrw result

Use unsigned short to compute the zero-extended pextrw result.

	PR target/98495
	* gcc.target/i386/sse2-mmx-pextrw.c (compute_correct_result): Use
	unsigned short to compute pextrw result.
---
 gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c
index bb48740a7ca..edbac919fd8 100644
--- a/gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c
@@ -32,7 +32,7 @@ test_pextrw (__m64 *i, unsigned int imm, int *r)
 static void
 compute_correct_result (__m64 *src_p, unsigned int imm, int *res_p)
 {
-  short *src = (short *) src_p;
+  unsigned short *src = (unsigned short *) src_p;
   if (imm < 4)
     *res_p = src[imm];
 }
-- 
2.29.2


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-01-05 19:03 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-01 13:34 [PATCH] x86: Cast to unsigned short first for _mm_extract_pi16 H.J. Lu
2021-01-05  3:41 ` Jeff Law
2021-01-05 19:03   ` [PATCH] x86: Use unsigned short to compute pextrw result H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).