* [PATCH] x86: Cast to unsigned short first for _mm_extract_pi16
@ 2021-01-01 13:34 H.J. Lu
2021-01-05 3:41 ` Jeff Law
0 siblings, 1 reply; 3+ messages in thread
From: H.J. Lu @ 2021-01-01 13:34 UTC (permalink / raw)
To: gcc-patches
_mm_extract_pi16 is the intrinsic for pextrw, whose result should be
zero-extended, not sign-extended.
gcc/
PR target/98495
* config/i386/xmmintrin.h (_mm_extract_pi16): Cast to unsigned
short first.
gcc/testsuite/
PR target/98495
* gcc.target/i386/pr98495-1.c: New test.
* gcc.target/i386/pr98495-2.c: New test.
* gcc.target/i386/pr98495-3.c: New test.
* gcc.target/i386/pr98495-4.c: New test.
* gcc.target/i386/pr98495-5.c: New test.
---
gcc/config/i386/xmmintrin.h | 4 ++--
gcc/testsuite/gcc.target/i386/pr98495-1.c | 10 ++++++++++
gcc/testsuite/gcc.target/i386/pr98495-2.c | 11 +++++++++++
gcc/testsuite/gcc.target/i386/pr98495-3.c | 8 ++++++++
gcc/testsuite/gcc.target/i386/pr98495-4.c | 8 ++++++++
gcc/testsuite/gcc.target/i386/pr98495-5.c | 9 +++++++++
6 files changed, 48 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr98495-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr98495-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr98495-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr98495-4.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr98495-5.c
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index c6f1c012630..9e27717d0c7 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -1022,7 +1022,7 @@ _mm_move_ss (__m128 __A, __m128 __B)
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_pi16 (__m64 const __A, int const __N)
{
- return __builtin_ia32_vec_ext_v4hi ((__v4hi)__A, __N);
+ return (unsigned short) __builtin_ia32_vec_ext_v4hi ((__v4hi)__A, __N);
}
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1032,7 +1032,7 @@ _m_pextrw (__m64 const __A, int const __N)
}
#else
#define _mm_extract_pi16(A, N) \
- ((int) __builtin_ia32_vec_ext_v4hi ((__v4hi)(__m64)(A), (int)(N)))
+ ((int) (unsigned short) __builtin_ia32_vec_ext_v4hi ((__v4hi)(__m64)(A), (int)(N)))
#define _m_pextrw(A, N) _mm_extract_pi16(A, N)
#endif
diff --git a/gcc/testsuite/gcc.target/i386/pr98495-1.c b/gcc/testsuite/gcc.target/i386/pr98495-1.c
new file mode 100644
index 00000000000..df24e63d182
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98495-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mmmx -msse2 -mtune=generic" } */
+/* { dg-final { scan-assembler-not "movswl" } } */
+/* { dg-final { scan-assembler-not "movzwl" } } */
+/* { dg-final { scan-assembler-not "cwtl" } } */
+/* { dg-final { scan-assembler "pextrw" } } */
+
+#include <xmmintrin.h>
+
+unsigned int foo16(__m64 x) { return _mm_extract_pi16(x, 3); }
diff --git a/gcc/testsuite/gcc.target/i386/pr98495-2.c b/gcc/testsuite/gcc.target/i386/pr98495-2.c
new file mode 100644
index 00000000000..5fd001eefed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98495-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mmmx -msse2 -mtune=generic" } */
+/* { dg-final { scan-assembler-not "movswl" } } */
+/* { dg-final { scan-assembler-not "movzwl" } } */
+/* { dg-final { scan-assembler-not "cwtl" } } */
+/* { dg-final { scan-assembler-not "cltq" } } */
+/* { dg-final { scan-assembler "pextrw" } } */
+
+#include <xmmintrin.h>
+
+unsigned long long int foo16(__m64 x) { return _mm_extract_pi16(x, 3); }
diff --git a/gcc/testsuite/gcc.target/i386/pr98495-3.c b/gcc/testsuite/gcc.target/i386/pr98495-3.c
new file mode 100644
index 00000000000..458b470cbd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98495-3.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mmmx -msse2 -mtune=generic" } */
+/* { dg-final { scan-assembler "(movswl|cwtl)" } } */
+/* { dg-final { scan-assembler "pextrw" } } */
+
+#include <xmmintrin.h>
+
+int foo16(__m64 x) { return (short) _mm_extract_pi16(x, 3); }
diff --git a/gcc/testsuite/gcc.target/i386/pr98495-4.c b/gcc/testsuite/gcc.target/i386/pr98495-4.c
new file mode 100644
index 00000000000..16d3cbadb03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98495-4.c
@@ -0,0 +1,8 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mmmx -msse2 -mtune=generic" } */
+/* { dg-final { scan-assembler "movswq" } } */
+/* { dg-final { scan-assembler "pextrw" } } */
+
+#include <xmmintrin.h>
+
+long long int foo16(__m64 x) { return (short) _mm_extract_pi16(x, 3); }
diff --git a/gcc/testsuite/gcc.target/i386/pr98495-5.c b/gcc/testsuite/gcc.target/i386/pr98495-5.c
new file mode 100644
index 00000000000..a62b42d8eb3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98495-5.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -mmmx -msse2 -mtune=generic" } */
+/* { dg-final { scan-assembler-not "movswl" } } */
+/* { dg-final { scan-assembler-not "cwtl" } } */
+/* { dg-final { scan-assembler "pextrw" } } */
+
+#include <xmmintrin.h>
+
+unsigned int foo16(__m64 x) { return _mm_extract_pi16(x, 3); }
--
2.29.2
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] x86: Cast to unsigned short first for _mm_extract_pi16
2021-01-01 13:34 [PATCH] x86: Cast to unsigned short first for _mm_extract_pi16 H.J. Lu
@ 2021-01-05 3:41 ` Jeff Law
2021-01-05 19:03 ` [PATCH] x86: Use unsigned short to compute pextrw result H.J. Lu
0 siblings, 1 reply; 3+ messages in thread
From: Jeff Law @ 2021-01-05 3:41 UTC (permalink / raw)
To: H.J. Lu, gcc-patches
On 1/1/21 6:34 AM, H.J. Lu via Gcc-patches wrote:
> _mm_extract_pi16 is intrinsic for pextrw, which should be zero-extended,
> not sign-extended.
>
> gcc/
>
> PR target/98495
> * config/i386/xmmintrin.h (_mm_extract_pi16): Cast to unsigned
> short first.
I'd tend to prefer masking with 0xffff rather than relying on the size
of a particular type being what we need. But this header is limited to
just x86, and it doesn't look like there's any variance in the size of a
short across the x86 platforms.
So, OK.
jeff
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH] x86: Use unsigned short to compute pextrw result
2021-01-05 3:41 ` Jeff Law
@ 2021-01-05 19:03 ` H.J. Lu
0 siblings, 0 replies; 3+ messages in thread
From: H.J. Lu @ 2021-01-05 19:03 UTC (permalink / raw)
To: Jeff Law; +Cc: GCC Patches
[-- Attachment #1: Type: text/plain, Size: 813 bytes --]
On Mon, Jan 4, 2021 at 7:41 PM Jeff Law <law@redhat.com> wrote:
>
>
>
> On 1/1/21 6:34 AM, H.J. Lu via Gcc-patches wrote:
> > _mm_extract_pi16 is intrinsic for pextrw, which should be zero-extended,
> > not sign-extended.
> >
> > gcc/
> >
> > PR target/98495
> > * config/i386/xmmintrin.h (_mm_extract_pi16): Cast to unsigned
> > short first.
> I'd tend to prefer masking with 0xffff rather than relying on the size
> of a particular type being what we need. But this header is limited to
> just x86 and it doesn't look like there's any variance in the size of a
> short, across the x86 platforms.
>
> So, OK.
> jeff
>
I am checking in this patch to use unsigned short to compute the
zero-extended pextrw result. This fixes:
FAIL: gcc.target/i386/sse2-mmx-pextrw.c execution test
--
H.J.
[-- Attachment #2: 0001-x86-Use-unsigned-short-to-compute-pextrw-result.patch --]
[-- Type: text/x-patch, Size: 1059 bytes --]
From 4b3d73a439caffd82eba0a64ee43bae5d5e07de9 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Tue, 5 Jan 2021 10:57:20 -0800
Subject: [PATCH] x86: Use unsigned short to compute pextrw result
Use unsigned short to compute the zero-extended pextrw result.
PR target/98495
* gcc.target/i386/sse2-mmx-pextrw.c (compute_correct_result): Use
unsigned short to compute pextrw result.
---
gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c
index bb48740a7ca..edbac919fd8 100644
--- a/gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c
@@ -32,7 +32,7 @@ test_pextrw (__m64 *i, unsigned int imm, int *r)
static void
compute_correct_result (__m64 *src_p, unsigned int imm, int *res_p)
{
- short *src = (short *) src_p;
+ unsigned short *src = (unsigned short *) src_p;
if (imm < 4)
*res_p = src[imm];
}
--
2.29.2
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2021-01-05 19:03 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-01 13:34 [PATCH] x86: Cast to unsigned short first for _mm_extract_pi16 H.J. Lu
2021-01-05 3:41 ` Jeff Law
2021-01-05 19:03 ` [PATCH] x86: Use unsigned short to compute pextrw result H.J. Lu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).