public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v2, rs6000] (PR84302) Fix _mm_slli_epi{32,64} for shift values 16 through 31 and negative
@ 2018-04-13 20:21 Paul Clarke
  2018-04-13 22:40 ` Segher Boessenkool
  0 siblings, 1 reply; 6+ messages in thread
From: Paul Clarke @ 2018-04-13 20:21 UTC (permalink / raw)
  To: gcc-patches, Segher Boessenkool

The powerpc versions of _mm_slli_epi32 and __mm_slli_epi64 in emmintrin.h
do not properly handle shift values between 16 and 31, inclusive.
These are setting up the shift with vec_splat_s32, which only accepts
*5 bit signed* shift values, or a range of -16 to 15.  Values above 15
produce an error:

  error: argument 1 must be a 5-bit signed literal

Fix is to effectively reduce the range for which vec_splat_s32 is used
to < 32 and use vec_splats otherwise.

Also, __mm_slli_epi{16,32,64}, when given a negative shift value,
should always return a vector of {0}.

2018-04-13  Paul A. Clarke  <pc@us.ibm.com>

Changes in v2:
- fixed the "shift by 0" cases, which were being treated as negative
  shifts and returning {0} instead of a no-op. (Segher)
- fixed the test cases to correctly test the shift-by zero cases.

gcc/

	PR target/83402
	* config/rs6000/emmintrin.h (_mm_slli_epi{16,32,64}):
	Ensure that vec_splat_s32 is only called with 0 <= shift < 16.
	Ensure negative shifts result in {0}.

gcc/testsuite/

	PR target/83402
	* gcc.target/powerpc/sse2-psllw-1.c: Refactor and add tests for
	several values:  positive, negative, and zero.
	* gcc.target/powerpc/sse2-pslld-1.c: Same.
	* gcc.target/powerpc/sse2-psllq-1.c: Same.

Index: gcc/config/rs6000/emmintrin.h
===================================================================
--- gcc/config/rs6000/emmintrin.h	(revision 259375)
+++ gcc/config/rs6000/emmintrin.h	(working copy)
@@ -1488,7 +1488,7 @@ _mm_slli_epi16 (__m128i __A, int __B)
   __v8hu lshift;
   __v8hi result = { 0, 0, 0, 0, 0, 0, 0, 0 };
 
-  if (__B < 16)
+  if (__B >= 0 && __B < 16)
     {
       if (__builtin_constant_p(__B))
 	  lshift = (__v8hu) vec_splat_s16(__B);
@@ -1507,12 +1507,12 @@ _mm_slli_epi32 (__m128i __A, int __B)
   __v4su lshift;
   __v4si result = { 0, 0, 0, 0 };
 
-  if (__B < 32)
+  if (__B >= 0 && __B < 32)
     {
-      if (__builtin_constant_p(__B))
-	lshift = (__v4su) vec_splat_s32(__B);
+      if (__builtin_constant_p(__B) && __B < 16)
+        lshift = (__v4su) vec_splat_s32(__B);
       else
-	lshift = vec_splats ((unsigned int) __B);
+        lshift = vec_splats ((unsigned int) __B);
 
       result = vec_vslw ((__v4si) __A, lshift);
     }
@@ -1527,17 +1527,12 @@ _mm_slli_epi64 (__m128i __A, int __B)
   __v2du lshift;
   __v2di result = { 0, 0 };
 
-  if (__B < 64)
+  if (__B >= 0 && __B < 64)
     {
-      if (__builtin_constant_p(__B))
-	{
-	  if (__B < 32)
-	      lshift = (__v2du) vec_splat_s32(__B);
-	    else
-	      lshift = (__v2du) vec_splats((unsigned long long)__B);
-	}
+      if (__builtin_constant_p(__B) && __B < 16)
+	lshift = (__v2du) vec_splat_s32(__B);
       else
-	  lshift = (__v2du) vec_splats ((unsigned int) __B);
+	lshift = (__v2du) vec_splats ((unsigned int) __B);
 
       result = vec_vsld ((__v2di) __A, lshift);
     }
Index: gcc/testsuite/gcc.target/powerpc/sse2-pslld-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/sse2-pslld-1.c	(revision 259375)
+++ gcc/testsuite/gcc.target/powerpc/sse2-pslld-1.c	(working copy)
@@ -13,32 +13,50 @@
 #define TEST sse2_test_pslld_1
 #endif
 
-#define N 0xf
-
 #include <emmintrin.h>
 
-static __m128i
-__attribute__((noinline, unused))
-test (__m128i s1)
-{
-  return _mm_slli_epi32 (s1, N); 
-}
+#define TEST_FUNC(id, N) \
+  static __m128i \
+  __attribute__((noinline, unused)) \
+  test##id (__m128i s1) \
+  { \
+    return _mm_slli_epi32 (s1, N);  \
+  }
 
+TEST_FUNC(0, 0)
+TEST_FUNC(15, 15)
+TEST_FUNC(16, 16)
+TEST_FUNC(31, 31)
+TEST_FUNC(neg1, -1)
+TEST_FUNC(neg16, -16)
+TEST_FUNC(neg32, -32)
+TEST_FUNC(neg64, -64)
+TEST_FUNC(neg128, -128)
+
+#define TEST_CODE(id, N) \
+  { \
+    int e[4] = {0}; \
+    union128i_d u, s; \
+    int i; \
+    s.x = _mm_set_epi32 (1, -2, 3, 4); \
+    u.x = test##id (s.x); \
+    if (N >= 0 && N < 32) \
+      for (i = 0; i < 4; i++) \
+        e[i] = s.a[i] << (N * (N >= 0)); \
+    if (check_union128i_d (u, e)) \
+      abort (); \
+  }
+
 static void
 TEST (void)
 {
-  union128i_d u, s;
-  int e[4] = {0};
-  int i;
- 
-  s.x = _mm_set_epi32 (1, -2, 3, 4);
-
-  u.x = test (s.x);
-
-  if (N < 32)
-    for (i = 0; i < 4; i++)
-      e[i] = s.a[i] << N; 
-
-  if (check_union128i_d (u, e))
-    abort (); 
+  TEST_CODE(0, 0);
+  TEST_CODE(15, 15);
+  TEST_CODE(16, 16);
+  TEST_CODE(31, 31);
+  TEST_CODE(neg1, -1);
+  TEST_CODE(neg16, -16);
+  TEST_CODE(neg32, -32);
+  TEST_CODE(neg64, -64);
+  TEST_CODE(neg128, -128);
 }
Index: gcc/testsuite/gcc.target/powerpc/sse2-psllq-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/sse2-psllq-1.c	(revision 259375)
+++ gcc/testsuite/gcc.target/powerpc/sse2-psllq-1.c	(working copy)
@@ -13,36 +13,56 @@
 #define TEST sse2_test_psllq_1
 #endif
 
-#define N 60
-
 #include <emmintrin.h>
 
 #ifdef _ARCH_PWR8
-static __m128i
-__attribute__((noinline, unused))
-test (__m128i s1)
-{
-  return _mm_slli_epi64 (s1, N); 
-}
+#define TEST_FUNC(id, N) \
+  static __m128i \
+  __attribute__((noinline, unused)) \
+  test##id (__m128i s1) \
+  { \
+    return _mm_slli_epi64 (s1, N);  \
+  }
+
+TEST_FUNC(0, 0)
+TEST_FUNC(15, 15)
+TEST_FUNC(16, 16)
+TEST_FUNC(31, 31)
+TEST_FUNC(63, 63)
+TEST_FUNC(neg1, -1)
+TEST_FUNC(neg16, -16)
+TEST_FUNC(neg32, -32)
+TEST_FUNC(neg64, -64)
+TEST_FUNC(neg128, -128)
 #endif
 
+#define TEST_CODE(id, N) \
+  { \
+    union128i_q u, s; \
+    long long e[2] = {0}; \
+    int i; \
+    s.x = _mm_set_epi64x (-1, 0xf); \
+    u.x = test##id (s.x); \
+    if (N >= 0 && N < 64) \
+      for (i = 0; i < 2; i++) \
+        e[i] = s.a[i] << (N * (N >= 0)); \
+    if (check_union128i_q (u, e)) \
+      abort (); \
+  }
+
 static void
 TEST (void)
 {
 #ifdef _ARCH_PWR8
-  union128i_q u, s;
-  long long e[2] = {0};
-  int i;
- 
-  s.x = _mm_set_epi64x (-1, 0xf);
-
-  u.x = test (s.x);
-
-  if (N < 64)
-    for (i = 0; i < 2; i++)
-      e[i] = s.a[i] << N; 
-
-  if (check_union128i_q (u, e))
-    abort (); 
+  TEST_CODE(0, 0);
+  TEST_CODE(15, 15);
+  TEST_CODE(16, 16);
+  TEST_CODE(31, 31);
+  TEST_CODE(63, 63);
+  TEST_CODE(neg1, -1);
+  TEST_CODE(neg16, -16);
+  TEST_CODE(neg32, -32);
+  TEST_CODE(neg64, -64);
+  TEST_CODE(neg128, -128);
 #endif
 }
Index: gcc/testsuite/gcc.target/powerpc/sse2-psllw-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/sse2-psllw-1.c	(revision 259375)
+++ gcc/testsuite/gcc.target/powerpc/sse2-psllw-1.c	(working copy)
@@ -13,32 +13,48 @@
 #define TEST sse2_test_psllw_1
 #endif
 
-#define N 0xb
-
 #include <emmintrin.h>
 
-static __m128i
-__attribute__((noinline, unused))
-test (__m128i s1)
-{
-  return _mm_slli_epi16 (s1, N); 
-}
+#define TEST_FUNC(id, N) \
+  static __m128i \
+  __attribute__((noinline, unused)) \
+  test##id (__m128i s1) \
+  { \
+    return _mm_slli_epi16 (s1, N);  \
+  }
 
+TEST_FUNC(0, 0)
+TEST_FUNC(15, 15)
+TEST_FUNC(16, 16)
+TEST_FUNC(neg1, -1)
+TEST_FUNC(neg16, -16)
+TEST_FUNC(neg32, -32)
+TEST_FUNC(neg64, -64)
+TEST_FUNC(neg128, -128)
+
+#define TEST_CODE(id, N) \
+  { \
+    short e[8] = {0}; \
+    union128i_w u, s; \
+    int i; \
+    s.x = _mm_set_epi16 (1, 2, 3, 4, 5, 6, 0x7000, 0x9000); \
+    u.x = test##id (s.x); \
+    if (N >= 0 && N < 16) \
+      for (i = 0; i < 8; i++) \
+        e[i] = s.a[i] << (N * (N >= 0)); \
+    if (check_union128i_w (u, e)) \
+      abort (); \
+  }
+
 static void
 TEST (void)
 {
-  union128i_w u, s;
-  short e[8] = {0};
-  int i;
- 
-  s.x = _mm_set_epi16 (1, 2, 3, 4, 5, 6, 0x7000, 0x9000);
-
-  u.x = test (s.x);
-
-  if (N < 16)
-    for (i = 0; i < 8; i++)
-      e[i] = s.a[i] << N; 
-
-  if (check_union128i_w (u, e))
-    abort (); 
+  TEST_CODE(0, 0);
+  TEST_CODE(15, 15);
+  TEST_CODE(16, 16);
+  TEST_CODE(neg1, -1);
+  TEST_CODE(neg16, -16);
+  TEST_CODE(neg32, -32);
+  TEST_CODE(neg64, -64);
+  TEST_CODE(neg128, -128);
 }
--
PC

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2, rs6000] (PR84302) Fix _mm_slli_epi{32,64} for shift values 16 through 31 and negative
  2018-04-13 20:21 [PATCH v2, rs6000] (PR84302) Fix _mm_slli_epi{32,64} for shift values 16 through 31 and negative Paul Clarke
@ 2018-04-13 22:40 ` Segher Boessenkool
  2018-04-23 19:47   ` Paul Clarke
  0 siblings, 1 reply; 6+ messages in thread
From: Segher Boessenkool @ 2018-04-13 22:40 UTC (permalink / raw)
  To: Paul Clarke; +Cc: gcc-patches

Hi!

On Fri, Apr 13, 2018 at 03:21:14PM -0500, Paul Clarke wrote:
> -      if (__builtin_constant_p(__B))
> -	lshift = (__v4su) vec_splat_s32(__B);
> +      if (__builtin_constant_p(__B) && __B < 16)
> +        lshift = (__v4su) vec_splat_s32(__B);
>        else
> -	lshift = vec_splats ((unsigned int) __B);
> +        lshift = vec_splats ((unsigned int) __B);

You changed the tab chars to spaces here.  Please don't.  I'll fix it.

Rest looks fine...  Let's see if I manage to commit it :-)

Thanks,


Segher

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2, rs6000] (PR84302) Fix _mm_slli_epi{32,64} for shift values 16 through 31 and negative
  2018-04-13 22:40 ` Segher Boessenkool
@ 2018-04-23 19:47   ` Paul Clarke
  2018-04-23 20:03     ` Segher Boessenkool
  0 siblings, 1 reply; 6+ messages in thread
From: Paul Clarke @ 2018-04-23 19:47 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: gcc-patches

On 04/13/2018 05:40 PM, Segher Boessenkool wrote:
> Rest looks fine...  Let's see if I manage to commit it :-)

Thanks, Segher!

Can I push this to ibm/gcc-7-branch?

PC

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2, rs6000] (PR84302) Fix _mm_slli_epi{32,64} for shift values 16 through 31 and negative
  2018-04-23 19:47   ` Paul Clarke
@ 2018-04-23 20:03     ` Segher Boessenkool
  2018-04-23 20:49       ` Paul Clarke
  2018-05-22  1:52       ` Gerald Pfeifer
  0 siblings, 2 replies; 6+ messages in thread
From: Segher Boessenkool @ 2018-04-23 20:03 UTC (permalink / raw)
  To: Paul Clarke; +Cc: gcc-patches

On Mon, Apr 23, 2018 at 02:23:42PM -0500, Paul Clarke wrote:
> On 04/13/2018 05:40 PM, Segher Boessenkool wrote:
> > Rest looks fine...  Let's see if I manage to commit it :-)
> 
> Thanks, Segher!
> 
> Can I push this to ibm/gcc-7-branch?

Don't ask me, I'm not maintainer of that branch.  I would point you
to the wiki page explaining who owns it, but i cannot find that page.
Ugh.

You would normally get stuff onto the ibm 7 branch by just merging
the fsf 7 branch; is there any reason it cannot go there?


Segher

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2, rs6000] (PR84302) Fix _mm_slli_epi{32,64} for shift values 16 through 31 and negative
  2018-04-23 20:03     ` Segher Boessenkool
@ 2018-04-23 20:49       ` Paul Clarke
  2018-05-22  1:52       ` Gerald Pfeifer
  1 sibling, 0 replies; 6+ messages in thread
From: Paul Clarke @ 2018-04-23 20:49 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: gcc-patches

On 04/23/2018 02:47 PM, Segher Boessenkool wrote:
> On Mon, Apr 23, 2018 at 02:23:42PM -0500, Paul Clarke wrote:
>> Can I push this to ibm/gcc-7-branch?

> Don't ask me, I'm not maintainer of that branch.  I would point you
> to the wiki page explaining who owns it, but i cannot find that page.
> Ugh.

> You would normally get stuff onto the ibm 7 branch by just merging
> the fsf 7 branch; is there any reason it cannot go there?

Yes, the patch applies to code which was itself backported from trunk (GCC 8) and doesn't exist in GCC 7.

PC

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2, rs6000] (PR84302) Fix _mm_slli_epi{32,64} for shift values 16 through 31 and negative
  2018-04-23 20:03     ` Segher Boessenkool
  2018-04-23 20:49       ` Paul Clarke
@ 2018-05-22  1:52       ` Gerald Pfeifer
  1 sibling, 0 replies; 6+ messages in thread
From: Gerald Pfeifer @ 2018-05-22  1:52 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: Paul Clarke, gcc-patches

On Mon, 23 Apr 2018, Segher Boessenkool wrote:
>> Can I push this to ibm/gcc-7-branch?
> Don't ask me, I'm not maintainer of that branch.  I would point you
> to the wiki page explaining who owns it, but i cannot find that page.

https://gcc.gnu.org/svn.html

Except that the ibm/gcc-7-branch is not listed there, just a couple
of other ibm/ branches.

Perhaps you guys can address that?

Gerald

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-05-22  1:42 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-13 20:21 [PATCH v2, rs6000] (PR84302) Fix _mm_slli_epi{32,64} for shift values 16 through 31 and negative Paul Clarke
2018-04-13 22:40 ` Segher Boessenkool
2018-04-23 19:47   ` Paul Clarke
2018-04-23 20:03     ` Segher Boessenkool
2018-04-23 20:49       ` Paul Clarke
2018-05-22  1:52       ` Gerald Pfeifer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).