public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix ICEs with -mxop __builtin_ia32_vpermil2p[sd]{,256} and __builtin_ia32_vprot[bwdq]i intrinsics (PR target/49411)
@ 2011-06-15 10:26 Jakub Jelinek
  2011-06-15 17:00 ` Quentin Neill
  0 siblings, 1 reply; 8+ messages in thread
From: Jakub Jelinek @ 2011-06-15 10:26 UTC (permalink / raw)
  To: Uros Bizjak, Sebastian Pop, Fang, Changpeng; +Cc: gcc-patches

Hi!

All of these _mm{,256}_permute2_p[sd] and _mm_roti_epi{8,16,32,64}
intrinsics ICE if the last argument is a constant integer that is not in
the expected range.

I could only find MSFT documentation for these intrinsics, where for
*permute2* it says that the last argument must be 0, 1, 2 or 3, and
for *roti* it says that the last argument is an integer rotation count,
preferably constant, and that if the count is negative, it performs a
right rotation instead of a left rotation.
This patch adjusts the builtins to match that.  If we instead want to
e.g. always mandate that the last argument of _mm_roti_epi* is a constant
integer, or a constant integer in the range -N+1 .. N-1 where N is the
number after _mm_roti_epi, or in the range 0 .. N-1, it can be easily
adjusted.

Regtested on x86_64-linux {-m32,-m64}, unfortunately on a SandyBridge
box, so I couldn't verify if xop-rotate[12]-int.c actually succeeds
on xop capable HW.

2011-06-15  Jakub Jelinek  <jakub@redhat.com>

	PR target/49411
	* config/i386/i386.c (ix86_expand_multi_arg_builtin): If
	last_arg_constant and last argument doesn't match its predicate,
	for xop_vpermil2<mode>3 error out and for xop_rotl<mode>3
	if it is CONST_INT, mask it, otherwise expand using rotl<mode>3.

	* gcc.target/i386/xop-vpermil2px-1.c: New test.
	* gcc.target/i386/xop-vpermil2px-2.c: New test.
	* gcc.target/i386/xop-rotate1-int.c: New test.
	* gcc.target/i386/xop-rotate2-int.c: New test.

--- gcc/config/i386/i386.c.jj	2011-06-09 16:56:56.000000000 +0200
+++ gcc/config/i386/i386.c	2011-06-15 11:17:12.000000000 +0200
@@ -26149,16 +26149,66 @@ ix86_expand_multi_arg_builtin (enum insn
       int adjust = (comparison_p) ? 1 : 0;
       enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
 
-      if (last_arg_constant && i == nargs-1)
+      if (last_arg_constant && i == nargs - 1)
 	{
-	  if (!CONST_INT_P (op))
+	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
 	    {
-	      error ("last argument must be an immediate");
-	      return gen_reg_rtx (tmode);
+	      enum insn_code new_icode = icode;
+	      switch (icode)
+		{
+		case CODE_FOR_xop_vpermil2v2df3:
+		case CODE_FOR_xop_vpermil2v4sf3:
+		case CODE_FOR_xop_vpermil2v4df3:
+		case CODE_FOR_xop_vpermil2v8sf3:
+		  if (!CONST_INT_P (op))
+		    {
+		      error ("last argument must be an immediate");
+		      return gen_reg_rtx (tmode);
+		    }
+		  error ("last argument must be in the range 0 .. 3");
+		  return gen_reg_rtx (tmode);
+		case CODE_FOR_xop_rotlv2di3:
+		  new_icode = CODE_FOR_rotlv2di3;
+		  goto xop_rotl;
+		case CODE_FOR_xop_rotlv4si3:
+		  new_icode = CODE_FOR_rotlv4si3;
+		  goto xop_rotl;
+		case CODE_FOR_xop_rotlv8hi3:
+		  new_icode = CODE_FOR_rotlv8hi3;
+		  goto xop_rotl;
+		case CODE_FOR_xop_rotlv16qi3:
+		  new_icode = CODE_FOR_rotlv16qi3;
+		xop_rotl:
+		  if (CONST_INT_P (op))
+		    {
+		      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
+		      op = GEN_INT (INTVAL (op) & mask);
+		      gcc_checking_assert
+			(insn_data[icode].operand[i + 1].predicate (op, mode));
+		    }
+		  else
+		    {
+		      gcc_checking_assert
+			(nargs == 2
+			 && insn_data[new_icode].operand[0].mode == tmode
+			 && insn_data[new_icode].operand[1].mode == tmode
+			 && insn_data[new_icode].operand[2].mode == mode
+			 && insn_data[new_icode].operand[0].predicate
+			    == insn_data[icode].operand[0].predicate
+			 && insn_data[new_icode].operand[1].predicate
+			    == insn_data[icode].operand[1].predicate);
+		      icode = new_icode;
+		      goto non_constant;
+		    }
+		  break;
+		default:
+		  gcc_unreachable ();
+		}
 	    }
 	}
       else
 	{
+	non_constant:
 	  if (VECTOR_MODE_P (mode))
 	    op = safe_vector_operand (op, mode);
 
--- gcc/testsuite/gcc.target/i386/xop-vpermil2px-1.c.jj	2011-06-15 10:18:29.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/xop-vpermil2px-1.c	2011-06-15 10:41:13.000000000 +0200
@@ -0,0 +1,25 @@
+/* PR target/49411 */
+/* { dg-do compile } */
+/* { dg-options "-O0 -mxop" } */
+
+#include <x86intrin.h>
+
+__m128d a1, a2, a3;
+__m256d b1, b2, b3;
+__m128 c1, c2, c3;
+__m256 d1, d2, d3;
+__m128i s;
+__m256i t;
+
+void
+foo (int i)
+{
+  a1 = _mm_permute2_pd (a2, a3, s, 3);
+  b1 = _mm256_permute2_pd (b2, b3, t, 3);
+  c1 = _mm_permute2_ps (c2, c3, s, 3);
+  d1 = _mm256_permute2_ps (d2, d3, t, 3);
+  a1 = _mm_permute2_pd (a2, a3, s, 17);		/* { dg-error "last argument must be in the range 0 .. 3" } */
+  b1 = _mm256_permute2_pd (b2, b3, t, 17);	/* { dg-error "last argument must be in the range 0 .. 3" } */
+  c1 = _mm_permute2_ps (c2, c3, s, 17);		/* { dg-error "last argument must be in the range 0 .. 3" } */
+  d1 = _mm256_permute2_ps (d2, d3, t, 17);	/* { dg-error "last argument must be in the range 0 .. 3" } */
+}
--- gcc/testsuite/gcc.target/i386/xop-vpermil2px-2.c.jj	2011-06-15 10:39:36.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/xop-vpermil2px-2.c	2011-06-15 10:39:44.000000000 +0200
@@ -0,0 +1,21 @@
+/* PR target/49411 */
+/* { dg-do compile } */
+/* { dg-options "-O0 -mxop" } */
+
+#include <x86intrin.h>
+
+__m128d a1, a2, a3;
+__m256d b1, b2, b3;
+__m128 c1, c2, c3;
+__m256 d1, d2, d3;
+__m128i s;
+__m256i t;
+
+void
+foo (int i)
+{
+  a1 = _mm_permute2_pd (a2, a3, s, i);		/* { dg-error "last argument must be an immediate" } */
+  b1 = _mm256_permute2_pd (b2, b3, t, i);	/* { dg-error "last argument must be an immediate" } */
+  c1 = _mm_permute2_ps (c2, c3, s, i);		/* { dg-error "last argument must be an immediate" } */
+  d1 = _mm256_permute2_ps (d2, d3, t, i);	/* { dg-error "last argument must be an immediate" } */
+}
--- gcc/testsuite/gcc.target/i386/xop-rotate1-int.c.jj	2011-06-15 10:47:29.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/xop-rotate1-int.c	2011-06-15 11:25:25.000000000 +0200
@@ -0,0 +1,63 @@
+/* PR target/49411 */
+/* { dg-do run } */
+/* { dg-require-effective-target xop } */
+/* { dg-options "-O2 -mxop" } */
+
+#include "xop-check.h"
+
+#include <x86intrin.h>
+
+extern void abort (void);
+
+union
+{
+  __m128i v;
+  unsigned char c[16];
+  unsigned short s[8];
+  unsigned int i[4];
+  unsigned long long l[2];
+} a, b, c, d;
+
+#define TEST1(F, N, S, SS) \
+do {							\
+  for (i = 0; i < sizeof (a.F) / sizeof (a.F[0]); i++)	\
+    a.F[i] = i * 17;					\
+  s = _mm_set1_epi##SS (N);				\
+  b.v = _mm_roti_epi##S (a.v, N);			\
+  c.v = _mm_rot_epi##S (a.v, s);			\
+  for (i = 0; i < sizeof (a.F) / sizeof (a.F[0]); i++)	\
+    {							\
+      int mask = __CHAR_BIT__ * sizeof (a.F[i]) - 1;	\
+      d.F[i] = a.F[i] << (N & mask);			\
+      if (N & mask)					\
+	d.F[i] |= a.F[i] >> (mask + 1 - (N & mask));	\
+      if (b.F[i] != c.F[i] || b.F[i] != d.F[i])		\
+	abort ();					\
+    }							\
+} while (0)
+#define TEST(N) \
+  TEST1 (c, N, 8, 8);					\
+  TEST1 (s, N, 16, 16);					\
+  TEST1 (i, N, 32, 32);					\
+  TEST1 (l, N, 64, 64x)
+
+volatile int n;
+
+static void
+xop_test (void)
+{
+  unsigned int i;
+  __m128i s;
+
+#ifndef NON_CONST
+  TEST (5);
+  TEST (-5);
+  TEST (0);
+  TEST (31);
+#else
+  n = 5; TEST (n);
+  n = -5; TEST (n);
+  n = 0; TEST (n);
+  n = 31; TEST (n);
+#endif
+}
--- gcc/testsuite/gcc.target/i386/xop-rotate2-int.c.jj	2011-06-15 11:25:42.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/xop-rotate2-int.c	2011-06-15 11:26:03.000000000 +0200
@@ -0,0 +1,7 @@
+/* PR target/49411 */
+/* { dg-do run } */
+/* { dg-require-effective-target xop } */
+/* { dg-options "-O2 -mxop" } */
+
+#define NON_CONST 1
+#include "xop-rotate1-int.c"

	Jakub

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2011-06-20 17:03 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-06-15 10:26 [PATCH] Fix ICEs with -mxop __builtin_ia32_vpermil2p[sd]{,256} and __builtin_ia32_vprot[bwdq]i intrinsics (PR target/49411) Jakub Jelinek
2011-06-15 17:00 ` Quentin Neill
2011-06-16 20:42   ` Quentin Neill
2011-06-16 23:18     ` Quentin Neill
2011-06-17  0:24       ` Jakub Jelinek
2011-06-17 13:16         ` [PATCH] Fix ICEs with out of range immediates in SSE*/AVX*/XOP* " Jakub Jelinek
     [not found]           ` <BANLkTimKVOrTMzuceNrWOPUbHf6H34iQ4Q@mail.gmail.com>
2011-06-17 20:05             ` Fwd: " Uros Bizjak
2011-06-20 17:15           ` Kirill Yukhin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).