[PATCH] Fix ICEs with -mxop __builtin_ia32_vpermil2p[sd]{,256} and __builtin_ia32_vprot[bwdq]i intrinsics (PR target/49411)

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH] Fix ICEs with -mxop __builtin_ia32_vpermil2p[sd]{,256} and __builtin_ia32_vprot[bwdq]i intrinsics (PR target/49411)
@ 2011-06-15 10:26 Jakub Jelinek
  2011-06-15 17:00 ` Quentin Neill
  0 siblings, 1 reply; 8+ messages in thread
From: Jakub Jelinek @ 2011-06-15 10:26 UTC (permalink / raw)
  To: Uros Bizjak, Sebastian Pop, Fang, Changpeng; +Cc: gcc-patches

Hi!

All of these _mm{,256}_permute2_p[sd] and _mm_roti_epi{8,16,32,64}
intrinsics ICE if the last argument is a constant integer, but not in the
expected range.

I could only find MSFT documentation for these intrinsics, where for
*permute2* it says that the last argument must be 0, 1, 2 or 3,
for *roti* it says that the last argument is integer rotation count,
preferably constant and that if count is negative, it performs right
rotation instead of left rotation.
This patch adjusts the builtins to match that; if we instead want to
e.g. always mandate that the _mm_roti_epi* last argument is a constant integer,
or constant integer in the range -N+1 .. N-1 where N is the number
after _mm_roti_epi, or in the range 0 .. N-1, it can be easily adjusted.

Regtested on x86_64-linux {-m32,-m64}, unfortunately on a SandyBridge
box, so I couldn't verify if xop-rotate[12]-int.c actually succeeds
on xop capable HW.

2011-06-15  Jakub Jelinek  <jakub@redhat.com>

	PR target/49411
	* config/i386/i386.c (ix86_expand_multi_arg_builtin): If
	last_arg_constant and last argument doesn't match its predicate,
	for xop_vpermil2<mode>3 error out and for xop_rotl<mode>3
	if it is CONST_INT, mask it, otherwise expand using rotl<mode>3.

	* gcc.target/i386/xop-vpermil2px-1.c: New test.
	* gcc.target/i386/xop-vpermil2px-2.c: New test.
	* gcc.target/i386/xop-rotate1-int.c: New test.
	* gcc.target/i386/xop-rotate2-int.c: New test.

--- gcc/config/i386/i386.c.jj	2011-06-09 16:56:56.000000000 +0200
+++ gcc/config/i386/i386.c	2011-06-15 11:17:12.000000000 +0200
@@ -26149,16 +26149,66 @@ ix86_expand_multi_arg_builtin (enum insn
       int adjust = (comparison_p) ? 1 : 0;
       enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
 
-      if (last_arg_constant && i == nargs-1)
+      if (last_arg_constant && i == nargs - 1)
 	{
-	  if (!CONST_INT_P (op))
+	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
 	    {
-	      error ("last argument must be an immediate");
-	      return gen_reg_rtx (tmode);
+	      enum insn_code new_icode = icode;
+	      switch (icode)
+		{
+		case CODE_FOR_xop_vpermil2v2df3:
+		case CODE_FOR_xop_vpermil2v4sf3:
+		case CODE_FOR_xop_vpermil2v4df3:
+		case CODE_FOR_xop_vpermil2v8sf3:
+		  if (!CONST_INT_P (op))
+		    {
+		      error ("last argument must be an immediate");
+		      return gen_reg_rtx (tmode);
+		    }
+		  error ("last argument must be in the range 0 .. 3");
+		  return gen_reg_rtx (tmode);
+		case CODE_FOR_xop_rotlv2di3:
+		  new_icode = CODE_FOR_rotlv2di3;
+		  goto xop_rotl;
+		case CODE_FOR_xop_rotlv4si3:
+		  new_icode = CODE_FOR_rotlv4si3;
+		  goto xop_rotl;
+		case CODE_FOR_xop_rotlv8hi3:
+		  new_icode = CODE_FOR_rotlv8hi3;
+		  goto xop_rotl;
+		case CODE_FOR_xop_rotlv16qi3:
+		  new_icode = CODE_FOR_rotlv16qi3;
+		xop_rotl:
+		  if (CONST_INT_P (op))
+		    {
+		      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
+		      op = GEN_INT (INTVAL (op) & mask);
+		      gcc_checking_assert
+			(insn_data[icode].operand[i + 1].predicate (op, mode));
+		    }
+		  else
+		    {
+		      gcc_checking_assert
+			(nargs == 2
+			 && insn_data[new_icode].operand[0].mode == tmode
+			 && insn_data[new_icode].operand[1].mode == tmode
+			 && insn_data[new_icode].operand[2].mode == mode
+			 && insn_data[new_icode].operand[0].predicate
+			    == insn_data[icode].operand[0].predicate
+			 && insn_data[new_icode].operand[1].predicate
+			    == insn_data[icode].operand[1].predicate);
+		      icode = new_icode;
+		      goto non_constant;
+		    }
+		  break;
+		default:
+		  gcc_unreachable ();
+		}
 	    }
 	}
       else
 	{
+	non_constant:
 	  if (VECTOR_MODE_P (mode))
 	    op = safe_vector_operand (op, mode);
 
--- gcc/testsuite/gcc.target/i386/xop-vpermil2px-1.c.jj	2011-06-15 10:18:29.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/xop-vpermil2px-1.c	2011-06-15 10:41:13.000000000 +0200
@@ -0,0 +1,25 @@
+/* PR target/49411 */
+/* { dg-do compile } */
+/* { dg-options "-O0 -mxop" } */
+
+#include <x86intrin.h>
+
+__m128d a1, a2, a3;
+__m256d b1, b2, b3;
+__m128 c1, c2, c3;
+__m256 d1, d2, d3;
+__m128i s;
+__m256i t;
+
+void
+foo (int i)
+{
+  a1 = _mm_permute2_pd (a2, a3, s, 3);
+  b1 = _mm256_permute2_pd (b2, b3, t, 3);
+  c1 = _mm_permute2_ps (c2, c3, s, 3);
+  d1 = _mm256_permute2_ps (d2, d3, t, 3);
+  a1 = _mm_permute2_pd (a2, a3, s, 17);		/* { dg-error "last argument must be in the range 0 .. 3" } */
+  b1 = _mm256_permute2_pd (b2, b3, t, 17);	/* { dg-error "last argument must be in the range 0 .. 3" } */
+  c1 = _mm_permute2_ps (c2, c3, s, 17);		/* { dg-error "last argument must be in the range 0 .. 3" } */
+  d1 = _mm256_permute2_ps (d2, d3, t, 17);	/* { dg-error "last argument must be in the range 0 .. 3" } */
+}
--- gcc/testsuite/gcc.target/i386/xop-vpermil2px-2.c.jj	2011-06-15 10:39:36.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/xop-vpermil2px-2.c	2011-06-15 10:39:44.000000000 +0200
@@ -0,0 +1,21 @@
+/* PR target/49411 */
+/* { dg-do compile } */
+/* { dg-options "-O0 -mxop" } */
+
+#include <x86intrin.h>
+
+__m128d a1, a2, a3;
+__m256d b1, b2, b3;
+__m128 c1, c2, c3;
+__m256 d1, d2, d3;
+__m128i s;
+__m256i t;
+
+void
+foo (int i)
+{
+  a1 = _mm_permute2_pd (a2, a3, s, i);		/* { dg-error "last argument must be an immediate" } */
+  b1 = _mm256_permute2_pd (b2, b3, t, i);	/* { dg-error "last argument must be an immediate" } */
+  c1 = _mm_permute2_ps (c2, c3, s, i);		/* { dg-error "last argument must be an immediate" } */
+  d1 = _mm256_permute2_ps (d2, d3, t, i);	/* { dg-error "last argument must be an immediate" } */
+}
--- gcc/testsuite/gcc.target/i386/xop-rotate1-int.c.jj	2011-06-15 10:47:29.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/xop-rotate1-int.c	2011-06-15 11:25:25.000000000 +0200
@@ -0,0 +1,63 @@
+/* PR target/49411 */
+/* { dg-do run } */
+/* { dg-require-effective-target xop } */
+/* { dg-options "-O2 -mxop" } */
+
+#include "xop-check.h"
+
+#include <x86intrin.h>
+
+extern void abort (void);
+
+union
+{
+  __m128i v;
+  unsigned char c[16];
+  unsigned short s[8];
+  unsigned int i[4];
+  unsigned long long l[2];
+} a, b, c, d;
+
+#define TEST1(F, N, S, SS) \
+do {							\
+  for (i = 0; i < sizeof (a.F) / sizeof (a.F[0]); i++)	\
+    a.F[i] = i * 17;					\
+  s = _mm_set1_epi##SS (N);				\
+  b.v = _mm_roti_epi##S (a.v, N);			\
+  c.v = _mm_rot_epi##S (a.v, s);			\
+  for (i = 0; i < sizeof (a.F) / sizeof (a.F[0]); i++)	\
+    {							\
+      int mask = __CHAR_BIT__ * sizeof (a.F[i]) - 1;	\
+      d.F[i] = a.F[i] << (N & mask);			\
+      if (N & mask)					\
+	d.F[i] |= a.F[i] >> (mask + 1 - (N & mask));	\
+      if (b.F[i] != c.F[i] || b.F[i] != d.F[i])		\
+	abort ();					\
+    }							\
+} while (0)
+#define TEST(N) \
+  TEST1 (c, N, 8, 8);					\
+  TEST1 (s, N, 16, 16);					\
+  TEST1 (i, N, 32, 32);					\
+  TEST1 (l, N, 64, 64x)
+
+volatile int n;
+
+static void
+xop_test (void)
+{
+  unsigned int i;
+  __m128i s;
+
+#ifndef NON_CONST
+  TEST (5);
+  TEST (-5);
+  TEST (0);
+  TEST (31);
+#else
+  n = 5; TEST (n);
+  n = -5; TEST (n);
+  n = 0; TEST (n);
+  n = 31; TEST (n);
+#endif
+}
--- gcc/testsuite/gcc.target/i386/xop-rotate2-int.c.jj	2011-06-15 11:25:42.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/xop-rotate2-int.c	2011-06-15 11:26:03.000000000 +0200
@@ -0,0 +1,7 @@
+/* PR target/49411 */
+/* { dg-do run } */
+/* { dg-require-effective-target xop } */
+/* { dg-options "-O2 -mxop" } */
+
+#define NON_CONST 1
+#include "xop-rotate1-int.c"

	Jakub

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Fix ICEs with -mxop __builtin_ia32_vpermil2p[sd]{,256} and __builtin_ia32_vprot[bwdq]i intrinsics (PR target/49411)
  2011-06-15 10:26 [PATCH] Fix ICEs with -mxop __builtin_ia32_vpermil2p[sd]{,256} and __builtin_ia32_vprot[bwdq]i intrinsics (PR target/49411) Jakub Jelinek
@ 2011-06-15 17:00 ` Quentin Neill
  2011-06-16 20:42   ` Quentin Neill
  0 siblings, 1 reply; 8+ messages in thread
From: Quentin Neill @ 2011-06-15 17:00 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Uros Bizjak, Sebastian Pop, Fang, Changpeng, gcc-patches

On Wed, Jun 15, 2011 at 4:54 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> All of these _mm{,256}_permute2_p[sd] and _mm_roti_epi{8,16,32,64}
> intrinsics ICE if the last argument is constant integer, but not in the
> expected range.
>
> I could only find MSFT documentation for these intrinsics, where for
> *permute2* it says that the last argument must be 0, 1, 2 or 3,
> for *roti* it says that the last argument is integer rotation count,
> preferrably constant and that if count is negative, it performs right
> rotation instead of left rotation.
> This patch adjusts the builtins to match that, if we want to instead
> e.g. always mandate _mm_roti_epi* last argument is constant integer,
> or constant integer in the range -N+1 .. N-1 where N is the number
> after _mm_roti_epi, or in the range 0 .. N-1, it can be easily adjusted.
>
> Regtested on x86_64-linux {-m32,-m64}, unfortunately on a SandyBridge
> box, so I couldn't verify if xop-rotate[12]-int.c actually succeeds
> on xop capable HW.
>
> 2011-06-15  Jakub Jelinek  <jakub@redhat.com>
>
>        PR target/49411
>        * config/i386/i386.c (ix86_expand_multi_arg_builtins): If
>        last_arg_constant and last argument doesn't match its predicate,
>        for xop_vpermil2<mode>3 error out and for xop_rotl<mode>3
>        if it is CONST_INT, mask it, otherwise expand using rotl<mode>3.
>
>        * gcc.target/i386/xop-vpermil2px-1.c: New test.
>        * gcc.target/i386/xop-vpermil2px-2.c: New test.
>        * gcc.target/i386/xop-rotate1-int.c: New test.
>        * gcc.target/i386/xop-rotate2-int.c: New test.
>
> --- gcc/config/i386/i386.c.jj   2011-06-09 16:56:56.000000000 +0200
> +++ gcc/config/i386/i386.c      2011-06-15 11:17:12.000000000 +0200
> @@ -26149,16 +26149,66 @@ ix86_expand_multi_arg_builtin (enum insn
>       int adjust = (comparison_p) ? 1 : 0;
>       enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
>
> -      if (last_arg_constant && i == nargs-1)
> +      if (last_arg_constant && i == nargs - 1)
>        {
> -         if (!CONST_INT_P (op))
> +         if (!insn_data[icode].operand[i + 1].predicate (op, mode))
>            {
> -             error ("last argument must be an immediate");
> -             return gen_reg_rtx (tmode);
> +             enum insn_code new_icode = icode;
> +             switch (icode)
> +               {
> +               case CODE_FOR_xop_vpermil2v2df3:
> +               case CODE_FOR_xop_vpermil2v4sf3:
> +               case CODE_FOR_xop_vpermil2v4df3:
> +               case CODE_FOR_xop_vpermil2v8sf3:
> +                 if (!CONST_INT_P (op))
> +                   {
> +                     error ("last argument must be an immediate");
> +                     return gen_reg_rtx (tmode);
> +                   }
> +                 error ("last argument must be in the range 0 .. 3");
> +                 return gen_reg_rtx (tmode);
> +               case CODE_FOR_xop_rotlv2di3:
> +                 new_icode = CODE_FOR_rotlv2di3;
> +                 goto xop_rotl;
> +               case CODE_FOR_xop_rotlv4si3:
> +                 new_icode = CODE_FOR_rotlv4si3;
> +                 goto xop_rotl;
> +               case CODE_FOR_xop_rotlv8hi3:
> +                 new_icode = CODE_FOR_rotlv8hi3;
> +                 goto xop_rotl;
> +               case CODE_FOR_xop_rotlv16qi3:
> +                 new_icode = CODE_FOR_rotlv16qi3;
> +               xop_rotl:
> +                 if (CONST_INT_P (op))
> +                   {
> +                     int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
> +                     op = GEN_INT (INTVAL (op) & mask);
> +                     gcc_checking_assert
> +                       (insn_data[icode].operand[i + 1].predicate (op, mode));
> +                   }
> +                 else
> +                   {
> +                     gcc_checking_assert
> +                       (nargs == 2
> +                        && insn_data[new_icode].operand[0].mode == tmode
> +                        && insn_data[new_icode].operand[1].mode == tmode
> +                        && insn_data[new_icode].operand[2].mode == mode
> +                        && insn_data[new_icode].operand[0].predicate
> +                           == insn_data[icode].operand[0].predicate
> +                        && insn_data[new_icode].operand[1].predicate
> +                           == insn_data[icode].operand[1].predicate);
> +                     icode = new_icode;
> +                     goto non_constant;
> +                   }
> +                 break;
> +               default:
> +                 gcc_unreachable ();
> +               }
>            }
>        }
>       else
>        {
> +       non_constant:
>          if (VECTOR_MODE_P (mode))
>            op = safe_vector_operand (op, mode);
>
> --- gcc/testsuite/gcc.target/i386/xop-vpermil2px-1.c.jj 2011-06-15 10:18:29.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/xop-vpermil2px-1.c    2011-06-15 10:41:13.000000000 +0200
> @@ -0,0 +1,25 @@
> +/* PR target/49411 */
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -mxop" } */
> +
> +#include <x86intrin.h>
> +
> +__m128d a1, a2, a3;
> +__m256d b1, b2, b3;
> +__m128 c1, c2, c3;
> +__m256 d1, d2, d3;
> +__m128i s;
> +__m256i t;
> +
> +void
> +foo (int i)
> +{
> +  a1 = _mm_permute2_pd (a2, a3, s, 3);
> +  b1 = _mm256_permute2_pd (b2, b3, t, 3);
> +  c1 = _mm_permute2_ps (c2, c3, s, 3);
> +  d1 = _mm256_permute2_ps (d2, d3, t, 3);
> +  a1 = _mm_permute2_pd (a2, a3, s, 17);                /* { dg-error "last argument must be in the range 0 .. 3" } */
> +  b1 = _mm256_permute2_pd (b2, b3, t, 17);     /* { dg-error "last argument must be in the range 0 .. 3" } */
> +  c1 = _mm_permute2_ps (c2, c3, s, 17);                /* { dg-error "last argument must be in the range 0 .. 3" } */
> +  d1 = _mm256_permute2_ps (d2, d3, t, 17);     /* { dg-error "last argument must be in the range 0 .. 3" } */
> +}
> --- gcc/testsuite/gcc.target/i386/xop-vpermil2px-2.c.jj 2011-06-15 10:39:36.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/xop-vpermil2px-2.c    2011-06-15 10:39:44.000000000 +0200
> @@ -0,0 +1,21 @@
> +/* PR target/49411 */
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -mxop" } */
> +
> +#include <x86intrin.h>
> +
> +__m128d a1, a2, a3;
> +__m256d b1, b2, b3;
> +__m128 c1, c2, c3;
> +__m256 d1, d2, d3;
> +__m128i s;
> +__m256i t;
> +
> +void
> +foo (int i)
> +{
> +  a1 = _mm_permute2_pd (a2, a3, s, i);         /* { dg-error "last argument must be an immediate" } */
> +  b1 = _mm256_permute2_pd (b2, b3, t, i);      /* { dg-error "last argument must be an immediate" } */
> +  c1 = _mm_permute2_ps (c2, c3, s, i);         /* { dg-error "last argument must be an immediate" } */
> +  d1 = _mm256_permute2_ps (d2, d3, t, i);      /* { dg-error "last argument must be an immediate" } */
> +}
> --- gcc/testsuite/gcc.target/i386/xop-rotate1-int.c.jj  2011-06-15 10:47:29.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/xop-rotate1-int.c     2011-06-15 11:25:25.000000000 +0200
> @@ -0,0 +1,63 @@
> +/* PR target/49411 */
> +/* { dg-do run } */
> +/* { dg-require-effective-target xop } */
> +/* { dg-options "-O2 -mxop" } */
> +
> +#include "xop-check.h"
> +
> +#include <x86intrin.h>
> +
> +extern void abort (void);
> +
> +union
> +{
> +  __m128i v;
> +  unsigned char c[16];
> +  unsigned short s[8];
> +  unsigned int i[4];
> +  unsigned long long l[2];
> +} a, b, c, d;
> +
> +#define TEST1(F, N, S, SS) \
> +do {                                                   \
> +  for (i = 0; i < sizeof (a.F) / sizeof (a.F[0]); i++) \
> +    a.F[i] = i * 17;                                   \
> +  s = _mm_set1_epi##SS (N);                            \
> +  b.v = _mm_roti_epi##S (a.v, N);                      \
> +  c.v = _mm_rot_epi##S (a.v, s);                       \
> +  for (i = 0; i < sizeof (a.F) / sizeof (a.F[0]); i++) \
> +    {                                                  \
> +      int mask = __CHAR_BIT__ * sizeof (a.F[i]) - 1;   \
> +      d.F[i] = a.F[i] << (N & mask);                   \
> +      if (N & mask)                                    \
> +       d.F[i] |= a.F[i] >> (mask + 1 - (N & mask));    \
> +      if (b.F[i] != c.F[i] || b.F[i] != d.F[i])                \
> +       abort ();                                       \
> +    }                                                  \
> +} while (0)
> +#define TEST(N) \
> +  TEST1 (c, N, 8, 8);                                  \
> +  TEST1 (s, N, 16, 16);                                        \
> +  TEST1 (i, N, 32, 32);                                        \
> +  TEST1 (l, N, 64, 64x)
> +
> +volatile int n;
> +
> +static void
> +xop_test (void)
> +{
> +  unsigned int i;
> +  __m128i s;
> +
> +#ifndef NON_CONST
> +  TEST (5);
> +  TEST (-5);
> +  TEST (0);
> +  TEST (31);
> +#else
> +  n = 5; TEST (n);
> +  n = -5; TEST (n);
> +  n = 0; TEST (n);
> +  n = 31; TEST (n);
> +#endif
> +}
> --- gcc/testsuite/gcc.target/i386/xop-rotate2-int.c.jj  2011-06-15 11:25:42.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/xop-rotate2-int.c     2011-06-15 11:26:03.000000000 +0200
> @@ -0,0 +1,7 @@
> +/* PR target/49411 */
> +/* { dg-do run } */
> +/* { dg-require-effective-target xop } */
> +/* { dg-options "-O2 -mxop" } */
> +
> +#define NON_CONST 1
> +#include "xop-rotate1-int.c"
>
>        Jakub
>

I will test on AMD HW.
-- 
Quentin

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Fix ICEs with -mxop __builtin_ia32_vpermil2p[sd]{,256} and __builtin_ia32_vprot[bwdq]i intrinsics (PR target/49411)
  2011-06-15 17:00 ` Quentin Neill
@ 2011-06-16 20:42   ` Quentin Neill
  2011-06-16 23:18     ` Quentin Neill
  0 siblings, 1 reply; 8+ messages in thread
From: Quentin Neill @ 2011-06-16 20:42 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Uros Bizjak, Sebastian Pop, Fang, Changpeng, gcc-patches

On Wed, Jun 15, 2011 at 11:40 AM, Quentin Neill
<quentin.neill.gnu@gmail.com> wrote:
> On Wed, Jun 15, 2011 at 4:54 AM, Jakub Jelinek <jakub@redhat.com> wrote:
>> Hi!
>>
>> All of these _mm{,256}_permute2_p[sd] and _mm_roti_epi{8,16,32,64}
>> intrinsics ICE if the last argument is constant integer, but not in the
>> expected range.
>>
>> I could only find MSFT documentation for these intrinsics, where for
>> *permute2* it says that the last argument must be 0, 1, 2 or 3,
>> for *roti* it says that the last argument is integer rotation count,
>> preferrably constant and that if count is negative, it performs right
>> rotation instead of left rotation.
>> This patch adjusts the builtins to match that, if we want to instead
>> e.g. always mandate _mm_roti_epi* last argument is constant integer,
>> or constant integer in the range -N+1 .. N-1 where N is the number
>> after _mm_roti_epi, or in the range 0 .. N-1, it can be easily adjusted.
>>
>> Regtested on x86_64-linux {-m32,-m64}, unfortunately on a SandyBridge
>> box, so I couldn't verify if xop-rotate[12]-int.c actually succeeds
>> on xop capable HW.
>>
>> [snip]
>>
>>        Jakub
>>
>
> I will test on AMD HW.
> --
> Quentin

Regtested on x86_64-linux on AMD Family 16h, and verified the
xop-rotate[12]-int tests ran and passed.
-- 
Quentin

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Fix ICEs with -mxop __builtin_ia32_vpermil2p[sd]{,256} and __builtin_ia32_vprot[bwdq]i intrinsics (PR target/49411)
  2011-06-16 20:42   ` Quentin Neill
@ 2011-06-16 23:18     ` Quentin Neill
  2011-06-17  0:24       ` Jakub Jelinek
  0 siblings, 1 reply; 8+ messages in thread
From: Quentin Neill @ 2011-06-16 23:18 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Uros Bizjak, Sebastian Pop, Fang, Changpeng, gcc-patches

On Thu, Jun 16, 2011 at 3:34 PM, Quentin Neill
<quentin.neill.gnu@gmail.com> wrote:
> On Wed, Jun 15, 2011 at 11:40 AM, Quentin Neill
> <quentin.neill.gnu@gmail.com> wrote:
>> On Wed, Jun 15, 2011 at 4:54 AM, Jakub Jelinek <jakub@redhat.com> wrote:
>>> Hi!
>>>
>>> All of these _mm{,256}_permute2_p[sd] and _mm_roti_epi{8,16,32,64}
>>> intrinsics ICE if the last argument is constant integer, but not in the
>>> expected range.
>>>
>>> I could only find MSFT documentation for these intrinsics, where for
>>> *permute2* it says that the last argument must be 0, 1, 2 or 3,
>>> for *roti* it says that the last argument is integer rotation count,
>>> preferrably constant and that if count is negative, it performs right
>>> rotation instead of left rotation.
>>> This patch adjusts the builtins to match that, if we want to instead
>>> e.g. always mandate _mm_roti_epi* last argument is constant integer,
>>> or constant integer in the range -N+1 .. N-1 where N is the number
>>> after _mm_roti_epi, or in the range 0 .. N-1, it can be easily adjusted.
>>>
>>> Regtested on x86_64-linux {-m32,-m64}, unfortunately on a SandyBridge
>>> box, so I couldn't verify if xop-rotate[12]-int.c actually succeeds
>>> on xop capable HW.
>>>
>>> [snip]
>>>
>>>        Jakub
>>>
>>
>> I will test on AMD HW.
>> --
>> Quentin
>
> Regtested on x86_64-linux on AMD Family 16h, and verified the
> xop-rotate[12]-int tests ran and passed.
> --
> Quentin

Does it need to also handle the VCVTP[SH]2P[HS] insns like this?

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 675888f..584f722 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -25571,6 +25571,10 @@ ix86_expand_multi_arg_builtin (enum insn_code
icode, tree exp, rtx target,
                case CODE_FOR_xop_vpermil2v4sf3:
                case CODE_FOR_xop_vpermil2v4df3:
                case CODE_FOR_xop_vpermil2v8sf3:
+               case CODE_FOR_vcvtph2ps:
+               case CODE_FOR_vcvtph2ps256:
+               case CODE_FOR_vcvtps2ph:
+               case CODE_FOR_vcvtps2ph256:
                  if (!CONST_INT_P (op))
                    {
                      error ("last argument must be an immediate");

-- 
Quentin

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Fix ICEs with -mxop __builtin_ia32_vpermil2p[sd]{,256} and __builtin_ia32_vprot[bwdq]i intrinsics (PR target/49411)
  2011-06-16 23:18     ` Quentin Neill
@ 2011-06-17  0:24       ` Jakub Jelinek
  2011-06-17 13:16         ` [PATCH] Fix ICEs with out of range immediates in SSE*/AVX*/XOP* " Jakub Jelinek
  0 siblings, 1 reply; 8+ messages in thread
From: Jakub Jelinek @ 2011-06-17  0:24 UTC (permalink / raw)
  To: Uros Bizjak, Quentin Neill; +Cc: Sebastian Pop, Fang, Changpeng, gcc-patches

On Thu, Jun 16, 2011 at 05:57:12PM -0500, Quentin Neill wrote:
> Does it need to also handle the VCVTP[SH]2P[HS] insns like this?
> 
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 675888f..584f722 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -25571,6 +25571,10 @@ ix86_expand_multi_arg_builtin (enum insn_code
> icode, tree exp, rtx target,
>                 case CODE_FOR_xop_vpermil2v4sf3:
>                 case CODE_FOR_xop_vpermil2v4df3:
>                 case CODE_FOR_xop_vpermil2v8sf3:
> +               case CODE_FOR_vcvtph2ps:
> +               case CODE_FOR_vcvtph2ps256:
> +               case CODE_FOR_vcvtps2ph:
> +               case CODE_FOR_vcvtps2ph256:
>                   if (!CONST_INT_P (op))
>                     {
>                       error ("last argument must be an immediate");

Not here, those are handled by  ix86_expand_args_builtin
instead of ix86_expand_multi_arg_builtin.  Furthermore, only
CODE_FOR_vcvtps2ph and CODE_FOR_vcvtps2ph256 have CONST_INT argument.
And I believe ix86_expand_args_builtin handles it fine, what's wrong
is the actual predicates those insns use.

E.g.
#include <x86intrin.h>
__m128i a;
__m128 b;
void
foo (int i)
{
  a = _mm_cvtps_ph (b, 256);
//  a = _mm_cvtps_ph (b, i);
}
ICEs during reload.  I don't see how reload could ever fix up
a CONST_INT operand which isn't in the 0..255 range
to bring that operand into range.  When the operand is non-constant,
ix86_expand_args_builtin correctly errors out that the
operand must be an 8-bit immediate.

So something like untested:

2011-06-17  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/sse.md (vcvtps2ph, *vcvtps2ph, *vcvtps2ph_store,
	vcvtps2ph256): Use const_0_to_255_operand predicate instead of
	immediate_operand.

--- gcc/config/i386/sse.md	2011-06-06 10:24:40.000000000 +0200
+++ gcc/config/i386/sse.md	2011-06-17 01:19:45.371681174 +0200
@@ -10290,7 +10290,7 @@
   [(set (match_operand:V8HI 0 "register_operand" "")
 	(vec_concat:V8HI
 	  (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
-			(match_operand:SI 2 "immediate_operand" "")]
+			(match_operand:SI 2 "const_0_to_255_operand" "")]
 		       UNSPEC_VCVTPS2PH)
 	  (match_dup 3)))]
   "TARGET_F16C"
@@ -10300,7 +10300,7 @@
   [(set (match_operand:V8HI 0 "register_operand" "=x")
 	(vec_concat:V8HI
 	  (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
-			(match_operand:SI 2 "immediate_operand" "N")]
+			(match_operand:SI 2 "const_0_to_255_operand" "N")]
 		       UNSPEC_VCVTPS2PH)
 	  (match_operand:V4HI 3 "const0_operand" "")))]
   "TARGET_F16C"
@@ -10312,7 +10312,7 @@
 (define_insn "*vcvtps2ph_store"
   [(set (match_operand:V4HI 0 "memory_operand" "=m")
 	(unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
-		      (match_operand:SI 2 "immediate_operand" "N")]
+		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
 		     UNSPEC_VCVTPS2PH))]
   "TARGET_F16C"
   "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
@@ -10323,7 +10323,7 @@
 (define_insn "vcvtps2ph256"
   [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
 	(unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
-		      (match_operand:SI 2 "immediate_operand" "N")]
+		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
 		     UNSPEC_VCVTPS2PH))]
   "TARGET_F16C"
   "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"


	Jakub

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH] Fix ICEs with out of range immediates in SSE*/AVX*/XOP* intrinsics (PR target/49411)
  2011-06-17  0:24       ` Jakub Jelinek
@ 2011-06-17 13:16         ` Jakub Jelinek
       [not found]           ` <BANLkTimKVOrTMzuceNrWOPUbHf6H34iQ4Q@mail.gmail.com>
  2011-06-20 17:15           ` Kirill Yukhin
  0 siblings, 2 replies; 8+ messages in thread
From: Jakub Jelinek @ 2011-06-17 13:16 UTC (permalink / raw)
  To: Uros Bizjak, Quentin Neill; +Cc: Sebastian Pop, Fang, Changpeng, gcc-patches

On Fri, Jun 17, 2011 at 01:31:14AM +0200, Jakub Jelinek wrote:
> Not here, those are handled by  ix86_expand_args_builtin
> instead of ix86_expand_multi_arg_builtin.  Furthermore, only
> CODE_FOR_vcvtps2ph and CODE_FOR_vcvtps2ph256 have CONST_INT argument.
> And I believe ix86_expand_args_builtin handles it fine, what's wrong
> is the actual predicates those insns use.

Ok, had a deeper look into this and it seems there are other issues,
some of them even without test coverage regressed since 4.6.
Some problems result in ICEs, others fail to assemble.  Had to revert
the blendbits removal patch, because that removal results in out of
range immediates not to be reported as predicate failures, but instead
as ICEs.

So here is an updated patch that adds test coverage.  Regtested
on x86_64-linux {-m32,-m64}, ok for trunk (and backport for 4.6)?

There are still a couple of things I'm unsure about (not tested
by the testcases, compile fine):
#include <x86intrin.h>
__m128i i1, i2, i3, i4;
__m128 a1, a2, a3, a4;
__m128d d1, d2, d3, d4;
__m256i l1, l2, l3, l4;
__m256 b1, b2, b3, b4;
__m256d e1, e2, e3, e4;
__m64 m1, m2, m3, m4;
int k1, k2, k3, k4;
float f1, f2, f3, f4;
void
foo (void)
{
  /* 8 bit imm only?  This compiles fine, but one ends up with
     number modulo 256 in the insn.  To make it error out
     const_0_to_255_operand would need to be used.  */
  e1 = _mm256_shuffle_pd (e2, e3, 256);
  b1 = _mm256_shuffle_ps (b2, b3, 256);
  i1 = _mm_shuffle_epi32 (i2, 256);
  i1 = _mm_shufflehi_epi16 (i2, 256);
  i1 = _mm_shufflelo_epi16 (i2, 256);
  d1 = _mm_shuffle_pd (d2, d3, 256);
  m1 = _mm_shuffle_pi16 (m2, 256);
  a1 = _mm_shuffle_ps (a2, a3, 256);
  /* What about these?  Similarly to the above, they result
     in imm modulo 16 resp. imm modulo 4.  */
  e1 = _mm256_permute_pd (e2, 16);
  d1 = _mm_permute_pd (d2, 4);
}

2011-06-17  Jakub Jelinek  <jakub@redhat.com>

	PR target/49411
	* config/i386/i386.c (ix86_expand_multi_arg_builtin): If
	last_arg_constant and last argument doesn't match its predicate,
	for xop_vpermil2<mode>3 error out and for xop_rotl<mode>3
	if it is CONST_INT, mask it, otherwise expand using rotl<mode>3.
	(ix86_expand_sse_pcmpestr, ix86_expand_sse_pcmpistr): Fix
	spelling of error message.
	* config/i386/sse.md (sse4a_extrqi, sse4a_insertqi,
	vcvtps2ph, *vcvtps2ph, *vcvtps2ph_store, vcvtps2ph256): Use
	const_0_to_255_operand instead of const_int_operand.

	Revert:
	2011-05-09  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/sse.md (blendbits): Remove mode attribute.
	(<sse4_1>_blend<ssemodesuffix><avxsizesuffix>): Use const_int_operand
	instead of const_0_to_<blendbits>_operand for operand 3 predicate.
	Check integer value of operand 3 in insn constraint.

	* gcc.target/i386/testimm-1.c: New test.
	* gcc.target/i386/testimm-2.c: New test.
	* gcc.target/i386/testimm-3.c: New test.
	* gcc.target/i386/testimm-4.c: New test.
	* gcc.target/i386/testimm-5.c: New test.
	* gcc.target/i386/testimm-6.c: New test.
	* gcc.target/i386/testimm-7.c: New test.
	* gcc.target/i386/testimm-8.c: New test.
	* gcc.target/i386/xop-vpermil2px-2.c: New test.
	* gcc.target/i386/xop-rotate1-int.c: New test.
	* gcc.target/i386/xop-rotate2-int.c: New test.

--- gcc/config/i386/i386.c.jj	2011-06-17 11:02:11.000000000 +0200
+++ gcc/config/i386/i386.c	2011-06-17 13:35:26.000000000 +0200
@@ -25566,16 +25566,61 @@ ix86_expand_multi_arg_builtin (enum insn
       int adjust = (comparison_p) ? 1 : 0;
       enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
 
-      if (last_arg_constant && i == nargs-1)
+      if (last_arg_constant && i == nargs - 1)
 	{
-	  if (!CONST_INT_P (op))
+	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
 	    {
-	      error ("last argument must be an immediate");
-	      return gen_reg_rtx (tmode);
+	      enum insn_code new_icode = icode;
+	      switch (icode)
+		{
+		case CODE_FOR_xop_vpermil2v2df3:
+		case CODE_FOR_xop_vpermil2v4sf3:
+		case CODE_FOR_xop_vpermil2v4df3:
+		case CODE_FOR_xop_vpermil2v8sf3:
+		  error ("the last argument must be a 2-bit immediate");
+		  return gen_reg_rtx (tmode);
+		case CODE_FOR_xop_rotlv2di3:
+		  new_icode = CODE_FOR_rotlv2di3;
+		  goto xop_rotl;
+		case CODE_FOR_xop_rotlv4si3:
+		  new_icode = CODE_FOR_rotlv4si3;
+		  goto xop_rotl;
+		case CODE_FOR_xop_rotlv8hi3:
+		  new_icode = CODE_FOR_rotlv8hi3;
+		  goto xop_rotl;
+		case CODE_FOR_xop_rotlv16qi3:
+		  new_icode = CODE_FOR_rotlv16qi3;
+		xop_rotl:
+		  if (CONST_INT_P (op))
+		    {
+		      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
+		      op = GEN_INT (INTVAL (op) & mask);
+		      gcc_checking_assert
+			(insn_data[icode].operand[i + 1].predicate (op, mode));
+		    }
+		  else
+		    {
+		      gcc_checking_assert
+			(nargs == 2
+			 && insn_data[new_icode].operand[0].mode == tmode
+			 && insn_data[new_icode].operand[1].mode == tmode
+			 && insn_data[new_icode].operand[2].mode == mode
+			 && insn_data[new_icode].operand[0].predicate
+			    == insn_data[icode].operand[0].predicate
+			 && insn_data[new_icode].operand[1].predicate
+			    == insn_data[icode].operand[1].predicate);
+		      icode = new_icode;
+		      goto non_constant;
+		    }
+		  break;
+		default:
+		  gcc_unreachable ();
+		}
 	    }
 	}
       else
 	{
+	non_constant:
 	  if (VECTOR_MODE_P (mode))
 	    op = safe_vector_operand (op, mode);
 
@@ -25900,7 +25945,7 @@ ix86_expand_sse_pcmpestr (const struct b
 
   if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
     {
-      error ("the fifth argument must be a 8-bit immediate");
+      error ("the fifth argument must be an 8-bit immediate");
       return const0_rtx;
     }
 
@@ -25995,7 +26040,7 @@ ix86_expand_sse_pcmpistr (const struct b
 
   if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
     {
-      error ("the third argument must be a 8-bit immediate");
+      error ("the third argument must be an 8-bit immediate");
       return const0_rtx;
     }
 
--- gcc/config/i386/sse.md.jj	2011-06-17 11:02:11.000000000 +0200
+++ gcc/config/i386/sse.md	2011-06-17 14:14:09.000000000 +0200
@@ -188,6 +188,10 @@ (define_mode_iterator AVX256MODE2P [V8SI
 
 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
 
+;; Mapping of immediate bits for blend instructions
+(define_mode_attr blendbits
+  [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
+
 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -7707,8 +7711,8 @@ (define_insn "sse4a_vmmovnt<mode>"
 (define_insn "sse4a_extrqi"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
-                      (match_operand 2 "const_int_operand" "")
-                      (match_operand 3 "const_int_operand" "")]
+                      (match_operand 2 "const_0_to_255_operand" "")
+                      (match_operand 3 "const_0_to_255_operand" "")]
                      UNSPEC_EXTRQI))]
   "TARGET_SSE4A"
   "extrq\t{%3, %2, %0|%0, %2, %3}"
@@ -7732,8 +7736,8 @@ (define_insn "sse4a_insertqi"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
         	      (match_operand:V2DI 2 "register_operand" "x")
-                      (match_operand 3 "const_int_operand" "")
-                      (match_operand 4 "const_int_operand" "")]
+                      (match_operand 3 "const_0_to_255_operand" "")
+                      (match_operand 4 "const_0_to_255_operand" "")]
                      UNSPEC_INSERTQI))]
   "TARGET_SSE4A"
   "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
@@ -7766,9 +7770,8 @@ (define_insn "<sse4_1>_blend<ssemodesuff
 	(vec_merge:VF
 	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
 	  (match_operand:VF 1 "register_operand" "0,x")
-	  (match_operand:SI 3 "const_int_operand" "")))]
-  "TARGET_SSE4_1
-   && IN_RANGE (INTVAL (operands[3]), 0, (1 << GET_MODE_NUNITS (<MODE>mode))-1)"
+	  (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
+  "TARGET_SSE4_1"
   "@
    blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
    vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
@@ -10327,7 +10330,7 @@ (define_expand "vcvtps2ph"
   [(set (match_operand:V8HI 0 "register_operand" "")
 	(vec_concat:V8HI
 	  (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
-			(match_operand:SI 2 "immediate_operand" "")]
+			(match_operand:SI 2 "const_0_to_255_operand" "")]
 		       UNSPEC_VCVTPS2PH)
 	  (match_dup 3)))]
   "TARGET_F16C"
@@ -10337,7 +10340,7 @@ (define_insn "*vcvtps2ph"
   [(set (match_operand:V8HI 0 "register_operand" "=x")
 	(vec_concat:V8HI
 	  (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
-			(match_operand:SI 2 "immediate_operand" "N")]
+			(match_operand:SI 2 "const_0_to_255_operand" "N")]
 		       UNSPEC_VCVTPS2PH)
 	  (match_operand:V4HI 3 "const0_operand" "")))]
   "TARGET_F16C"
@@ -10349,7 +10352,7 @@ (define_insn "*vcvtps2ph"
 (define_insn "*vcvtps2ph_store"
   [(set (match_operand:V4HI 0 "memory_operand" "=m")
 	(unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
-		      (match_operand:SI 2 "immediate_operand" "N")]
+		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
 		     UNSPEC_VCVTPS2PH))]
   "TARGET_F16C"
   "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
@@ -10360,7 +10363,7 @@ (define_insn "*vcvtps2ph_store"
 (define_insn "vcvtps2ph256"
   [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
 	(unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
-		      (match_operand:SI 2 "immediate_operand" "N")]
+		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
 		     UNSPEC_VCVTPS2PH))]
   "TARGET_F16C"
   "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
--- gcc/testsuite/gcc.target/i386/testimm-1.c.jj	2011-06-17 13:37:44.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/testimm-1.c	2011-06-17 14:01:34.000000000 +0200
@@ -0,0 +1,94 @@
+/* PR target/49411 */
+/* { dg-do compile } */
+/* { dg-options "-O0 -mf16c -maes -mpclmul" } */
+
+#include <x86intrin.h>
+
+__m128i i1, i2, i3, i4;
+__m128 a1, a2, a3, a4;
+__m128d d1, d2, d3, d4;
+__m256i l1, l2, l3, l4;
+__m256 b1, b2, b3, b4;
+__m256d e1, e2, e3, e4;
+__m64 m1, m2, m3, m4;
+int k1, k2, k3, k4;
+float f1, f2, f3, f4;
+
+void
+test8bit (void)
+{
+  i1 = _mm_cmpistrm (i2, i3, 256);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpistri (i2, i3, 256);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpistra (i2, i3, 256);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpistrc (i2, i3, 256);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpistro (i2, i3, 256);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpistrs (i2, i3, 256);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpistrz (i2, i3, 256);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  i1 = _mm_cmpestrm (i2, k2, i3, k3, 256);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpestri (i2, k2, i3, k3, 256);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpestra (i2, k2, i3, k3, 256);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpestrc (i2, k2, i3, k3, 256);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpestro (i2, k2, i3, k3, 256);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpestrs (i2, k2, i3, k3, 256);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpestrz (i2, k2, i3, k3, 256);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  b1 = _mm256_blend_ps (b2, b3, 256);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  k1 = _cvtss_sh (f1, 256);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm256_cvtps_ph (b2, 256);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  b1 = _mm256_dp_ps (b2, b3, 256);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  e1 = _mm256_permute2f128_pd (e2, e3, 256);/* { dg-error "the last argument must be an 8-bit immediate" } */
+  b1 = _mm256_permute2f128_ps (b2, b3, 256);/* { dg-error "the last argument must be an 8-bit immediate" } */
+  l1 = _mm256_permute2f128_si256 (l2, l3, 256);/* { dg-error "the last argument must be an 8-bit immediate" } */
+  b1 = _mm256_permute_ps (b2, 256);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_aeskeygenassist_si128 (i2, 256);/* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_blend_epi16 (i2, i3, 256);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_clmulepi64_si128 (i2, i3, 256);/* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_cvtps_ph (a1, 256);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  d1 = _mm_dp_pd (d2, d3, 256);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  a1 = _mm_dp_ps (a2, a3, 256);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  a1 = _mm_insert_ps (a2, a3, 256);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_mpsadbw_epu8 (i2, i3, 256);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  a1 = _mm_permute_ps (a2, 256);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_slli_si128 (i2, 256);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_srli_si128 (i2, 256);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+}
+
+void
+test5bit (void)
+{
+  d1 = _mm_cmp_sd (d2, d3, 32);		  /* { dg-error "the last argument must be a 5-bit immediate" } */
+  a1 = _mm_cmp_ss (a2, a3, 32);		  /* { dg-error "the last argument must be a 5-bit immediate" } */
+  d1 = _mm_cmp_pd (d2, d3, 32);		  /* { dg-error "the last argument must be a 5-bit immediate" } */
+  a1 = _mm_cmp_ps (a2, a3, 32);		  /* { dg-error "the last argument must be a 5-bit immediate" } */
+  e1 = _mm256_cmp_pd (e2, e3, 32);	  /* { dg-error "the last argument must be a 5-bit immediate" } */
+  b1 = _mm256_cmp_ps (b2, b3, 32);	  /* { dg-error "the last argument must be a 5-bit immediate" } */
+}
+
+void
+test4bit (void)
+{
+  d1 = _mm_round_pd (d2, 16);		  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  d1 = _mm_round_sd (d2, d3, 16);	  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  a1 = _mm_round_ps (a2, 16);		  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  a1 = _mm_round_ss (a2, a2, 16);	  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  a1 = _mm_blend_ps (a2, a3, 16);	  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  e1 = _mm256_blend_pd (e2, e3, 16);	  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  e1 = _mm256_round_pd (e2, 16);	  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  b1 = _mm256_round_ps (b2, 16);	  /* { dg-error "the last argument must be a 4-bit immediate" } */
+}
+
+void
+test2bit (void)
+{
+  d1 = _mm_blend_pd (d2, d3, 4);	  /* { dg-error "the last argument must be a 2-bit immediate" } */
+}
+
+void
+test1bit (void)
+{
+  d1 = _mm256_extractf128_pd (e2, 2);	  /* { dg-error "the last argument must be a 1-bit immediate" } */
+  a1 = _mm256_extractf128_ps (b2, 2);	  /* { dg-error "the last argument must be a 1-bit immediate" } */
+  i1 = _mm256_extractf128_si256 (l2, 2);  /* { dg-error "the last argument must be a 1-bit immediate" } */
+  e1 = _mm256_insertf128_pd (e2, d1, 2);  /* { dg-error "the last argument must be a 1-bit immediate" } */
+  b1 = _mm256_insertf128_ps (b2, a1, 2);  /* { dg-error "the last argument must be a 1-bit immediate" } */
+  l1 = _mm256_insertf128_si256 (l2, i1, 2);/* { dg-error "the last argument must be a 1-bit immediate" } */
+}
--- gcc/testsuite/gcc.target/i386/testimm-2.c.jj	2011-06-17 13:37:52.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/testimm-2.c	2011-06-17 14:01:38.000000000 +0200
@@ -0,0 +1,94 @@
+/* PR target/49411 */
+/* { dg-do compile } */
+/* { dg-options "-O0 -mf16c -maes -mpclmul" } */
+
+#include <x86intrin.h>
+
+__m128i i1, i2, i3, i4;
+__m128 a1, a2, a3, a4;
+__m128d d1, d2, d3, d4;
+__m256i l1, l2, l3, l4;
+__m256 b1, b2, b3, b4;
+__m256d e1, e2, e3, e4;
+__m64 m1, m2, m3, m4;
+int k1, k2, k3, k4;
+float f1, f2, f3, f4;
+
+void
+test8bit (void)
+{
+  i1 = _mm_cmpistrm (i2, i3, -10);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpistri (i2, i3, -10);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpistra (i2, i3, -10);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpistrc (i2, i3, -10);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpistro (i2, i3, -10);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpistrs (i2, i3, -10);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpistrz (i2, i3, -10);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  i1 = _mm_cmpestrm (i2, k2, i3, k3, -10);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpestri (i2, k2, i3, k3, -10);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpestra (i2, k2, i3, k3, -10);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpestrc (i2, k2, i3, k3, -10);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpestro (i2, k2, i3, k3, -10);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpestrs (i2, k2, i3, k3, -10);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpestrz (i2, k2, i3, k3, -10);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  b1 = _mm256_blend_ps (b2, b3, -10);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  k1 = _cvtss_sh (f1, -10);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm256_cvtps_ph (b2, -10);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  b1 = _mm256_dp_ps (b2, b3, -10);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  e1 = _mm256_permute2f128_pd (e2, e3, -10);/* { dg-error "the last argument must be an 8-bit immediate" } */
+  b1 = _mm256_permute2f128_ps (b2, b3, -10);/* { dg-error "the last argument must be an 8-bit immediate" } */
+  l1 = _mm256_permute2f128_si256 (l2, l3, -10);/* { dg-error "the last argument must be an 8-bit immediate" } */
+  b1 = _mm256_permute_ps (b2, -10);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_aeskeygenassist_si128 (i2, -10);/* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_blend_epi16 (i2, i3, -10);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_clmulepi64_si128 (i2, i3, -10);/* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_cvtps_ph (a1, -10);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  d1 = _mm_dp_pd (d2, d3, -10);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  a1 = _mm_dp_ps (a2, a3, -10);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  a1 = _mm_insert_ps (a2, a3, -10);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_mpsadbw_epu8 (i2, i3, -10);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  a1 = _mm_permute_ps (a2, -10);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_slli_si128 (i2, -10);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_srli_si128 (i2, -10);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+}
+
+void
+test5bit (void)
+{
+  d1 = _mm_cmp_sd (d2, d3, -7);		  /* { dg-error "the last argument must be a 5-bit immediate" } */
+  a1 = _mm_cmp_ss (a2, a3, -7);		  /* { dg-error "the last argument must be a 5-bit immediate" } */
+  d1 = _mm_cmp_pd (d2, d3, -7);		  /* { dg-error "the last argument must be a 5-bit immediate" } */
+  a1 = _mm_cmp_ps (a2, a3, -7);		  /* { dg-error "the last argument must be a 5-bit immediate" } */
+  e1 = _mm256_cmp_pd (e2, e3, -7);	  /* { dg-error "the last argument must be a 5-bit immediate" } */
+  b1 = _mm256_cmp_ps (b2, b3, -7);	  /* { dg-error "the last argument must be a 5-bit immediate" } */
+}
+
+void
+test4bit (void)
+{
+  d1 = _mm_round_pd (d2, -7);		  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  d1 = _mm_round_sd (d2, d3, -7);	  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  a1 = _mm_round_ps (a2, -7);		  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  a1 = _mm_round_ss (a2, a2, -7);	  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  a1 = _mm_blend_ps (a2, a3, -7);	  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  e1 = _mm256_blend_pd (e2, e3, -7);	  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  e1 = _mm256_round_pd (e2, -7);	  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  b1 = _mm256_round_ps (b2, -7);	  /* { dg-error "the last argument must be a 4-bit immediate" } */
+}
+
+void
+test2bit (void)
+{
+  d1 = _mm_blend_pd (d2, d3, -1);	  /* { dg-error "the last argument must be a 2-bit immediate" } */
+}
+
+void
+test1bit (void)
+{
+  d1 = _mm256_extractf128_pd (e2, -1);	  /* { dg-error "the last argument must be a 1-bit immediate" } */
+  a1 = _mm256_extractf128_ps (b2, -1);	  /* { dg-error "the last argument must be a 1-bit immediate" } */
+  i1 = _mm256_extractf128_si256 (l2, -1); /* { dg-error "the last argument must be a 1-bit immediate" } */
+  e1 = _mm256_insertf128_pd (e2, d1, -1); /* { dg-error "the last argument must be a 1-bit immediate" } */
+  b1 = _mm256_insertf128_ps (b2, a1, -1); /* { dg-error "the last argument must be a 1-bit immediate" } */
+  l1 = _mm256_insertf128_si256 (l2, i1, -1);/* { dg-error "the last argument must be a 1-bit immediate" } */
+}
--- gcc/testsuite/gcc.target/i386/testimm-3.c.jj	2011-06-17 13:57:41.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/testimm-3.c	2011-06-17 14:01:42.000000000 +0200
@@ -0,0 +1,94 @@
+/* PR target/49411 */
+/* { dg-do compile } */
+/* { dg-options "-O0 -mf16c -maes -mpclmul" } */
+
+#include <x86intrin.h>
+
+__m128i i1, i2, i3, i4;
+__m128 a1, a2, a3, a4;
+__m128d d1, d2, d3, d4;
+__m256i l1, l2, l3, l4;
+__m256 b1, b2, b3, b4;
+__m256d e1, e2, e3, e4;
+__m64 m1, m2, m3, m4;
+int k1, k2, k3, k4;
+float f1, f2, f3, f4;
+
+void
+test8bit (void)
+{
+  i1 = _mm_cmpistrm (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpistri (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpistra (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpistrc (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpistro (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpistrs (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpistrz (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
+  i1 = _mm_cmpestrm (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpestri (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpestra (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpestrc (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpestro (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpestrs (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  k1 = _mm_cmpestrz (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
+  b1 = _mm256_blend_ps (b2, b3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  k1 = _cvtss_sh (f1, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm256_cvtps_ph (b2, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  b1 = _mm256_dp_ps (b2, b3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  e1 = _mm256_permute2f128_pd (e2, e3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
+  b1 = _mm256_permute2f128_ps (b2, b3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
+  l1 = _mm256_permute2f128_si256 (l2, l3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
+  b1 = _mm256_permute_ps (b2, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_aeskeygenassist_si128 (i2, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_blend_epi16 (i2, i3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_clmulepi64_si128 (i2, i3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_cvtps_ph (a1, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  d1 = _mm_dp_pd (d2, d3, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  a1 = _mm_dp_ps (a2, a3, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  a1 = _mm_insert_ps (a2, a3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_mpsadbw_epu8 (i2, i3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  a1 = _mm_permute_ps (a2, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_slli_si128 (i2, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_srli_si128 (i2, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
+}
+
+void
+test5bit (void)
+{
+  d1 = _mm_cmp_sd (d2, d3, k4);		  /* { dg-error "the last argument must be a 5-bit immediate" } */
+  a1 = _mm_cmp_ss (a2, a3, k4);		  /* { dg-error "the last argument must be a 5-bit immediate" } */
+  d1 = _mm_cmp_pd (d2, d3, k4);		  /* { dg-error "the last argument must be a 5-bit immediate" } */
+  a1 = _mm_cmp_ps (a2, a3, k4);		  /* { dg-error "the last argument must be a 5-bit immediate" } */
+  e1 = _mm256_cmp_pd (e2, e3, k4);	  /* { dg-error "the last argument must be a 5-bit immediate" } */
+  b1 = _mm256_cmp_ps (b2, b3, k4);	  /* { dg-error "the last argument must be a 5-bit immediate" } */
+}
+
+void
+test4bit (void)
+{
+  d1 = _mm_round_pd (d2, k4);		  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  d1 = _mm_round_sd (d2, d3, k4);	  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  a1 = _mm_round_ps (a2, k4);		  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  a1 = _mm_round_ss (a2, a2, k4);	  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  a1 = _mm_blend_ps (a2, a3, k4);	  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  e1 = _mm256_blend_pd (e2, e3, k4);	  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  e1 = _mm256_round_pd (e2, k4);	  /* { dg-error "the last argument must be a 4-bit immediate" } */
+  b1 = _mm256_round_ps (b2, k4);	  /* { dg-error "the last argument must be a 4-bit immediate" } */
+}
+
+void
+test2bit (void)
+{
+  d1 = _mm_blend_pd (d2, d3, k4);	  /* { dg-error "the last argument must be a 2-bit immediate" } */
+}
+
+void
+test1bit (void)
+{
+  d1 = _mm256_extractf128_pd (e2, k4);	  /* { dg-error "the last argument must be a 1-bit immediate" } */
+  a1 = _mm256_extractf128_ps (b2, k4);	  /* { dg-error "the last argument must be a 1-bit immediate" } */
+  i1 = _mm256_extractf128_si256 (l2, k4); /* { dg-error "the last argument must be a 1-bit immediate" } */
+  e1 = _mm256_insertf128_pd (e2, d1, k4); /* { dg-error "the last argument must be a 1-bit immediate" } */
+  b1 = _mm256_insertf128_ps (b2, a1, k4); /* { dg-error "the last argument must be a 1-bit immediate" } */
+  l1 = _mm256_insertf128_si256 (l2, i1, k4);/* { dg-error "the last argument must be a 1-bit immediate" } */
+}
--- gcc/testsuite/gcc.target/i386/testimm-4.c.jj	2011-06-17 13:57:49.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/testimm-4.c	2011-06-17 14:19:23.000000000 +0200
@@ -0,0 +1,97 @@
+/* PR target/49411 */
+/* { dg-do assemble } */
+/* { dg-options "-O0 -mf16c -maes -mpclmul" } */
+/* { dg-require-effective-target f16c } */
+/* { dg-require-effective-target vaes } */
+/* { dg-require-effective-target vpclmul } */
+
+#include <x86intrin.h>
+
+__m128i i1, i2, i3, i4;
+__m128 a1, a2, a3, a4;
+__m128d d1, d2, d3, d4;
+__m256i l1, l2, l3, l4;
+__m256 b1, b2, b3, b4;
+__m256d e1, e2, e3, e4;
+__m64 m1, m2, m3, m4;
+int k1, k2, k3, k4;
+float f1, f2, f3, f4;
+
+void
+test8bit (void)
+{
+  i1 = _mm_cmpistrm (i2, i3, 255);
+  k1 = _mm_cmpistri (i2, i3, 255);
+  k1 = _mm_cmpistra (i2, i3, 255);
+  k1 = _mm_cmpistrc (i2, i3, 255);
+  k1 = _mm_cmpistro (i2, i3, 255);
+  k1 = _mm_cmpistrs (i2, i3, 255);
+  k1 = _mm_cmpistrz (i2, i3, 255);
+  i1 = _mm_cmpestrm (i2, k2, i3, k3, 255);
+  k1 = _mm_cmpestri (i2, k2, i3, k3, 255);
+  k1 = _mm_cmpestra (i2, k2, i3, k3, 255);
+  k1 = _mm_cmpestrc (i2, k2, i3, k3, 255);
+  k1 = _mm_cmpestro (i2, k2, i3, k3, 255);
+  k1 = _mm_cmpestrs (i2, k2, i3, k3, 255);
+  k1 = _mm_cmpestrz (i2, k2, i3, k3, 255);
+  b1 = _mm256_blend_ps (b2, b3, 255);
+  k1 = _cvtss_sh (f1, 255);
+  i1 = _mm256_cvtps_ph (b2, 255);
+  b1 = _mm256_dp_ps (b2, b3, 255);
+  e1 = _mm256_permute2f128_pd (e2, e3, 255);
+  b1 = _mm256_permute2f128_ps (b2, b3, 255);
+  l1 = _mm256_permute2f128_si256 (l2, l3, 255);
+  b1 = _mm256_permute_ps (b2, 255);
+  i1 = _mm_aeskeygenassist_si128 (i2, 255);
+  i1 = _mm_blend_epi16 (i2, i3, 255);
+  i1 = _mm_clmulepi64_si128 (i2, i3, 255);
+  i1 = _mm_cvtps_ph (a1, 255);
+  d1 = _mm_dp_pd (d2, d3, 255);
+  a1 = _mm_dp_ps (a2, a3, 255);
+  a1 = _mm_insert_ps (a2, a3, 255);
+  i1 = _mm_mpsadbw_epu8 (i2, i3, 255);
+  a1 = _mm_permute_ps (a2, 255);
+  i1 = _mm_slli_si128 (i2, 255);
+  i1 = _mm_srli_si128 (i2, 255);
+}
+
+void
+test5bit (void)
+{
+  d1 = _mm_cmp_sd (d2, d3, 31);
+  a1 = _mm_cmp_ss (a2, a3, 31);
+  d1 = _mm_cmp_pd (d2, d3, 31);
+  a1 = _mm_cmp_ps (a2, a3, 31);
+  e1 = _mm256_cmp_pd (e2, e3, 31);
+  b1 = _mm256_cmp_ps (b2, b3, 31);
+}
+
+void
+test4bit (void)
+{
+  d1 = _mm_round_pd (d2, 15);
+  d1 = _mm_round_sd (d2, d3, 15);
+  a1 = _mm_round_ps (a2, 15);
+  a1 = _mm_round_ss (a2, a2, 15);
+  a1 = _mm_blend_ps (a2, a3, 15);
+  e1 = _mm256_blend_pd (e2, e3, 15);
+  e1 = _mm256_round_pd (e2, 15);
+  b1 = _mm256_round_ps (b2, 15);
+}
+
+void
+test2bit (void)
+{
+  d1 = _mm_blend_pd (d2, d3, 3);
+}
+
+void
+test1bit (void)
+{
+  d1 = _mm256_extractf128_pd (e2, 1);
+  a1 = _mm256_extractf128_ps (b2, 1);
+  i1 = _mm256_extractf128_si256 (l2, 1);
+  e1 = _mm256_insertf128_pd (e2, d1, 1);
+  b1 = _mm256_insertf128_ps (b2, a1, 1);
+  l1 = _mm256_insertf128_si256 (l2, i1, 1);
+}
--- gcc/testsuite/gcc.target/i386/testimm-5.c.jj	2011-06-17 13:59:08.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/testimm-5.c	2011-06-17 14:19:27.000000000 +0200
@@ -0,0 +1,8 @@
+/* PR target/49411 */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -mf16c -maes -mpclmul" } */
+/* { dg-require-effective-target f16c } */
+/* { dg-require-effective-target vaes } */
+/* { dg-require-effective-target vpclmul } */
+
+#include "testimm-4.c"
--- gcc/testsuite/gcc.target/i386/testimm-6.c.jj	2011-06-17 14:00:40.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/testimm-6.c	2011-06-17 14:17:18.000000000 +0200
@@ -0,0 +1,41 @@
+/* PR target/49411 */
+/* { dg-do compile } */
+/* { dg-options "-O0 -mxop" } */
+
+#include <x86intrin.h>
+
+__m128i i1, i2, i3, i4;
+__m128 a1, a2, a3, a4;
+__m128d d1, d2, d3, d4;
+__m256i l1, l2, l3, l4;
+__m256 b1, b2, b3, b4;
+__m256d e1, e2, e3, e4;
+__m64 m1, m2, m3, m4;
+int k1, k2, k3, k4;
+float f1, f2, f3, f4;
+
+void
+test2bit (void)
+{
+  d1 = _mm_permute2_pd (d2, d3, i1, 17);	/* { dg-error "the last argument must be a 2-bit immediate" } */
+  e1 = _mm256_permute2_pd (e2, e3, l1, 17);	/* { dg-error "the last argument must be a 2-bit immediate" } */
+  a1 = _mm_permute2_ps (a2, a3, i1, 17);	/* { dg-error "the last argument must be a 2-bit immediate" } */
+  b1 = _mm256_permute2_ps (b2, b3, l1, 17);	/* { dg-error "the last argument must be a 2-bit immediate" } */
+  d1 = _mm_permute2_pd (d2, d3, i1, k4);	/* { dg-error "the last argument must be a 2-bit immediate" } */
+  e1 = _mm256_permute2_pd (e2, e3, l1, k4);	/* { dg-error "the last argument must be a 2-bit immediate" } */
+  a1 = _mm_permute2_ps (a2, a3, i1, k4);	/* { dg-error "the last argument must be a 2-bit immediate" } */
+  b1 = _mm256_permute2_ps (b2, b3, l1, k4);	/* { dg-error "the last argument must be a 2-bit immediate" } */
+}
+
+void
+test2args (void)
+{
+  i1 = _mm_extracti_si64 (i2, 256, 0);		/* { dg-error "the next to last argument must be an 8-bit immediate" } */
+  i1 = _mm_extracti_si64 (i2, 0, 256);		/* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_inserti_si64 (i2, i3, 256, 0);	/* { dg-error "the next to last argument must be an 8-bit immediate" } */
+  i2 = _mm_inserti_si64 (i2, i3, 0, 256);	/* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_extracti_si64 (i2, k4, 0);		/* { dg-error "the next to last argument must be an 8-bit immediate" } */
+  i1 = _mm_extracti_si64 (i2, 0, k4);		/* { dg-error "the last argument must be an 8-bit immediate" } */
+  i1 = _mm_inserti_si64 (i2, i3, k4, 0);	/* { dg-error "the next to last argument must be an 8-bit immediate" } */
+  i2 = _mm_inserti_si64 (i2, i3, 0, k4);	/* { dg-error "the last argument must be an 8-bit immediate" } */
+}
--- gcc/testsuite/gcc.target/i386/testimm-7.c.jj	2011-06-17 14:17:04.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/testimm-7.c	2011-06-17 14:20:02.000000000 +0200
@@ -0,0 +1,46 @@
+/* PR target/49411 */
+/* { dg-do assemble } */
+/* { dg-options "-O0 -mxop" } */
+/* { dg-require-effective-target xop } */
+
+#include <x86intrin.h>
+
+__m128i i1, i2, i3, i4;
+__m128 a1, a2, a3, a4;
+__m128d d1, d2, d3, d4;
+__m256i l1, l2, l3, l4;
+__m256 b1, b2, b3, b4;
+__m256d e1, e2, e3, e4;
+__m64 m1, m2, m3, m4;
+int k1, k2, k3, k4;
+float f1, f2, f3, f4;
+
+void
+test2bit (void)
+{
+  d1 = _mm_permute2_pd (d2, d3, i1, 3);
+  e1 = _mm256_permute2_pd (e2, e3, l1, 3);
+  a1 = _mm_permute2_ps (a2, a3, i1, 3);
+  b1 = _mm256_permute2_ps (b2, b3, l1, 3);
+  d1 = _mm_permute2_pd (d2, d3, i1, 0);
+  e1 = _mm256_permute2_pd (e2, e3, l1, 0);
+  a1 = _mm_permute2_ps (a2, a3, i1, 0);
+  b1 = _mm256_permute2_ps (b2, b3, l1, 0);
+}
+
+void
+test2args (void)
+{
+  i1 = _mm_extracti_si64 (i2, 255, 0);
+  i1 = _mm_extracti_si64 (i2, 0, 255);
+  i1 = _mm_inserti_si64 (i2, i3, 255, 0);
+  i2 = _mm_inserti_si64 (i2, i3, 0, 255);
+  i1 = _mm_extracti_si64 (i2, 255, 255);
+  i1 = _mm_extracti_si64 (i2, 255, 255);
+  i1 = _mm_inserti_si64 (i2, i3, 255, 255);
+  i2 = _mm_inserti_si64 (i2, i3, 255, 255);
+  i1 = _mm_extracti_si64 (i2, 0, 0);
+  i1 = _mm_extracti_si64 (i2, 0, 0);
+  i1 = _mm_inserti_si64 (i2, i3, 0, 0);
+  i2 = _mm_inserti_si64 (i2, i3, 0, 0);
+}
--- gcc/testsuite/gcc.target/i386/testimm-8.c.jj	2011-06-17 14:20:07.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/testimm-8.c	2011-06-17 14:20:12.000000000 +0200
@@ -0,0 +1,6 @@
+/* PR target/49411 */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -mxop" } */
+/* { dg-require-effective-target xop } */
+
+#include "testimm-7.c"
--- gcc/testsuite/gcc.target/i386/xop-rotate1-int.c.jj	2011-06-17 11:08:15.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/xop-rotate1-int.c	2011-06-17 11:08:15.000000000 +0200
@@ -0,0 +1,63 @@
+/* PR target/49411 */
+/* { dg-do run } */
+/* { dg-require-effective-target xop } */
+/* { dg-options "-O2 -mxop" } */
+
+#include "xop-check.h"
+
+#include <x86intrin.h>
+
+extern void abort (void);
+
+union
+{
+  __m128i v;
+  unsigned char c[16];
+  unsigned short s[8];
+  unsigned int i[4];
+  unsigned long long l[2];
+} a, b, c, d;
+
+#define TEST1(F, N, S, SS) \
+do {							\
+  for (i = 0; i < sizeof (a.F) / sizeof (a.F[0]); i++)	\
+    a.F[i] = i * 17;					\
+  s = _mm_set1_epi##SS (N);				\
+  b.v = _mm_roti_epi##S (a.v, N);			\
+  c.v = _mm_rot_epi##S (a.v, s);			\
+  for (i = 0; i < sizeof (a.F) / sizeof (a.F[0]); i++)	\
+    {							\
+      int mask = __CHAR_BIT__ * sizeof (a.F[i]) - 1;	\
+      d.F[i] = a.F[i] << (N & mask);			\
+      if (N & mask)					\
+	d.F[i] |= a.F[i] >> (mask + 1 - (N & mask));	\
+      if (b.F[i] != c.F[i] || b.F[i] != d.F[i])		\
+	abort ();					\
+    }							\
+} while (0)
+#define TEST(N) \
+  TEST1 (c, N, 8, 8);					\
+  TEST1 (s, N, 16, 16);					\
+  TEST1 (i, N, 32, 32);					\
+  TEST1 (l, N, 64, 64x)
+
+volatile int n;
+
+static void
+xop_test (void)
+{
+  unsigned int i;
+  __m128i s;
+
+#ifndef NON_CONST
+  TEST (5);
+  TEST (-5);
+  TEST (0);
+  TEST (31);
+#else
+  n = 5; TEST (n);
+  n = -5; TEST (n);
+  n = 0; TEST (n);
+  n = 31; TEST (n);
+#endif
+}
--- gcc/testsuite/gcc.target/i386/xop-rotate2-int.c.jj	2011-06-17 11:08:15.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/xop-rotate2-int.c	2011-06-17 11:08:15.000000000 +0200
@@ -0,0 +1,7 @@
+/* PR target/49411 */
+/* { dg-do run } */
+/* { dg-require-effective-target xop } */
+/* { dg-options "-O2 -mxop" } */
+
+#define NON_CONST 1
+#include "xop-rotate1-int.c"

	Jakub

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Fwd: [PATCH] Fix ICEs with out of range immediates in SSE*/AVX*/XOP* intrinsics (PR target/49411)
       [not found]           ` <BANLkTimKVOrTMzuceNrWOPUbHf6H34iQ4Q@mail.gmail.com>
@ 2011-06-17 20:05             ` Uros Bizjak
  0 siblings, 0 replies; 8+ messages in thread
From: Uros Bizjak @ 2011-06-17 20:05 UTC (permalink / raw)
  To: gcc-patches

On Fri, Jun 17, 2011 at 3:01 PM, Jakub Jelinek <jakub@redhat.com> wrote:

>
> > Not here, those are handled by  ix86_expand_args_builtin
> > instead of ix86_expand_multi_arg_builtin.  Furthermore, only
> > CODE_FOR_vcvtps2ph and CODE_FOR_vcvtps2ph256 have CONST_INT argument.
> > And I believe ix86_expand_args_builtin handles it fine, what's wrong
> > is the actual predicates those insns use.
>
> Ok, had a deeper look into this and it seems there are other issues,
> some of them even without test coverage regressed since 4.6.
> Some problems result in ICEs, other fail to assemble.  Had to revert
> the blendbits removal patch, because that removal results in out of
> range immediates not to be reported as predicate failures, but instead
> as ICEs.
>
> So here is an updated patch that adds test coverage.  Regtested
> on x86_64-linux {-m32,-m64}, ok for trunk (and backport for 4.6)?
>
> There are still a couple of things I'm unsure about (not tested
> by the testcases, compile fine):
> #include <x86intrin.h>
> __m128i i1, i2, i3, i4;
> __m128 a1, a2, a3, a4;
> __m128d d1, d2, d3, d4;
> __m256i l1, l2, l3, l4;
> __m256 b1, b2, b3, b4;
> __m256d e1, e2, e3, e4;
> __m64 m1, m2, m3, m4;
> int k1, k2, k3, k4;
> float f1, f2, f3, f4;
> void
> foo (void)
> {
>  /* 8 bit imm only?  This compiles fine, but one ends up with
>     number modulo 256 in the insn.  To make it error out
>     const_0_to_255_operand would need to be used.  */
>  e1 = _mm256_shuffle_pd (e2, e3, 256);
>  b1 = _mm256_shuffle_ps (b2, b3, 256);
>  i1 = _mm_shuffle_epi32 (i2, 256);
>  i1 = _mm_shufflehi_epi16 (i2, 256);
>  i1 = _mm_shufflelo_epi16 (i2, 256);
>  d1 = _mm_shuffle_pd (d2, d3, 256);
>  m1 = _mm_shuffle_pi16 (m2, 256);
>  a1 = _mm_shuffle_ps (a2, a3, 256);

These actually take macro function for shuffle. But I think that we
should use const_0_to_255 here, since this is the range that assembler
recognizes.

>  /* What about these?  Similarly to the above, they result
>     in imm modulo 16 resp. imm modulo 4.  */
>  e1 = _mm256_permute_pd (e2, 16);
>  d1 = _mm_permute_pd (d2, 4);
> }
>

Also const_0_to_255 here; the width of the immediate is specified as
an 8-bit immediate at [1].

[1] http://software.intel.com/sites/products/documentation/hpc/composerxe/en-us/cpp/lin/intref_cls/common/intref_avx_permute_pd.htm

>
> 2011-06-17  Jakub Jelinek  <jakub@redhat.com>
>
>        PR target/49411
>        * config/i386/i386.c (ix86_expand_multi_arg_builtins): If
>        last_arg_constant and last argument doesn't match its predicate,
>        for xop_vpermil2<mode>3 error out and for xop_rotl<mode>3
>        if it is CONST_INT, mask it, otherwise expand using rotl<mode>3.
>        (ix86_expand_sse_pcmpestr, ix86_expand_sse_pcmpistr): Fix
>        spelling of error message.
>        * config/i386/sse.md (sse4a_extrqi, sse4a_insertqi,
>        vcvtps2ph, *vcvtps2ph, *vcvtps2ph_store, vcvtps2ph256): Use
>        const_0_to_255_operand instead of const_int_operand.
>
>        Revert:
>        2011-05-09  Uros Bizjak  <ubizjak@gmail.com>
>
>        * config/i386/sse.md (blendbits): Remove mode attribute.
>        (<sse4_1>_blend<ssemodesuffix><avxsizesuffix>): Use const_int_operand
>        instead of const_0_to_<blendbits>_operand for operand 3 predicate.
>        Check integer value of operand 3 in insn constraint.
>
>        * gcc.target/i386/testimm-1.c: New test.
>        * gcc.target/i386/testimm-2.c: New test.
>        * gcc.target/i386/testimm-3.c: New test.
>        * gcc.target/i386/testimm-4.c: New test.
>        * gcc.target/i386/testimm-5.c: New test.
>        * gcc.target/i386/testimm-6.c: New test.
>        * gcc.target/i386/testimm-7.c: New test.
>        * gcc.target/i386/testimm-8.c: New test.
>        * gcc.target/i386/xop-vpermil2px-2.c: New test.
>        * gcc.target/i386/xop-rotate1-int.c: New test.
>        * gcc.target/i386/xop-rotate2-int.c: New test.
>

This is OK for 4.6 and mainline.

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Fix ICEs with out of range immediates in SSE*/AVX*/XOP* intrinsics (PR target/49411)
  2011-06-17 13:16         ` [PATCH] Fix ICEs with out of range immediates in SSE*/AVX*/XOP* " Jakub Jelinek
       [not found]           ` <BANLkTimKVOrTMzuceNrWOPUbHf6H34iQ4Q@mail.gmail.com>
@ 2011-06-20 17:15           ` Kirill Yukhin
  1 sibling, 0 replies; 8+ messages in thread
From: Kirill Yukhin @ 2011-06-20 17:15 UTC (permalink / raw)
  To: Jakub Jelinek
  Cc: Uros Bizjak, Quentin Neill, Sebastian Pop, Fang, Changpeng, gcc-patches

Folks,
I think the implementation in this patch is not very good. It introduces
handling of specific instructions in ix86_expand_multi_arg_builtin(),
which until now was really generic.
It operated only on abstract insns; only the number/type of arguments
mattered. But now there are INSN_CODE switches, gotos …

Thanks, Kirill

On Fri, Jun 17, 2011 at 5:01 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Fri, Jun 17, 2011 at 01:31:14AM +0200, Jakub Jelinek wrote:
>> Not here, those are handled by  ix86_expand_args_builtin
>> instead of ix86_expand_multi_arg_builtin.  Furthermore, only
>> CODE_FOR_vcvtps2ph and CODE_FOR_vcvtps2ph256 have CONST_INT argument.
>> And I believe ix86_expand_args_builtin handles it fine, what's wrong
>> is the actual predicates those insns use.
>
> Ok, had a deeper look into this and it seems there are other issues,
> some of them even without test coverage regressed since 4.6.
> Some problems result in ICEs, other fail to assemble.  Had to revert
> the blendbits removal patch, because that removal results in out of
> range immediates not to be reported as predicate failures, but instead
> as ICEs.
>
> So here is an updated patch that adds test coverage.  Regtested
> on x86_64-linux {-m32,-m64}, ok for trunk (and backport for 4.6)?
>
> There are still a couple of things I'm unsure about (not tested
> by the testcases, compile fine):
> #include <x86intrin.h>
> __m128i i1, i2, i3, i4;
> __m128 a1, a2, a3, a4;
> __m128d d1, d2, d3, d4;
> __m256i l1, l2, l3, l4;
> __m256 b1, b2, b3, b4;
> __m256d e1, e2, e3, e4;
> __m64 m1, m2, m3, m4;
> int k1, k2, k3, k4;
> float f1, f2, f3, f4;
> void
> foo (void)
> {
>  /* 8 bit imm only?  This compiles fine, but one ends up with
>     number modulo 256 in the insn.  To make it error out
>     const_0_to_255_operand would need to be used.  */
>  e1 = _mm256_shuffle_pd (e2, e3, 256);
>  b1 = _mm256_shuffle_ps (b2, b3, 256);
>  i1 = _mm_shuffle_epi32 (i2, 256);
>  i1 = _mm_shufflehi_epi16 (i2, 256);
>  i1 = _mm_shufflelo_epi16 (i2, 256);
>  d1 = _mm_shuffle_pd (d2, d3, 256);
>  m1 = _mm_shuffle_pi16 (m2, 256);
>  a1 = _mm_shuffle_ps (a2, a3, 256);
>  /* What about these?  Similarly to the above, they result
>     in imm modulo 16 resp. imm modulo 4.  */
>  e1 = _mm256_permute_pd (e2, 16);
>  d1 = _mm_permute_pd (d2, 4);
> }
>
> 2011-06-17  Jakub Jelinek  <jakub@redhat.com>
>
>        PR target/49411
>        * config/i386/i386.c (ix86_expand_multi_arg_builtins): If
>        last_arg_constant and last argument doesn't match its predicate,
>        for xop_vpermil2<mode>3 error out and for xop_rotl<mode>3
>        if it is CONST_INT, mask it, otherwise expand using rotl<mode>3.
>        (ix86_expand_sse_pcmpestr, ix86_expand_sse_pcmpistr): Fix
>        spelling of error message.
>        * config/i386/sse.md (sse4a_extrqi, sse4a_insertqi,
>        vcvtps2ph, *vcvtps2ph, *vcvtps2ph_store, vcvtps2ph256): Use
>        const_0_to_255_operand instead of const_int_operand.
>
>        Revert:
>        2011-05-09  Uros Bizjak  <ubizjak@gmail.com>
>
>        * config/i386/sse.md (blendbits): Remove mode attribute.
>        (<sse4_1>_blend<ssemodesuffix><avxsizesuffix>): Use const_int_operand
>        instead of const_0_to_<blendbits>_operand for operand 3 predicate.
>        Check integer value of operand 3 in insn constraint.
>
>        * gcc.target/i386/testimm-1.c: New test.
>        * gcc.target/i386/testimm-2.c: New test.
>        * gcc.target/i386/testimm-3.c: New test.
>        * gcc.target/i386/testimm-4.c: New test.
>        * gcc.target/i386/testimm-5.c: New test.
>        * gcc.target/i386/testimm-6.c: New test.
>        * gcc.target/i386/testimm-7.c: New test.
>        * gcc.target/i386/testimm-8.c: New test.
>        * gcc.target/i386/xop-vpermil2px-2.c: New test.
>        * gcc.target/i386/xop-rotate1-int.c: New test.
>        * gcc.target/i386/xop-rotate2-int.c: New test.
>
> --- gcc/config/i386/i386.c.jj   2011-06-17 11:02:11.000000000 +0200
> +++ gcc/config/i386/i386.c      2011-06-17 13:35:26.000000000 +0200
> @@ -25566,16 +25566,61 @@ ix86_expand_multi_arg_builtin (enum insn
>       int adjust = (comparison_p) ? 1 : 0;
>       enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
>
> -      if (last_arg_constant && i == nargs-1)
> +      if (last_arg_constant && i == nargs - 1)
>        {
> -         if (!CONST_INT_P (op))
> +         if (!insn_data[icode].operand[i + 1].predicate (op, mode))
>            {
> -             error ("last argument must be an immediate");
> -             return gen_reg_rtx (tmode);
> +             enum insn_code new_icode = icode;
> +             switch (icode)
> +               {
> +               case CODE_FOR_xop_vpermil2v2df3:
> +               case CODE_FOR_xop_vpermil2v4sf3:
> +               case CODE_FOR_xop_vpermil2v4df3:
> +               case CODE_FOR_xop_vpermil2v8sf3:
> +                 error ("the last argument must be a 2-bit immediate");
> +                 return gen_reg_rtx (tmode);
> +               case CODE_FOR_xop_rotlv2di3:
> +                 new_icode = CODE_FOR_rotlv2di3;
> +                 goto xop_rotl;
> +               case CODE_FOR_xop_rotlv4si3:
> +                 new_icode = CODE_FOR_rotlv4si3;
> +                 goto xop_rotl;
> +               case CODE_FOR_xop_rotlv8hi3:
> +                 new_icode = CODE_FOR_rotlv8hi3;
> +                 goto xop_rotl;
> +               case CODE_FOR_xop_rotlv16qi3:
> +                 new_icode = CODE_FOR_rotlv16qi3;
> +               xop_rotl:
> +                 if (CONST_INT_P (op))
> +                   {
> +                     int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
> +                     op = GEN_INT (INTVAL (op) & mask);
> +                     gcc_checking_assert
> +                       (insn_data[icode].operand[i + 1].predicate (op, mode));
> +                   }
> +                 else
> +                   {
> +                     gcc_checking_assert
> +                       (nargs == 2
> +                        && insn_data[new_icode].operand[0].mode == tmode
> +                        && insn_data[new_icode].operand[1].mode == tmode
> +                        && insn_data[new_icode].operand[2].mode == mode
> +                        && insn_data[new_icode].operand[0].predicate
> +                           == insn_data[icode].operand[0].predicate
> +                        && insn_data[new_icode].operand[1].predicate
> +                           == insn_data[icode].operand[1].predicate);
> +                     icode = new_icode;
> +                     goto non_constant;
> +                   }
> +                 break;
> +               default:
> +                 gcc_unreachable ();
> +               }
>            }
>        }
>       else
>        {
> +       non_constant:
>          if (VECTOR_MODE_P (mode))
>            op = safe_vector_operand (op, mode);
>
> @@ -25900,7 +25945,7 @@ ix86_expand_sse_pcmpestr (const struct b
>
>   if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
>     {
> -      error ("the fifth argument must be a 8-bit immediate");
> +      error ("the fifth argument must be an 8-bit immediate");
>       return const0_rtx;
>     }
>
> @@ -25995,7 +26040,7 @@ ix86_expand_sse_pcmpistr (const struct b
>
>   if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
>     {
> -      error ("the third argument must be a 8-bit immediate");
> +      error ("the third argument must be an 8-bit immediate");
>       return const0_rtx;
>     }
>
> --- gcc/config/i386/sse.md.jj   2011-06-17 11:02:11.000000000 +0200
> +++ gcc/config/i386/sse.md      2011-06-17 14:14:09.000000000 +0200
> @@ -188,6 +188,10 @@ (define_mode_iterator AVX256MODE2P [V8SI
>
>  (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
>
> +;; Mapping of immediate bits for blend instructions
> +(define_mode_attr blendbits
> +  [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
> +
>  ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
>
>  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> @@ -7707,8 +7711,8 @@ (define_insn "sse4a_vmmovnt<mode>"
>  (define_insn "sse4a_extrqi"
>   [(set (match_operand:V2DI 0 "register_operand" "=x")
>         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
> -                      (match_operand 2 "const_int_operand" "")
> -                      (match_operand 3 "const_int_operand" "")]
> +                      (match_operand 2 "const_0_to_255_operand" "")
> +                      (match_operand 3 "const_0_to_255_operand" "")]
>                      UNSPEC_EXTRQI))]
>   "TARGET_SSE4A"
>   "extrq\t{%3, %2, %0|%0, %2, %3}"
> @@ -7732,8 +7736,8 @@ (define_insn "sse4a_insertqi"
>   [(set (match_operand:V2DI 0 "register_operand" "=x")
>         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
>                      (match_operand:V2DI 2 "register_operand" "x")
> -                      (match_operand 3 "const_int_operand" "")
> -                      (match_operand 4 "const_int_operand" "")]
> +                      (match_operand 3 "const_0_to_255_operand" "")
> +                      (match_operand 4 "const_0_to_255_operand" "")]
>                      UNSPEC_INSERTQI))]
>   "TARGET_SSE4A"
>   "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
> @@ -7766,9 +7770,8 @@ (define_insn "<sse4_1>_blend<ssemodesuff
>        (vec_merge:VF
>          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
>          (match_operand:VF 1 "register_operand" "0,x")
> -         (match_operand:SI 3 "const_int_operand" "")))]
> -  "TARGET_SSE4_1
> -   && IN_RANGE (INTVAL (operands[3]), 0, (1 << GET_MODE_NUNITS (<MODE>mode))-1)"
> +         (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
> +  "TARGET_SSE4_1"
>   "@
>    blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
>    vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
> @@ -10327,7 +10330,7 @@ (define_expand "vcvtps2ph"
>   [(set (match_operand:V8HI 0 "register_operand" "")
>        (vec_concat:V8HI
>          (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
> -                       (match_operand:SI 2 "immediate_operand" "")]
> +                       (match_operand:SI 2 "const_0_to_255_operand" "")]
>                       UNSPEC_VCVTPS2PH)
>          (match_dup 3)))]
>   "TARGET_F16C"
> @@ -10337,7 +10340,7 @@ (define_insn "*vcvtps2ph"
>   [(set (match_operand:V8HI 0 "register_operand" "=x")
>        (vec_concat:V8HI
>          (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
> -                       (match_operand:SI 2 "immediate_operand" "N")]
> +                       (match_operand:SI 2 "const_0_to_255_operand" "N")]
>                       UNSPEC_VCVTPS2PH)
>          (match_operand:V4HI 3 "const0_operand" "")))]
>   "TARGET_F16C"
> @@ -10349,7 +10352,7 @@ (define_insn "*vcvtps2ph"
>  (define_insn "*vcvtps2ph_store"
>   [(set (match_operand:V4HI 0 "memory_operand" "=m")
>        (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
> -                     (match_operand:SI 2 "immediate_operand" "N")]
> +                     (match_operand:SI 2 "const_0_to_255_operand" "N")]
>                     UNSPEC_VCVTPS2PH))]
>   "TARGET_F16C"
>   "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
> @@ -10360,7 +10363,7 @@ (define_insn "*vcvtps2ph_store"
>  (define_insn "vcvtps2ph256"
>   [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
>        (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
> -                     (match_operand:SI 2 "immediate_operand" "N")]
> +                     (match_operand:SI 2 "const_0_to_255_operand" "N")]
>                     UNSPEC_VCVTPS2PH))]
>   "TARGET_F16C"
>   "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
> --- gcc/testsuite/gcc.target/i386/testimm-1.c.jj        2011-06-17 13:37:44.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/testimm-1.c   2011-06-17 14:01:34.000000000 +0200
> @@ -0,0 +1,94 @@
> +/* PR target/49411 */
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -mf16c -maes -mpclmul" } */
> +
> +#include <x86intrin.h>
> +
> +__m128i i1, i2, i3, i4;
> +__m128 a1, a2, a3, a4;
> +__m128d d1, d2, d3, d4;
> +__m256i l1, l2, l3, l4;
> +__m256 b1, b2, b3, b4;
> +__m256d e1, e2, e3, e4;
> +__m64 m1, m2, m3, m4;
> +int k1, k2, k3, k4;
> +float f1, f2, f3, f4;
> +
> +void
> +test8bit (void)
> +{
> +  i1 = _mm_cmpistrm (i2, i3, 256);       /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpistri (i2, i3, 256);       /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpistra (i2, i3, 256);       /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpistrc (i2, i3, 256);       /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpistro (i2, i3, 256);       /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpistrs (i2, i3, 256);       /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpistrz (i2, i3, 256);       /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  i1 = _mm_cmpestrm (i2, k2, i3, k3, 256);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpestri (i2, k2, i3, k3, 256);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpestra (i2, k2, i3, k3, 256);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpestrc (i2, k2, i3, k3, 256);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpestro (i2, k2, i3, k3, 256);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpestrs (i2, k2, i3, k3, 256);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpestrz (i2, k2, i3, k3, 256);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  b1 = _mm256_blend_ps (b2, b3, 256);    /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  k1 = _cvtss_sh (f1, 256);              /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm256_cvtps_ph (b2, 256);        /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  b1 = _mm256_dp_ps (b2, b3, 256);       /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  e1 = _mm256_permute2f128_pd (e2, e3, 256);/* { dg-error "the last argument must be an 8-bit immediate" } */
> +  b1 = _mm256_permute2f128_ps (b2, b3, 256);/* { dg-error "the last argument must be an 8-bit immediate" } */
> +  l1 = _mm256_permute2f128_si256 (l2, l3, 256);/* { dg-error "the last argument must be an 8-bit immediate" } */
> +  b1 = _mm256_permute_ps (b2, 256);      /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_aeskeygenassist_si128 (i2, 256);/* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_blend_epi16 (i2, i3, 256);    /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_clmulepi64_si128 (i2, i3, 256);/* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_cvtps_ph (a1, 256);           /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  d1 = _mm_dp_pd (d2, d3, 256);                  /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  a1 = _mm_dp_ps (a2, a3, 256);                  /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  a1 = _mm_insert_ps (a2, a3, 256);      /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_mpsadbw_epu8 (i2, i3, 256);   /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  a1 = _mm_permute_ps (a2, 256);         /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_slli_si128 (i2, 256);         /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_srli_si128 (i2, 256);         /* { dg-error "the last argument must be an 8-bit immediate" } */
> +}
> +
> +void
> +test5bit (void)
> +{
> +  d1 = _mm_cmp_sd (d2, d3, 32);                  /* { dg-error "the last argument must be a 5-bit immediate" } */
> +  a1 = _mm_cmp_ss (a2, a3, 32);                  /* { dg-error "the last argument must be a 5-bit immediate" } */
> +  d1 = _mm_cmp_pd (d2, d3, 32);                  /* { dg-error "the last argument must be a 5-bit immediate" } */
> +  a1 = _mm_cmp_ps (a2, a3, 32);                  /* { dg-error "the last argument must be a 5-bit immediate" } */
> +  e1 = _mm256_cmp_pd (e2, e3, 32);       /* { dg-error "the last argument must be a 5-bit immediate" } */
> +  b1 = _mm256_cmp_ps (b2, b3, 32);       /* { dg-error "the last argument must be a 5-bit immediate" } */
> +}
> +
> +void
> +test4bit (void)
> +{
> +  d1 = _mm_round_pd (d2, 16);            /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  d1 = _mm_round_sd (d2, d3, 16);        /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  a1 = _mm_round_ps (a2, 16);            /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  a1 = _mm_round_ss (a2, a2, 16);        /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  a1 = _mm_blend_ps (a2, a3, 16);        /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  e1 = _mm256_blend_pd (e2, e3, 16);     /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  e1 = _mm256_round_pd (e2, 16);         /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  b1 = _mm256_round_ps (b2, 16);         /* { dg-error "the last argument must be a 4-bit immediate" } */
> +}
> +
> +void
> +test2bit (void)
> +{
> +  d1 = _mm_blend_pd (d2, d3, 4);         /* { dg-error "the last argument must be a 2-bit immediate" } */
> +}
> +
> +void
> +test1bit (void)
> +{
> +  d1 = _mm256_extractf128_pd (e2, 2);    /* { dg-error "the last argument must be a 1-bit immediate" } */
> +  a1 = _mm256_extractf128_ps (b2, 2);    /* { dg-error "the last argument must be a 1-bit immediate" } */
> +  i1 = _mm256_extractf128_si256 (l2, 2);  /* { dg-error "the last argument must be a 1-bit immediate" } */
> +  e1 = _mm256_insertf128_pd (e2, d1, 2);  /* { dg-error "the last argument must be a 1-bit immediate" } */
> +  b1 = _mm256_insertf128_ps (b2, a1, 2);  /* { dg-error "the last argument must be a 1-bit immediate" } */
> +  l1 = _mm256_insertf128_si256 (l2, i1, 2);/* { dg-error "the last argument must be a 1-bit immediate" } */
> +}
> --- gcc/testsuite/gcc.target/i386/testimm-2.c.jj        2011-06-17 13:37:52.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/testimm-2.c   2011-06-17 14:01:38.000000000 +0200
> @@ -0,0 +1,94 @@
> +/* PR target/49411 */
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -mf16c -maes -mpclmul" } */
> +
> +#include <x86intrin.h>
> +
> +__m128i i1, i2, i3, i4;
> +__m128 a1, a2, a3, a4;
> +__m128d d1, d2, d3, d4;
> +__m256i l1, l2, l3, l4;
> +__m256 b1, b2, b3, b4;
> +__m256d e1, e2, e3, e4;
> +__m64 m1, m2, m3, m4;
> +int k1, k2, k3, k4;
> +float f1, f2, f3, f4;
> +
> +void
> +test8bit (void)
> +{
> +  i1 = _mm_cmpistrm (i2, i3, -10);       /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpistri (i2, i3, -10);       /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpistra (i2, i3, -10);       /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpistrc (i2, i3, -10);       /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpistro (i2, i3, -10);       /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpistrs (i2, i3, -10);       /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpistrz (i2, i3, -10);       /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  i1 = _mm_cmpestrm (i2, k2, i3, k3, -10);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpestri (i2, k2, i3, k3, -10);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpestra (i2, k2, i3, k3, -10);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpestrc (i2, k2, i3, k3, -10);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpestro (i2, k2, i3, k3, -10);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpestrs (i2, k2, i3, k3, -10);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpestrz (i2, k2, i3, k3, -10);/* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  b1 = _mm256_blend_ps (b2, b3, -10);    /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  k1 = _cvtss_sh (f1, -10);              /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm256_cvtps_ph (b2, -10);        /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  b1 = _mm256_dp_ps (b2, b3, -10);       /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  e1 = _mm256_permute2f128_pd (e2, e3, -10);/* { dg-error "the last argument must be an 8-bit immediate" } */
> +  b1 = _mm256_permute2f128_ps (b2, b3, -10);/* { dg-error "the last argument must be an 8-bit immediate" } */
> +  l1 = _mm256_permute2f128_si256 (l2, l3, -10);/* { dg-error "the last argument must be an 8-bit immediate" } */
> +  b1 = _mm256_permute_ps (b2, -10);      /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_aeskeygenassist_si128 (i2, -10);/* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_blend_epi16 (i2, i3, -10);    /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_clmulepi64_si128 (i2, i3, -10);/* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_cvtps_ph (a1, -10);           /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  d1 = _mm_dp_pd (d2, d3, -10);                  /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  a1 = _mm_dp_ps (a2, a3, -10);                  /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  a1 = _mm_insert_ps (a2, a3, -10);      /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_mpsadbw_epu8 (i2, i3, -10);   /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  a1 = _mm_permute_ps (a2, -10);         /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_slli_si128 (i2, -10);         /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_srli_si128 (i2, -10);         /* { dg-error "the last argument must be an 8-bit immediate" } */
> +}
> +
> +void
> +test5bit (void)
> +{
> +  d1 = _mm_cmp_sd (d2, d3, -7);                  /* { dg-error "the last argument must be a 5-bit immediate" } */
> +  a1 = _mm_cmp_ss (a2, a3, -7);                  /* { dg-error "the last argument must be a 5-bit immediate" } */
> +  d1 = _mm_cmp_pd (d2, d3, -7);                  /* { dg-error "the last argument must be a 5-bit immediate" } */
> +  a1 = _mm_cmp_ps (a2, a3, -7);                  /* { dg-error "the last argument must be a 5-bit immediate" } */
> +  e1 = _mm256_cmp_pd (e2, e3, -7);       /* { dg-error "the last argument must be a 5-bit immediate" } */
> +  b1 = _mm256_cmp_ps (b2, b3, -7);       /* { dg-error "the last argument must be a 5-bit immediate" } */
> +}
> +
> +void
> +test4bit (void)
> +{
> +  d1 = _mm_round_pd (d2, -7);            /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  d1 = _mm_round_sd (d2, d3, -7);        /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  a1 = _mm_round_ps (a2, -7);            /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  a1 = _mm_round_ss (a2, a2, -7);        /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  a1 = _mm_blend_ps (a2, a3, -7);        /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  e1 = _mm256_blend_pd (e2, e3, -7);     /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  e1 = _mm256_round_pd (e2, -7);         /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  b1 = _mm256_round_ps (b2, -7);         /* { dg-error "the last argument must be a 4-bit immediate" } */
> +}
> +
> +void
> +test2bit (void)
> +{
> +  d1 = _mm_blend_pd (d2, d3, -1);        /* { dg-error "the last argument must be a 2-bit immediate" } */
> +}
> +
> +void
> +test1bit (void)
> +{
> +  d1 = _mm256_extractf128_pd (e2, -1);   /* { dg-error "the last argument must be a 1-bit immediate" } */
> +  a1 = _mm256_extractf128_ps (b2, -1);   /* { dg-error "the last argument must be a 1-bit immediate" } */
> +  i1 = _mm256_extractf128_si256 (l2, -1); /* { dg-error "the last argument must be a 1-bit immediate" } */
> +  e1 = _mm256_insertf128_pd (e2, d1, -1); /* { dg-error "the last argument must be a 1-bit immediate" } */
> +  b1 = _mm256_insertf128_ps (b2, a1, -1); /* { dg-error "the last argument must be a 1-bit immediate" } */
> +  l1 = _mm256_insertf128_si256 (l2, i1, -1);/* { dg-error "the last argument must be a 1-bit immediate" } */
> +}
> --- gcc/testsuite/gcc.target/i386/testimm-3.c.jj        2011-06-17 13:57:41.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/testimm-3.c   2011-06-17 14:01:42.000000000 +0200
> @@ -0,0 +1,94 @@
> +/* PR target/49411 */
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -mf16c -maes -mpclmul" } */
> +
> +#include <x86intrin.h>
> +
> +__m128i i1, i2, i3, i4;
> +__m128 a1, a2, a3, a4;
> +__m128d d1, d2, d3, d4;
> +__m256i l1, l2, l3, l4;
> +__m256 b1, b2, b3, b4;
> +__m256d e1, e2, e3, e4;
> +__m64 m1, m2, m3, m4;
> +int k1, k2, k3, k4;
> +float f1, f2, f3, f4;
> +
> +void
> +test8bit (void)
> +{
> +  i1 = _mm_cmpistrm (i2, i3, k4);        /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpistri (i2, i3, k4);        /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpistra (i2, i3, k4);        /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpistrc (i2, i3, k4);        /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpistro (i2, i3, k4);        /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpistrs (i2, i3, k4);        /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpistrz (i2, i3, k4);        /* { dg-error "the third argument must be an 8-bit immediate" } */
> +  i1 = _mm_cmpestrm (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpestri (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpestra (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpestrc (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpestro (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpestrs (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  k1 = _mm_cmpestrz (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
> +  b1 = _mm256_blend_ps (b2, b3, k4);     /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  k1 = _cvtss_sh (f1, k4);               /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm256_cvtps_ph (b2, k4);         /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  b1 = _mm256_dp_ps (b2, b3, k4);        /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  e1 = _mm256_permute2f128_pd (e2, e3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
> +  b1 = _mm256_permute2f128_ps (b2, b3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
> +  l1 = _mm256_permute2f128_si256 (l2, l3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
> +  b1 = _mm256_permute_ps (b2, k4);       /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_aeskeygenassist_si128 (i2, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_blend_epi16 (i2, i3, k4);     /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_clmulepi64_si128 (i2, i3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_cvtps_ph (a1, k4);            /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  d1 = _mm_dp_pd (d2, d3, k4);           /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  a1 = _mm_dp_ps (a2, a3, k4);           /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  a1 = _mm_insert_ps (a2, a3, k4);       /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_mpsadbw_epu8 (i2, i3, k4);    /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  a1 = _mm_permute_ps (a2, k4);                  /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_slli_si128 (i2, k4);                  /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_srli_si128 (i2, k4);                  /* { dg-error "the last argument must be an 8-bit immediate" } */
> +}
> +
> +void
> +test5bit (void)
> +{
> +  d1 = _mm_cmp_sd (d2, d3, k4);                  /* { dg-error "the last argument must be a 5-bit immediate" } */
> +  a1 = _mm_cmp_ss (a2, a3, k4);                  /* { dg-error "the last argument must be a 5-bit immediate" } */
> +  d1 = _mm_cmp_pd (d2, d3, k4);                  /* { dg-error "the last argument must be a 5-bit immediate" } */
> +  a1 = _mm_cmp_ps (a2, a3, k4);                  /* { dg-error "the last argument must be a 5-bit immediate" } */
> +  e1 = _mm256_cmp_pd (e2, e3, k4);       /* { dg-error "the last argument must be a 5-bit immediate" } */
> +  b1 = _mm256_cmp_ps (b2, b3, k4);       /* { dg-error "the last argument must be a 5-bit immediate" } */
> +}
> +
> +void
> +test4bit (void)
> +{
> +  d1 = _mm_round_pd (d2, k4);            /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  d1 = _mm_round_sd (d2, d3, k4);        /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  a1 = _mm_round_ps (a2, k4);            /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  a1 = _mm_round_ss (a2, a2, k4);        /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  a1 = _mm_blend_ps (a2, a3, k4);        /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  e1 = _mm256_blend_pd (e2, e3, k4);     /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  e1 = _mm256_round_pd (e2, k4);         /* { dg-error "the last argument must be a 4-bit immediate" } */
> +  b1 = _mm256_round_ps (b2, k4);         /* { dg-error "the last argument must be a 4-bit immediate" } */
> +}
> +
> +void
> +test2bit (void)
> +{
> +  d1 = _mm_blend_pd (d2, d3, k4);        /* { dg-error "the last argument must be a 2-bit immediate" } */
> +}
> +
> +void
> +test1bit (void)
> +{
> +  d1 = _mm256_extractf128_pd (e2, k4);   /* { dg-error "the last argument must be a 1-bit immediate" } */
> +  a1 = _mm256_extractf128_ps (b2, k4);   /* { dg-error "the last argument must be a 1-bit immediate" } */
> +  i1 = _mm256_extractf128_si256 (l2, k4); /* { dg-error "the last argument must be a 1-bit immediate" } */
> +  e1 = _mm256_insertf128_pd (e2, d1, k4); /* { dg-error "the last argument must be a 1-bit immediate" } */
> +  b1 = _mm256_insertf128_ps (b2, a1, k4); /* { dg-error "the last argument must be a 1-bit immediate" } */
> +  l1 = _mm256_insertf128_si256 (l2, i1, k4);/* { dg-error "the last argument must be a 1-bit immediate" } */
> +}
> --- gcc/testsuite/gcc.target/i386/testimm-4.c.jj        2011-06-17 13:57:49.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/testimm-4.c   2011-06-17 14:19:23.000000000 +0200
> @@ -0,0 +1,97 @@
> +/* PR target/49411 */
> +/* { dg-do assemble } */
> +/* { dg-options "-O0 -mf16c -maes -mpclmul" } */
> +/* { dg-require-effective-target f16c } */
> +/* { dg-require-effective-target vaes } */
> +/* { dg-require-effective-target vpclmul } */
> +
> +#include <x86intrin.h>
> +
> +__m128i i1, i2, i3, i4;
> +__m128 a1, a2, a3, a4;
> +__m128d d1, d2, d3, d4;
> +__m256i l1, l2, l3, l4;
> +__m256 b1, b2, b3, b4;
> +__m256d e1, e2, e3, e4;
> +__m64 m1, m2, m3, m4;
> +int k1, k2, k3, k4;
> +float f1, f2, f3, f4;
> +
> +void
> +test8bit (void)
> +{
> +  i1 = _mm_cmpistrm (i2, i3, 255);
> +  k1 = _mm_cmpistri (i2, i3, 255);
> +  k1 = _mm_cmpistra (i2, i3, 255);
> +  k1 = _mm_cmpistrc (i2, i3, 255);
> +  k1 = _mm_cmpistro (i2, i3, 255);
> +  k1 = _mm_cmpistrs (i2, i3, 255);
> +  k1 = _mm_cmpistrz (i2, i3, 255);
> +  i1 = _mm_cmpestrm (i2, k2, i3, k3, 255);
> +  k1 = _mm_cmpestri (i2, k2, i3, k3, 255);
> +  k1 = _mm_cmpestra (i2, k2, i3, k3, 255);
> +  k1 = _mm_cmpestrc (i2, k2, i3, k3, 255);
> +  k1 = _mm_cmpestro (i2, k2, i3, k3, 255);
> +  k1 = _mm_cmpestrs (i2, k2, i3, k3, 255);
> +  k1 = _mm_cmpestrz (i2, k2, i3, k3, 255);
> +  b1 = _mm256_blend_ps (b2, b3, 255);
> +  k1 = _cvtss_sh (f1, 255);
> +  i1 = _mm256_cvtps_ph (b2, 255);
> +  b1 = _mm256_dp_ps (b2, b3, 255);
> +  e1 = _mm256_permute2f128_pd (e2, e3, 255);
> +  b1 = _mm256_permute2f128_ps (b2, b3, 255);
> +  l1 = _mm256_permute2f128_si256 (l2, l3, 255);
> +  b1 = _mm256_permute_ps (b2, 255);
> +  i1 = _mm_aeskeygenassist_si128 (i2, 255);
> +  i1 = _mm_blend_epi16 (i2, i3, 255);
> +  i1 = _mm_clmulepi64_si128 (i2, i3, 255);
> +  i1 = _mm_cvtps_ph (a1, 255);
> +  d1 = _mm_dp_pd (d2, d3, 255);
> +  a1 = _mm_dp_ps (a2, a3, 255);
> +  a1 = _mm_insert_ps (a2, a3, 255);
> +  i1 = _mm_mpsadbw_epu8 (i2, i3, 255);
> +  a1 = _mm_permute_ps (a2, 255);
> +  i1 = _mm_slli_si128 (i2, 255);
> +  i1 = _mm_srli_si128 (i2, 255);
> +}
> +
> +void
> +test5bit (void)
> +{
> +  d1 = _mm_cmp_sd (d2, d3, 31);
> +  a1 = _mm_cmp_ss (a2, a3, 31);
> +  d1 = _mm_cmp_pd (d2, d3, 31);
> +  a1 = _mm_cmp_ps (a2, a3, 31);
> +  e1 = _mm256_cmp_pd (e2, e3, 31);
> +  b1 = _mm256_cmp_ps (b2, b3, 31);
> +}
> +
> +void
> +test4bit (void)
> +{
> +  d1 = _mm_round_pd (d2, 15);
> +  d1 = _mm_round_sd (d2, d3, 15);
> +  a1 = _mm_round_ps (a2, 15);
> +  a1 = _mm_round_ss (a2, a2, 15);
> +  a1 = _mm_blend_ps (a2, a3, 15);
> +  e1 = _mm256_blend_pd (e2, e3, 15);
> +  e1 = _mm256_round_pd (e2, 15);
> +  b1 = _mm256_round_ps (b2, 15);
> +}
> +
> +void
> +test2bit (void)
> +{
> +  d1 = _mm_blend_pd (d2, d3, 3);
> +}
> +
> +void
> +test1bit (void)
> +{
> +  d1 = _mm256_extractf128_pd (e2, 1);
> +  a1 = _mm256_extractf128_ps (b2, 1);
> +  i1 = _mm256_extractf128_si256 (l2, 1);
> +  e1 = _mm256_insertf128_pd (e2, d1, 1);
> +  b1 = _mm256_insertf128_ps (b2, a1, 1);
> +  l1 = _mm256_insertf128_si256 (l2, i1, 1);
> +}
> --- gcc/testsuite/gcc.target/i386/testimm-5.c.jj        2011-06-17 13:59:08.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/testimm-5.c   2011-06-17 14:19:27.000000000 +0200
> @@ -0,0 +1,8 @@
> +/* PR target/49411 */
> +/* { dg-do assemble } */
> +/* { dg-options "-O2 -mf16c -maes -mpclmul" } */
> +/* { dg-require-effective-target f16c } */
> +/* { dg-require-effective-target vaes } */
> +/* { dg-require-effective-target vpclmul } */
> +
> +#include "testimm-4.c"
> --- gcc/testsuite/gcc.target/i386/testimm-6.c.jj        2011-06-17 14:00:40.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/testimm-6.c   2011-06-17 14:17:18.000000000 +0200
> @@ -0,0 +1,41 @@
> +/* PR target/49411 */
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -mxop" } */
> +
> +#include <x86intrin.h>
> +
> +__m128i i1, i2, i3, i4;
> +__m128 a1, a2, a3, a4;
> +__m128d d1, d2, d3, d4;
> +__m256i l1, l2, l3, l4;
> +__m256 b1, b2, b3, b4;
> +__m256d e1, e2, e3, e4;
> +__m64 m1, m2, m3, m4;
> +int k1, k2, k3, k4;
> +float f1, f2, f3, f4;
> +
> +void
> +test2bit (void)
> +{
> +  d1 = _mm_permute2_pd (d2, d3, i1, 17);       /* { dg-error "the last argument must be a 2-bit immediate" } */
> +  e1 = _mm256_permute2_pd (e2, e3, l1, 17);    /* { dg-error "the last argument must be a 2-bit immediate" } */
> +  a1 = _mm_permute2_ps (a2, a3, i1, 17);       /* { dg-error "the last argument must be a 2-bit immediate" } */
> +  b1 = _mm256_permute2_ps (b2, b3, l1, 17);    /* { dg-error "the last argument must be a 2-bit immediate" } */
> +  d1 = _mm_permute2_pd (d2, d3, i1, k4);       /* { dg-error "the last argument must be a 2-bit immediate" } */
> +  e1 = _mm256_permute2_pd (e2, e3, l1, k4);    /* { dg-error "the last argument must be a 2-bit immediate" } */
> +  a1 = _mm_permute2_ps (a2, a3, i1, k4);       /* { dg-error "the last argument must be a 2-bit immediate" } */
> +  b1 = _mm256_permute2_ps (b2, b3, l1, k4);    /* { dg-error "the last argument must be a 2-bit immediate" } */
> +}
> +
> +void
> +test2args (void)
> +{
> +  i1 = _mm_extracti_si64 (i2, 256, 0);         /* { dg-error "the next to last argument must be an 8-bit immediate" } */
> +  i1 = _mm_extracti_si64 (i2, 0, 256);         /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_inserti_si64 (i2, i3, 256, 0);      /* { dg-error "the next to last argument must be an 8-bit immediate" } */
> +  i2 = _mm_inserti_si64 (i2, i3, 0, 256);      /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_extracti_si64 (i2, k4, 0);          /* { dg-error "the next to last argument must be an 8-bit immediate" } */
> +  i1 = _mm_extracti_si64 (i2, 0, k4);          /* { dg-error "the last argument must be an 8-bit immediate" } */
> +  i1 = _mm_inserti_si64 (i2, i3, k4, 0);       /* { dg-error "the next to last argument must be an 8-bit immediate" } */
> +  i2 = _mm_inserti_si64 (i2, i3, 0, k4);       /* { dg-error "the last argument must be an 8-bit immediate" } */
> +}
> --- gcc/testsuite/gcc.target/i386/testimm-7.c.jj        2011-06-17 14:17:04.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/testimm-7.c   2011-06-17 14:20:02.000000000 +0200
> @@ -0,0 +1,46 @@
> +/* PR target/49411 */
> +/* { dg-do assemble } */
> +/* { dg-options "-O0 -mxop" } */
> +/* { dg-require-effective-target xop } */
> +
> +#include <x86intrin.h>
> +
> +__m128i i1, i2, i3, i4;
> +__m128 a1, a2, a3, a4;
> +__m128d d1, d2, d3, d4;
> +__m256i l1, l2, l3, l4;
> +__m256 b1, b2, b3, b4;
> +__m256d e1, e2, e3, e4;
> +__m64 m1, m2, m3, m4;
> +int k1, k2, k3, k4;
> +float f1, f2, f3, f4;
> +
> +void
> +test2bit (void)
> +{
> +  d1 = _mm_permute2_pd (d2, d3, i1, 3);
> +  e1 = _mm256_permute2_pd (e2, e3, l1, 3);
> +  a1 = _mm_permute2_ps (a2, a3, i1, 3);
> +  b1 = _mm256_permute2_ps (b2, b3, l1, 3);
> +  d1 = _mm_permute2_pd (d2, d3, i1, 0);
> +  e1 = _mm256_permute2_pd (e2, e3, l1, 0);
> +  a1 = _mm_permute2_ps (a2, a3, i1, 0);
> +  b1 = _mm256_permute2_ps (b2, b3, l1, 0);
> +}
> +
> +void
> +test2args (void)
> +{
> +  i1 = _mm_extracti_si64 (i2, 255, 0);
> +  i1 = _mm_extracti_si64 (i2, 0, 255);
> +  i1 = _mm_inserti_si64 (i2, i3, 255, 0);
> +  i2 = _mm_inserti_si64 (i2, i3, 0, 255);
> +  i1 = _mm_extracti_si64 (i2, 255, 255);
> +  i1 = _mm_extracti_si64 (i2, 255, 255);
> +  i1 = _mm_inserti_si64 (i2, i3, 255, 255);
> +  i2 = _mm_inserti_si64 (i2, i3, 255, 255);
> +  i1 = _mm_extracti_si64 (i2, 0, 0);
> +  i1 = _mm_extracti_si64 (i2, 0, 0);
> +  i1 = _mm_inserti_si64 (i2, i3, 0, 0);
> +  i2 = _mm_inserti_si64 (i2, i3, 0, 0);
> +}
> --- gcc/testsuite/gcc.target/i386/testimm-8.c.jj        2011-06-17 14:20:07.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/testimm-8.c   2011-06-17 14:20:12.000000000 +0200
> @@ -0,0 +1,6 @@
> +/* PR target/49411 */
> +/* { dg-do assemble } */
> +/* { dg-options "-O2 -mxop" } */
> +/* { dg-require-effective-target xop } */
> +
> +#include "testimm-7.c"
> --- gcc/testsuite/gcc.target/i386/xop-rotate1-int.c.jj  2011-06-17 11:08:15.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/xop-rotate1-int.c     2011-06-17 11:08:15.000000000 +0200
> @@ -0,0 +1,63 @@
> +/* PR target/49411 */
> +/* { dg-do run } */
> +/* { dg-require-effective-target xop } */
> +/* { dg-options "-O2 -mxop" } */
> +
> +#include "xop-check.h"
> +
> +#include <x86intrin.h>
> +
> +extern void abort (void);
> +
> +union
> +{
> +  __m128i v;
> +  unsigned char c[16];
> +  unsigned short s[8];
> +  unsigned int i[4];
> +  unsigned long long l[2];
> +} a, b, c, d;
> +
> +#define TEST1(F, N, S, SS) \
> +do {                                                   \
> +  for (i = 0; i < sizeof (a.F) / sizeof (a.F[0]); i++) \
> +    a.F[i] = i * 17;                                   \
> +  s = _mm_set1_epi##SS (N);                            \
> +  b.v = _mm_roti_epi##S (a.v, N);                      \
> +  c.v = _mm_rot_epi##S (a.v, s);                       \
> +  for (i = 0; i < sizeof (a.F) / sizeof (a.F[0]); i++) \
> +    {                                                  \
> +      int mask = __CHAR_BIT__ * sizeof (a.F[i]) - 1;   \
> +      d.F[i] = a.F[i] << (N & mask);                   \
> +      if (N & mask)                                    \
> +       d.F[i] |= a.F[i] >> (mask + 1 - (N & mask));    \
> +      if (b.F[i] != c.F[i] || b.F[i] != d.F[i])                \
> +       abort ();                                       \
> +    }                                                  \
> +} while (0)
> +#define TEST(N) \
> +  TEST1 (c, N, 8, 8);                                  \
> +  TEST1 (s, N, 16, 16);                                        \
> +  TEST1 (i, N, 32, 32);                                        \
> +  TEST1 (l, N, 64, 64x)
> +
> +volatile int n;
> +
> +static void
> +xop_test (void)
> +{
> +  unsigned int i;
> +  __m128i s;
> +
> +#ifndef NON_CONST
> +  TEST (5);
> +  TEST (-5);
> +  TEST (0);
> +  TEST (31);
> +#else
> +  n = 5; TEST (n);
> +  n = -5; TEST (n);
> +  n = 0; TEST (n);
> +  n = 31; TEST (n);
> +#endif
> +}
> --- gcc/testsuite/gcc.target/i386/xop-rotate2-int.c.jj  2011-06-17 11:08:15.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/xop-rotate2-int.c     2011-06-17 11:08:15.000000000 +0200
> @@ -0,0 +1,7 @@
> +/* PR target/49411 */
> +/* { dg-do run } */
> +/* { dg-require-effective-target xop } */
> +/* { dg-options "-O2 -mxop" } */
> +
> +#define NON_CONST 1
> +#include "xop-rotate1-int.c"
>
>        Jakub
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2011-06-20 17:03 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-06-15 10:26 [PATCH] Fix ICEs with -mxop __builtin_ia32_vpermil2p[sd]{,256} and __builtin_ia32_vprot[bwdq]i intrinsics (PR target/49411) Jakub Jelinek
2011-06-15 17:00 ` Quentin Neill
2011-06-16 20:42   ` Quentin Neill
2011-06-16 23:18     ` Quentin Neill
2011-06-17  0:24       ` Jakub Jelinek
2011-06-17 13:16         ` [PATCH] Fix ICEs with out of range immediates in SSE*/AVX*/XOP* " Jakub Jelinek
     [not found]           ` <BANLkTimKVOrTMzuceNrWOPUbHf6H34iQ4Q@mail.gmail.com>
2011-06-17 20:05             ` Fwd: " Uros Bizjak
2011-06-20 17:15           ` Kirill Yukhin

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).