public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Use *blendv* for sse_movcc/vcond/vcondu if possible
@ 2011-09-21 12:59 Jakub Jelinek
  2011-09-21 13:00 ` Uros Bizjak
  2011-09-21 15:03 ` Richard Henderson
  0 siblings, 2 replies; 6+ messages in thread
From: Jakub Jelinek @ 2011-09-21 12:59 UTC (permalink / raw)
  To: Uros Bizjak, Richard Henderson; +Cc: gcc-patches, H.J. Lu

Hi!

For vcond{,u} etc. we currently generate a vpandn+vpand+vpor
sequence, but SSE4.1+ has instructions that, for at least some modes,
handle those three in one instruction (I haven't benchmarked how much
faster/slower it is, though).

Bootstrapped/regtested on x86_64-linux and i686-linux, and also tested
on SandyBridge; the AVX2 code was only eyeballed.

2011-09-21  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/i386.c (ix86_expand_sse_movcc): Use
	blendvps, blendvpd and pblendvb if possible.

	* gcc.dg/vect/vect-cond-7.c: New test.
	* gcc.target/i386/sse4_1-cond-1.c: New test.
	* gcc.target/i386/avx-cond-1.c: New test.

--- gcc/config/i386/i386.c.jj	2011-09-20 22:21:35.000000000 +0200
+++ gcc/config/i386/i386.c	2011-09-21 10:09:09.000000000 +0200
@@ -18905,24 +18905,42 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp
     }
   else
     {
-      op_true = force_reg (mode, op_true);
+      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
+
       op_false = force_reg (mode, op_false);
+      switch (mode)
+	{
+	case V4SFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvps; break;
+	case V2DFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvpd; break;
+	case V16QImode: if (TARGET_SSE4_1) gen = gen_sse4_1_pblendvb; break;
+	case V8SFmode: if (TARGET_AVX) gen = gen_avx_blendvps256; break;
+	case V4DFmode: if (TARGET_AVX) gen = gen_avx_blendvpd256; break;
+	case V32QImode: if (TARGET_AVX2) gen = gen_avx2_pblendvb; break;
+	default: break;
+	}
 
-      t2 = gen_reg_rtx (mode);
-      if (optimize)
-	t3 = gen_reg_rtx (mode);
+      if (gen != NULL)
+	emit_insn (gen (dest, op_false, op_true, cmp));
       else
-	t3 = dest;
+	{
+	  op_true = force_reg (mode, op_true);
+
+	  t2 = gen_reg_rtx (mode);
+	  if (optimize)
+	    t3 = gen_reg_rtx (mode);
+	  else
+	    t3 = dest;
 
-      x = gen_rtx_AND (mode, op_true, cmp);
-      emit_insn (gen_rtx_SET (VOIDmode, t2, x));
+	  x = gen_rtx_AND (mode, op_true, cmp);
+	  emit_insn (gen_rtx_SET (VOIDmode, t2, x));
 
-      x = gen_rtx_NOT (mode, cmp);
-      x = gen_rtx_AND (mode, x, op_false);
-      emit_insn (gen_rtx_SET (VOIDmode, t3, x));
+	  x = gen_rtx_NOT (mode, cmp);
+	  x = gen_rtx_AND (mode, x, op_false);
+	  emit_insn (gen_rtx_SET (VOIDmode, t3, x));
 
-      x = gen_rtx_IOR (mode, t3, t2);
-      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+	  x = gen_rtx_IOR (mode, t3, t2);
+	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+	}
     }
 }
 
--- gcc/testsuite/gcc.dg/vect/vect-cond-7.c.jj	2011-09-21 10:42:37.000000000 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-cond-7.c	2011-09-21 10:43:20.000000000 +0200
@@ -0,0 +1,68 @@
+#include "tree-vect.h"
+
+extern void abort (void);
+double ad[64], bd[64], cd[64], dd[64], ed[64];
+float af[64], bf[64], cf[64], df[64], ef[64];
+signed char ac[64], bc[64], cc[64], dc[64], ec[64];
+short as[64], bs[64], cs[64], ds[64], es[64];
+int ai[64], bi[64], ci[64], di[64], ei[64];
+long long all[64], bll[64], cll[64], dll[64], ell[64];
+unsigned char auc[64], buc[64], cuc[64], duc[64], euc[64];
+unsigned short aus[64], bus[64], cus[64], dus[64], eus[64];
+unsigned int au[64], bu[64], cu[64], du[64], eu[64];
+unsigned long long aull[64], bull[64], cull[64], dull[64], eull[64];
+
+#define F(var) \
+__attribute__((noinline, noclone)) void \
+f##var (void) \
+{ \
+  int i; \
+  for (i = 0; i < 64; i++) \
+    { \
+      __typeof (a##var[0]) d = d##var[i], e = e##var[i]; \
+      a##var[i] = b##var[i] > c##var[i] ? d : e; \
+    } \
+}
+
+#define TESTS \
+F (d) F (f) F (c) F (s) F (i) F (ll) F (uc) F (us) F (u) F (ull)
+
+TESTS
+
+int
+main ()
+{
+  int i;
+
+  check_vect ();
+  for (i = 0; i < 64; i++)
+    {
+#undef F
+#define F(var) \
+      b##var[i] = i + 64; \
+      switch (i % 3) \
+	{ \
+	case 0: c##var[i] = i + 64; break; \
+	case 1: c##var[i] = 127 - i; break; \
+	case 2: c##var[i] = i; break; \
+	} \
+      d##var[i] = i / 2; \
+      e##var[i] = i * 2;
+      TESTS
+    }
+#undef F
+#define F(var) f##var ();
+  TESTS
+  for (i = 0; i < 64; i++)
+    {
+      asm volatile ("" : : : "memory");
+#undef F
+#define F(var) \
+      if (a##var[i] != (b##var[i] > c##var[i] ? d##var[i] : e##var[i])) \
+	abort ();
+      TESTS
+    }
+  return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.target/i386/sse4_1-cond-1.c.jj	2011-09-21 10:45:15.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/sse4_1-cond-1.c	2011-09-21 10:47:06.000000000 +0200
@@ -0,0 +1,75 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O3 -msse4.1" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+extern void abort (void);
+double ad[64], bd[64], cd[64], dd[64], ed[64];
+float af[64], bf[64], cf[64], df[64], ef[64];
+signed char ac[64], bc[64], cc[64], dc[64], ec[64];
+short as[64], bs[64], cs[64], ds[64], es[64];
+int ai[64], bi[64], ci[64], di[64], ei[64];
+long long all[64], bll[64], cll[64], dll[64], ell[64];
+unsigned char auc[64], buc[64], cuc[64], duc[64], euc[64];
+unsigned short aus[64], bus[64], cus[64], dus[64], eus[64];
+unsigned int au[64], bu[64], cu[64], du[64], eu[64];
+unsigned long long aull[64], bull[64], cull[64], dull[64], eull[64];
+
+#define F(var) \
+__attribute__((noinline, noclone)) void \
+f##var (void) \
+{ \
+  int i; \
+  for (i = 0; i < 64; i++) \
+    { \
+      __typeof (a##var[0]) d = d##var[i], e = e##var[i]; \
+      a##var[i] = b##var[i] > c##var[i] ? d : e; \
+    } \
+}
+
+#define TESTS \
+F (d) F (f) F (c) F (s) F (i) F (ll) F (uc) F (us) F (u) F (ull)
+
+TESTS
+
+void
+TEST ()
+{
+  int i;
+  for (i = 0; i < 64; i++)
+    {
+#undef F
+#define F(var) \
+      b##var[i] = i + 64; \
+      switch (i % 3) \
+	{ \
+	case 0: c##var[i] = i + 64; break; \
+	case 1: c##var[i] = 127 - i; break; \
+	case 2: c##var[i] = i; break; \
+	} \
+      d##var[i] = i / 2; \
+      e##var[i] = i * 2;
+      TESTS
+    }
+#undef F
+#define F(var) f##var ();
+  TESTS
+  for (i = 0; i < 64; i++)
+    {
+      asm volatile ("" : : : "memory");
+#undef F
+#define F(var) \
+      if (a##var[i] != (b##var[i] > c##var[i] ? d##var[i] : e##var[i])) \
+	abort ();
+      TESTS
+    }
+}
--- gcc/testsuite/gcc.target/i386/avx-cond-1.c.jj	2011-09-21 10:44:08.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/avx-cond-1.c	2011-09-21 10:48:34.000000000 +0200
@@ -0,0 +1,13 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -mavx" } */
+/* { dg-require-effective-target avx_runtime } */
+
+#ifndef CHECK_H
+#define CHECK_H "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include "sse4_1-cond-1.c"

	Jakub

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Use *blendv* for sse_movcc/vcond/vcondu if possible
  2011-09-21 12:59 [PATCH] Use *blendv* for sse_movcc/vcond/vcondu if possible Jakub Jelinek
@ 2011-09-21 13:00 ` Uros Bizjak
  2011-09-21 13:35   ` Jakub Jelinek
  2011-09-21 15:03 ` Richard Henderson
  1 sibling, 1 reply; 6+ messages in thread
From: Uros Bizjak @ 2011-09-21 13:00 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Henderson, gcc-patches, H.J. Lu

On Wed, Sep 21, 2011 at 1:37 PM, Jakub Jelinek <jakub@redhat.com> wrote:

> For vcond{,u} etc. we currently generate vpandn+vpand+vpor
> sequence but SSE4.1+ has instructions for at least some modes
> to handle those 3 in one instruction (haven't benchmarked how much
> faster/slower it is though).
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, tested
> on SandyBridge too, AVX2 just eyeballed.
>
> 2011-09-21  Jakub Jelinek  <jakub@redhat.com>
>
>        * config/i386/i386.c (ix86_expand_sse_movcc): Use
>        blendvps, blendvpd and pblendvb if possible.
>
>        * gcc.dg/vect/vect-cond-7.c: New test.
>        * gcc.target/i386/sse4_1-cond-1.c: New test.
>        * gcc.target/i386/avx-cond-1.c: New test.

OK with a nit below:

> --- gcc/config/i386/i386.c.jj   2011-09-20 22:21:35.000000000 +0200
> +++ gcc/config/i386/i386.c      2011-09-21 10:09:09.000000000 +0200
> @@ -18905,24 +18905,42 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp
>     }
>   else
>     {
> -      op_true = force_reg (mode, op_true);
> +      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
> +
>       op_false = force_reg (mode, op_false);
> +      switch (mode)
> +       {
> +       case V4SFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvps; break;
> +       case V2DFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvpd; break;
> +       case V16QImode: if (TARGET_SSE4_1) gen = gen_sse4_1_pblendvb; break;
> +       case V8SFmode: if (TARGET_AVX) gen = gen_avx_blendvps256; break;
> +       case V4DFmode: if (TARGET_AVX) gen = gen_avx_blendvpd256; break;
> +       case V32QImode: if (TARGET_AVX2) gen = gen_avx2_pblendvb; break;
> +       default: break;

  gen = NULL; here instead of break.

> +       }

Please also add appropriate line breaks in the above code...

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Use *blendv* for sse_movcc/vcond/vcondu if possible
  2011-09-21 13:00 ` Uros Bizjak
@ 2011-09-21 13:35   ` Jakub Jelinek
  0 siblings, 0 replies; 6+ messages in thread
From: Jakub Jelinek @ 2011-09-21 13:35 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: Richard Henderson, gcc-patches, H.J. Lu

On Wed, Sep 21, 2011 at 02:02:44PM +0200, Uros Bizjak wrote:
> > +      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
> > +
> >       op_false = force_reg (mode, op_false);
> > +      switch (mode)
> > +       {
> > +       case V4SFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvps; break;
> > +       case V2DFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvpd; break;
> > +       case V16QImode: if (TARGET_SSE4_1) gen = gen_sse4_1_pblendvb; break;
> > +       case V8SFmode: if (TARGET_AVX) gen = gen_avx_blendvps256; break;
> > +       case V4DFmode: if (TARGET_AVX) gen = gen_avx_blendvpd256; break;
> > +       case V32QImode: if (TARGET_AVX2) gen = gen_avx2_pblendvb; break;
> > +       default: break;
> 
>   gen = NULL; here instead of break.
> 
> > +       }

gen is already initialized to NULL above (it needs to be, as the
assignments for the other case labels are only conditional).
I didn't want to write it like:
  gen = TARGET_SSE4_1 ? gen_sse4_1_blendvps : NULL;
  break;

> Please also add appropriate line breaks in the above code...

Sure, will do.

	Jakub

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Use *blendv* for sse_movcc/vcond/vcondu if possible
  2011-09-21 12:59 [PATCH] Use *blendv* for sse_movcc/vcond/vcondu if possible Jakub Jelinek
  2011-09-21 13:00 ` Uros Bizjak
@ 2011-09-21 15:03 ` Richard Henderson
  2011-09-21 15:12   ` Jakub Jelinek
  1 sibling, 1 reply; 6+ messages in thread
From: Richard Henderson @ 2011-09-21 15:03 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Uros Bizjak, gcc-patches, H.J. Lu

On 09/21/2011 04:37 AM, Jakub Jelinek wrote:
>        op_false = force_reg (mode, op_false);
> +      switch (mode)
> +	{
> +	case V4SFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvps; break;
> +	case V2DFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvpd; break;
> +	case V16QImode: if (TARGET_SSE4_1) gen = gen_sse4_1_pblendvb; break;
> +	case V8SFmode: if (TARGET_AVX) gen = gen_avx_blendvps256; break;
> +	case V4DFmode: if (TARGET_AVX) gen = gen_avx_blendvpd256; break;
> +	case V32QImode: if (TARGET_AVX2) gen = gen_avx2_pblendvb; break;
> +	default: break;
> +	}

pblendvb is applicable to all of the integer modes, not just QImode.
You do have to frob the modes around, but it'll work.


r~

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Use *blendv* for sse_movcc/vcond/vcondu if possible
  2011-09-21 15:03 ` Richard Henderson
@ 2011-09-21 15:12   ` Jakub Jelinek
  2011-09-21 16:04     ` Richard Henderson
  0 siblings, 1 reply; 6+ messages in thread
From: Jakub Jelinek @ 2011-09-21 15:12 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Uros Bizjak, gcc-patches, H.J. Lu

On Wed, Sep 21, 2011 at 07:25:43AM -0700, Richard Henderson wrote:
> On 09/21/2011 04:37 AM, Jakub Jelinek wrote:
> >        op_false = force_reg (mode, op_false);
> > +      switch (mode)
> > +	{
> > +	case V4SFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvps; break;
> > +	case V2DFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvpd; break;
> > +	case V16QImode: if (TARGET_SSE4_1) gen = gen_sse4_1_pblendvb; break;
> > +	case V8SFmode: if (TARGET_AVX) gen = gen_avx_blendvps256; break;
> > +	case V4DFmode: if (TARGET_AVX) gen = gen_avx_blendvpd256; break;
> > +	case V32QImode: if (TARGET_AVX2) gen = gen_avx2_pblendvb; break;
> > +	default: break;
> > +	}
> 
> pblendvb is applicable to all of the integer modes, not just QImode.
> You do have to frob the modes around, but it'll work.

Good idea.  So like this instead?

2011-09-21  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/i386.c (ix86_expand_sse_movcc): Use
	blendvps, blendvpd and pblendvb if possible.

	* gcc.dg/vect/vect-cond-7.c: New test.
	* gcc.target/i386/sse4_1-cond-1.c: New test.
	* gcc.target/i386/avx-cond-1.c: New test.

--- gcc/config/i386/i386.c.jj	2011-09-21 16:32:10.878449912 +0200
+++ gcc/config/i386/i386.c	2011-09-21 16:37:46.531420718 +0200
@@ -18905,24 +18905,80 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp
     }
   else
     {
-      op_true = force_reg (mode, op_true);
+      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
+
       op_false = force_reg (mode, op_false);
 
-      t2 = gen_reg_rtx (mode);
-      if (optimize)
-	t3 = gen_reg_rtx (mode);
+      switch (mode)
+	{
+	case V4SFmode:
+	  if (TARGET_SSE4_1)
+	    gen = gen_sse4_1_blendvps;
+	  break;
+	case V2DFmode:
+	  if (TARGET_SSE4_1)
+	    gen = gen_sse4_1_blendvpd;
+	  break;
+	case V16QImode:
+	case V8HImode:
+	case V4SImode:
+	case V2DImode:
+	  if (TARGET_SSE4_1)
+	    {
+	      gen = gen_sse4_1_pblendvb;
+	      dest = gen_lowpart (V16QImode, dest);
+	      op_false = gen_lowpart (V16QImode, op_false);
+	      op_true = gen_lowpart (V16QImode, op_true);
+	      cmp = gen_lowpart (V16QImode, cmp);
+	    }
+	  break;
+	case V8SFmode:
+	  if (TARGET_AVX)
+	    gen = gen_avx_blendvps256;
+	  break;
+	case V4DFmode:
+	  if (TARGET_AVX)
+	    gen = gen_avx_blendvpd256;
+	  break;
+	case V32QImode:
+	case V16HImode:
+	case V8SImode:
+	case V4DImode:
+	  if (TARGET_AVX2)
+	    {
+	      gen = gen_avx2_pblendvb;
+	      dest = gen_lowpart (V32QImode, dest);
+	      op_false = gen_lowpart (V32QImode, op_false);
+	      op_true = gen_lowpart (V32QImode, op_true);
+	      cmp = gen_lowpart (V32QImode, cmp);
+	    }
+	  break;
+	default:
+	  break;
+	}
+
+      if (gen != NULL)
+	emit_insn (gen (dest, op_false, op_true, cmp));
       else
-	t3 = dest;
+	{
+	  op_true = force_reg (mode, op_true);
 
-      x = gen_rtx_AND (mode, op_true, cmp);
-      emit_insn (gen_rtx_SET (VOIDmode, t2, x));
+	  t2 = gen_reg_rtx (mode);
+	  if (optimize)
+	    t3 = gen_reg_rtx (mode);
+	  else
+	    t3 = dest;
+
+	  x = gen_rtx_AND (mode, op_true, cmp);
+	  emit_insn (gen_rtx_SET (VOIDmode, t2, x));
 
-      x = gen_rtx_NOT (mode, cmp);
-      x = gen_rtx_AND (mode, x, op_false);
-      emit_insn (gen_rtx_SET (VOIDmode, t3, x));
+	  x = gen_rtx_NOT (mode, cmp);
+	  x = gen_rtx_AND (mode, x, op_false);
+	  emit_insn (gen_rtx_SET (VOIDmode, t3, x));
 
-      x = gen_rtx_IOR (mode, t3, t2);
-      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+	  x = gen_rtx_IOR (mode, t3, t2);
+	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+	}
     }
 }
 
--- gcc/testsuite/gcc.dg/vect/vect-cond-7.c.jj	2011-09-21 16:32:37.227546030 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-cond-7.c	2011-09-21 16:32:37.227546030 +0200
@@ -0,0 +1,68 @@
+#include "tree-vect.h"
+
+extern void abort (void);
+double ad[64], bd[64], cd[64], dd[64], ed[64];
+float af[64], bf[64], cf[64], df[64], ef[64];
+signed char ac[64], bc[64], cc[64], dc[64], ec[64];
+short as[64], bs[64], cs[64], ds[64], es[64];
+int ai[64], bi[64], ci[64], di[64], ei[64];
+long long all[64], bll[64], cll[64], dll[64], ell[64];
+unsigned char auc[64], buc[64], cuc[64], duc[64], euc[64];
+unsigned short aus[64], bus[64], cus[64], dus[64], eus[64];
+unsigned int au[64], bu[64], cu[64], du[64], eu[64];
+unsigned long long aull[64], bull[64], cull[64], dull[64], eull[64];
+
+#define F(var) \
+__attribute__((noinline, noclone)) void \
+f##var (void) \
+{ \
+  int i; \
+  for (i = 0; i < 64; i++) \
+    { \
+      __typeof (a##var[0]) d = d##var[i], e = e##var[i]; \
+      a##var[i] = b##var[i] > c##var[i] ? d : e; \
+    } \
+}
+
+#define TESTS \
+F (d) F (f) F (c) F (s) F (i) F (ll) F (uc) F (us) F (u) F (ull)
+
+TESTS
+
+int
+main ()
+{
+  int i;
+
+  check_vect ();
+  for (i = 0; i < 64; i++)
+    {
+#undef F
+#define F(var) \
+      b##var[i] = i + 64; \
+      switch (i % 3) \
+	{ \
+	case 0: c##var[i] = i + 64; break; \
+	case 1: c##var[i] = 127 - i; break; \
+	case 2: c##var[i] = i; break; \
+	} \
+      d##var[i] = i / 2; \
+      e##var[i] = i * 2;
+      TESTS
+    }
+#undef F
+#define F(var) f##var ();
+  TESTS
+  for (i = 0; i < 64; i++)
+    {
+      asm volatile ("" : : : "memory");
+#undef F
+#define F(var) \
+      if (a##var[i] != (b##var[i] > c##var[i] ? d##var[i] : e##var[i])) \
+	abort ();
+      TESTS
+    }
+  return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.target/i386/sse4_1-cond-1.c.jj	2011-09-21 16:32:37.228590863 +0200
+++ gcc/testsuite/gcc.target/i386/sse4_1-cond-1.c	2011-09-21 16:32:37.228590863 +0200
@@ -0,0 +1,75 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O3 -msse4.1" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+extern void abort (void);
+double ad[64], bd[64], cd[64], dd[64], ed[64];
+float af[64], bf[64], cf[64], df[64], ef[64];
+signed char ac[64], bc[64], cc[64], dc[64], ec[64];
+short as[64], bs[64], cs[64], ds[64], es[64];
+int ai[64], bi[64], ci[64], di[64], ei[64];
+long long all[64], bll[64], cll[64], dll[64], ell[64];
+unsigned char auc[64], buc[64], cuc[64], duc[64], euc[64];
+unsigned short aus[64], bus[64], cus[64], dus[64], eus[64];
+unsigned int au[64], bu[64], cu[64], du[64], eu[64];
+unsigned long long aull[64], bull[64], cull[64], dull[64], eull[64];
+
+#define F(var) \
+__attribute__((noinline, noclone)) void \
+f##var (void) \
+{ \
+  int i; \
+  for (i = 0; i < 64; i++) \
+    { \
+      __typeof (a##var[0]) d = d##var[i], e = e##var[i]; \
+      a##var[i] = b##var[i] > c##var[i] ? d : e; \
+    } \
+}
+
+#define TESTS \
+F (d) F (f) F (c) F (s) F (i) F (ll) F (uc) F (us) F (u) F (ull)
+
+TESTS
+
+void
+TEST ()
+{
+  int i;
+  for (i = 0; i < 64; i++)
+    {
+#undef F
+#define F(var) \
+      b##var[i] = i + 64; \
+      switch (i % 3) \
+	{ \
+	case 0: c##var[i] = i + 64; break; \
+	case 1: c##var[i] = 127 - i; break; \
+	case 2: c##var[i] = i; break; \
+	} \
+      d##var[i] = i / 2; \
+      e##var[i] = i * 2;
+      TESTS
+    }
+#undef F
+#define F(var) f##var ();
+  TESTS
+  for (i = 0; i < 64; i++)
+    {
+      asm volatile ("" : : : "memory");
+#undef F
+#define F(var) \
+      if (a##var[i] != (b##var[i] > c##var[i] ? d##var[i] : e##var[i])) \
+	abort ();
+      TESTS
+    }
+}
--- gcc/testsuite/gcc.target/i386/avx-cond-1.c.jj	2011-09-21 16:32:37.228590863 +0200
+++ gcc/testsuite/gcc.target/i386/avx-cond-1.c	2011-09-21 16:32:37.229545353 +0200
@@ -0,0 +1,13 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -mavx" } */
+/* { dg-require-effective-target avx_runtime } */
+
+#ifndef CHECK_H
+#define CHECK_H "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include "sse4_1-cond-1.c"


	Jakub

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Use *blendv* for sse_movcc/vcond/vcondu if possible
  2011-09-21 15:12   ` Jakub Jelinek
@ 2011-09-21 16:04     ` Richard Henderson
  0 siblings, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2011-09-21 16:04 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Uros Bizjak, gcc-patches, H.J. Lu

On 09/21/2011 07:40 AM, Jakub Jelinek wrote:
> 	* config/i386/i386.c (ix86_expand_sse_movcc): Use
> 	blendvps, blendvpd and pblendvb if possible.
> 
> 	* gcc.dg/vect/vect-cond-7.c: New test.
> 	* gcc.target/i386/sse4_1-cond-1.c: New test.
> 	* gcc.target/i386/avx-cond-1.c: New test.

Ok.


r~

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2011-09-21 15:24 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-21 12:59 [PATCH] Use *blendv* for sse_movcc/vcond/vcondu if possible Jakub Jelinek
2011-09-21 13:00 ` Uros Bizjak
2011-09-21 13:35   ` Jakub Jelinek
2011-09-21 15:03 ` Richard Henderson
2011-09-21 15:12   ` Jakub Jelinek
2011-09-21 16:04     ` Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).