* [PATCH] Use *blendv* for sse_movcc/vcond/vcondu if possible @ 2011-09-21 12:59 Jakub Jelinek 2011-09-21 13:00 ` Uros Bizjak 2011-09-21 15:03 ` Richard Henderson 0 siblings, 2 replies; 6+ messages in thread From: Jakub Jelinek @ 2011-09-21 12:59 UTC (permalink / raw) To: Uros Bizjak, Richard Henderson; +Cc: gcc-patches, H.J. Lu Hi! For vcond{,u} etc. we currently generate vpandn+vpand+vpor sequence but SSE4.1+ has instructions for at least some modes to handle those 3 in one instruction (haven't benchmarked how much faster/slower it is though). Bootstrapped/regtested on x86_64-linux and i686-linux, tested on SandyBridge too, AVX2 just eyeballed. 2011-09-21 Jakub Jelinek <jakub@redhat.com> * config/i386/i386.c (ix86_expand_sse_movcc): Use blendvps, blendvpd and pblendvb if possible. * gcc.dg/vect/vect-cond-7.c: New test. * gcc.target/i386/sse4_1-cond-1.c: New test. * gcc.target/i386/avx-cond-1.c: New test. --- gcc/config/i386/i386.c.jj 2011-09-20 22:21:35.000000000 +0200 +++ gcc/config/i386/i386.c 2011-09-21 10:09:09.000000000 +0200 @@ -18905,24 +18905,42 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp } else { - op_true = force_reg (mode, op_true); + rtx (*gen) (rtx, rtx, rtx, rtx) = NULL; + op_false = force_reg (mode, op_false); + switch (mode) + { + case V4SFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvps; break; + case V2DFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvpd; break; + case V16QImode: if (TARGET_SSE4_1) gen = gen_sse4_1_pblendvb; break; + case V8SFmode: if (TARGET_AVX) gen = gen_avx_blendvps256; break; + case V4DFmode: if (TARGET_AVX) gen = gen_avx_blendvpd256; break; + case V32QImode: if (TARGET_AVX2) gen = gen_avx2_pblendvb; break; + default: break; + } - t2 = gen_reg_rtx (mode); - if (optimize) - t3 = gen_reg_rtx (mode); + if (gen != NULL) + emit_insn (gen (dest, op_false, op_true, cmp)); else - t3 = dest; + { + op_true = force_reg (mode, op_true); + + t2 = gen_reg_rtx (mode); + if (optimize) + t3 = gen_reg_rtx (mode); + else + t3 = dest; - x = 
gen_rtx_AND (mode, op_true, cmp); - emit_insn (gen_rtx_SET (VOIDmode, t2, x)); + x = gen_rtx_AND (mode, op_true, cmp); + emit_insn (gen_rtx_SET (VOIDmode, t2, x)); - x = gen_rtx_NOT (mode, cmp); - x = gen_rtx_AND (mode, x, op_false); - emit_insn (gen_rtx_SET (VOIDmode, t3, x)); + x = gen_rtx_NOT (mode, cmp); + x = gen_rtx_AND (mode, x, op_false); + emit_insn (gen_rtx_SET (VOIDmode, t3, x)); - x = gen_rtx_IOR (mode, t3, t2); - emit_insn (gen_rtx_SET (VOIDmode, dest, x)); + x = gen_rtx_IOR (mode, t3, t2); + emit_insn (gen_rtx_SET (VOIDmode, dest, x)); + } } } --- gcc/testsuite/gcc.dg/vect/vect-cond-7.c.jj 2011-09-21 10:42:37.000000000 +0200 +++ gcc/testsuite/gcc.dg/vect/vect-cond-7.c 2011-09-21 10:43:20.000000000 +0200 @@ -0,0 +1,68 @@ +#include "tree-vect.h" + +extern void abort (void); +double ad[64], bd[64], cd[64], dd[64], ed[64]; +float af[64], bf[64], cf[64], df[64], ef[64]; +signed char ac[64], bc[64], cc[64], dc[64], ec[64]; +short as[64], bs[64], cs[64], ds[64], es[64]; +int ai[64], bi[64], ci[64], di[64], ei[64]; +long long all[64], bll[64], cll[64], dll[64], ell[64]; +unsigned char auc[64], buc[64], cuc[64], duc[64], euc[64]; +unsigned short aus[64], bus[64], cus[64], dus[64], eus[64]; +unsigned int au[64], bu[64], cu[64], du[64], eu[64]; +unsigned long long aull[64], bull[64], cull[64], dull[64], eull[64]; + +#define F(var) \ +__attribute__((noinline, noclone)) void \ +f##var (void) \ +{ \ + int i; \ + for (i = 0; i < 64; i++) \ + { \ + __typeof (a##var[0]) d = d##var[i], e = e##var[i]; \ + a##var[i] = b##var[i] > c##var[i] ? 
d : e; \ + } \ +} + +#define TESTS \ +F (d) F (f) F (c) F (s) F (i) F (ll) F (uc) F (us) F (u) F (ull) + +TESTS + +int +main () +{ + int i; + + check_vect (); + for (i = 0; i < 64; i++) + { +#undef F +#define F(var) \ + b##var[i] = i + 64; \ + switch (i % 3) \ + { \ + case 0: c##var[i] = i + 64; break; \ + case 1: c##var[i] = 127 - i; break; \ + case 2: c##var[i] = i; break; \ + } \ + d##var[i] = i / 2; \ + e##var[i] = i * 2; + TESTS + } +#undef F +#define F(var) f##var (); + TESTS + for (i = 0; i < 64; i++) + { + asm volatile ("" : : : "memory"); +#undef F +#define F(var) \ + if (a##var[i] != (b##var[i] > c##var[i] ? d##var[i] : e##var[i])) \ + abort (); + TESTS + } + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ --- gcc/testsuite/gcc.target/i386/sse4_1-cond-1.c.jj 2011-09-21 10:45:15.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/sse4_1-cond-1.c 2011-09-21 10:47:06.000000000 +0200 @@ -0,0 +1,75 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O3 -msse4.1" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +extern void abort (void); +double ad[64], bd[64], cd[64], dd[64], ed[64]; +float af[64], bf[64], cf[64], df[64], ef[64]; +signed char ac[64], bc[64], cc[64], dc[64], ec[64]; +short as[64], bs[64], cs[64], ds[64], es[64]; +int ai[64], bi[64], ci[64], di[64], ei[64]; +long long all[64], bll[64], cll[64], dll[64], ell[64]; +unsigned char auc[64], buc[64], cuc[64], duc[64], euc[64]; +unsigned short aus[64], bus[64], cus[64], dus[64], eus[64]; +unsigned int au[64], bu[64], cu[64], du[64], eu[64]; +unsigned long long aull[64], bull[64], cull[64], dull[64], eull[64]; + +#define F(var) \ +__attribute__((noinline, noclone)) void \ +f##var (void) \ +{ \ + int i; \ + for (i = 0; i < 64; i++) \ + { \ + __typeof (a##var[0]) d = d##var[i], e = e##var[i]; \ + a##var[i] = b##var[i] > c##var[i] ? 
d : e; \ + } \ +} + +#define TESTS \ +F (d) F (f) F (c) F (s) F (i) F (ll) F (uc) F (us) F (u) F (ull) + +TESTS + +void +TEST () +{ + int i; + for (i = 0; i < 64; i++) + { +#undef F +#define F(var) \ + b##var[i] = i + 64; \ + switch (i % 3) \ + { \ + case 0: c##var[i] = i + 64; break; \ + case 1: c##var[i] = 127 - i; break; \ + case 2: c##var[i] = i; break; \ + } \ + d##var[i] = i / 2; \ + e##var[i] = i * 2; + TESTS + } +#undef F +#define F(var) f##var (); + TESTS + for (i = 0; i < 64; i++) + { + asm volatile ("" : : : "memory"); +#undef F +#define F(var) \ + if (a##var[i] != (b##var[i] > c##var[i] ? d##var[i] : e##var[i])) \ + abort (); + TESTS + } +} --- gcc/testsuite/gcc.target/i386/avx-cond-1.c.jj 2011-09-21 10:44:08.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx-cond-1.c 2011-09-21 10:48:34.000000000 +0200 @@ -0,0 +1,13 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -mavx" } */ +/* { dg-require-effective-target avx_runtime } */ + +#ifndef CHECK_H +#define CHECK_H "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include "sse4_1-cond-1.c" Jakub ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] Use *blendv* for sse_movcc/vcond/vcondu if possible 2011-09-21 12:59 [PATCH] Use *blendv* for sse_movcc/vcond/vcondu if possible Jakub Jelinek @ 2011-09-21 13:00 ` Uros Bizjak 2011-09-21 13:35 ` Jakub Jelinek 2011-09-21 15:03 ` Richard Henderson 1 sibling, 1 reply; 6+ messages in thread From: Uros Bizjak @ 2011-09-21 13:00 UTC (permalink / raw) To: Jakub Jelinek; +Cc: Richard Henderson, gcc-patches, H.J. Lu On Wed, Sep 21, 2011 at 1:37 PM, Jakub Jelinek <jakub@redhat.com> wrote: > For vcond{,u} etc. we currently generate vpandn+vpand+vpor > sequence but SSE4.1+ has instructions for at least some modes > to handle those 3 in one instruction (haven't benchmarked how much > faster/slower it is though). > > Bootstrapped/regtested on x86_64-linux and i686-linux, tested > on SandyBridge too, AVX2 just eyeballed. > > 2011-09-21 Jakub Jelinek <jakub@redhat.com> > > * config/i386/i386.c (ix86_expand_sse_movcc): Use > blendvps, blendvpd and pblendvb if possible. > > * gcc.dg/vect/vect-cond-7.c: New test. > * gcc.target/i386/sse4_1-cond-1.c: New test. > * gcc.target/i386/avx-cond-1.c: New test. OK with a nit below: > --- gcc/config/i386/i386.c.jj 2011-09-20 22:21:35.000000000 +0200 > +++ gcc/config/i386/i386.c 2011-09-21 10:09:09.000000000 +0200 > @@ -18905,24 +18905,42 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp > } > else > { > - op_true = force_reg (mode, op_true); > + rtx (*gen) (rtx, rtx, rtx, rtx) = NULL; > + > op_false = force_reg (mode, op_false); > + switch (mode) > + { > + case V4SFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvps; break; > + case V2DFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvpd; break; > + case V16QImode: if (TARGET_SSE4_1) gen = gen_sse4_1_pblendvb; break; > + case V8SFmode: if (TARGET_AVX) gen = gen_avx_blendvps256; break; > + case V4DFmode: if (TARGET_AVX) gen = gen_avx_blendvpd256; break; > + case V32QImode: if (TARGET_AVX2) gen = gen_avx2_pblendvb; break; > + default: break; gen = NULL; here instead of break. 
> + } Please also add appropriate line breaks in the above code... Thanks, Uros. ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] Use *blendv* for sse_movcc/vcond/vcondu if possible 2011-09-21 13:00 ` Uros Bizjak @ 2011-09-21 13:35 ` Jakub Jelinek 0 siblings, 0 replies; 6+ messages in thread From: Jakub Jelinek @ 2011-09-21 13:35 UTC (permalink / raw) To: Uros Bizjak; +Cc: Richard Henderson, gcc-patches, H.J. Lu On Wed, Sep 21, 2011 at 02:02:44PM +0200, Uros Bizjak wrote: > > + rtx (*gen) (rtx, rtx, rtx, rtx) = NULL; > > + > > op_false = force_reg (mode, op_false); > > + switch (mode) > > + { > > + case V4SFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvps; break; > > + case V2DFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvpd; break; > > + case V16QImode: if (TARGET_SSE4_1) gen = gen_sse4_1_pblendvb; break; > > + case V8SFmode: if (TARGET_AVX) gen = gen_avx_blendvps256; break; > > + case V4DFmode: if (TARGET_AVX) gen = gen_avx_blendvpd256; break; > > + case V32QImode: if (TARGET_AVX2) gen = gen_avx2_pblendvb; break; > > + default: break; > > gen = NULL; here instead of break. > > > + } gen is already initialized to NULL above (as the assignments for other case labels are only conditional, it needs to be). I didn't want to write like: gen = TARGET_SSE4_1 ? gen_sse4_1_blendvps : NULL; break; > Please also add appropriate line breaks in the above code... Sure, will do. Jakub ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] Use *blendv* for sse_movcc/vcond/vcondu if possible 2011-09-21 12:59 [PATCH] Use *blendv* for sse_movcc/vcond/vcondu if possible Jakub Jelinek 2011-09-21 13:00 ` Uros Bizjak @ 2011-09-21 15:03 ` Richard Henderson 2011-09-21 15:12 ` Jakub Jelinek 1 sibling, 1 reply; 6+ messages in thread From: Richard Henderson @ 2011-09-21 15:03 UTC (permalink / raw) To: Jakub Jelinek; +Cc: Uros Bizjak, gcc-patches, H.J. Lu On 09/21/2011 04:37 AM, Jakub Jelinek wrote: > op_false = force_reg (mode, op_false); > + switch (mode) > + { > + case V4SFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvps; break; > + case V2DFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvpd; break; > + case V16QImode: if (TARGET_SSE4_1) gen = gen_sse4_1_pblendvb; break; > + case V8SFmode: if (TARGET_AVX) gen = gen_avx_blendvps256; break; > + case V4DFmode: if (TARGET_AVX) gen = gen_avx_blendvpd256; break; > + case V32QImode: if (TARGET_AVX2) gen = gen_avx2_pblendvb; break; > + default: break; > + } pblendvb is applicable to all of the integer modes, not just QImode. You do have to frob the modes around, but it'll work. r~ ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] Use *blendv* for sse_movcc/vcond/vcondu if possible 2011-09-21 15:03 ` Richard Henderson @ 2011-09-21 15:12 ` Jakub Jelinek 2011-09-21 16:04 ` Richard Henderson 0 siblings, 1 reply; 6+ messages in thread From: Jakub Jelinek @ 2011-09-21 15:12 UTC (permalink / raw) To: Richard Henderson; +Cc: Uros Bizjak, gcc-patches, H.J. Lu On Wed, Sep 21, 2011 at 07:25:43AM -0700, Richard Henderson wrote: > On 09/21/2011 04:37 AM, Jakub Jelinek wrote: > > op_false = force_reg (mode, op_false); > > + switch (mode) > > + { > > + case V4SFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvps; break; > > + case V2DFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvpd; break; > > + case V16QImode: if (TARGET_SSE4_1) gen = gen_sse4_1_pblendvb; break; > > + case V8SFmode: if (TARGET_AVX) gen = gen_avx_blendvps256; break; > > + case V4DFmode: if (TARGET_AVX) gen = gen_avx_blendvpd256; break; > > + case V32QImode: if (TARGET_AVX2) gen = gen_avx2_pblendvb; break; > > + default: break; > > + } > > pblendvb is applicable to all of the integer modes, not just QImode. > You do have to frob the modes around, but it'll work. Good idea. So like this instead? 2011-09-21 Jakub Jelinek <jakub@redhat.com> * config/i386/i386.c (ix86_expand_sse_movcc): Use blendvps, blendvpd and pblendvb if possible. * gcc.dg/vect/vect-cond-7.c: New test. * gcc.target/i386/sse4_1-cond-1.c: New test. * gcc.target/i386/avx-cond-1.c: New test. 
--- gcc/config/i386/i386.c.jj 2011-09-21 16:32:10.878449912 +0200 +++ gcc/config/i386/i386.c 2011-09-21 16:37:46.531420718 +0200 @@ -18905,24 +18905,80 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp } else { - op_true = force_reg (mode, op_true); + rtx (*gen) (rtx, rtx, rtx, rtx) = NULL; + op_false = force_reg (mode, op_false); - t2 = gen_reg_rtx (mode); - if (optimize) - t3 = gen_reg_rtx (mode); + switch (mode) + { + case V4SFmode: + if (TARGET_SSE4_1) + gen = gen_sse4_1_blendvps; + break; + case V2DFmode: + if (TARGET_SSE4_1) + gen = gen_sse4_1_blendvpd; + break; + case V16QImode: + case V8HImode: + case V4SImode: + case V2DImode: + if (TARGET_SSE4_1) + { + gen = gen_sse4_1_pblendvb; + dest = gen_lowpart (V16QImode, dest); + op_false = gen_lowpart (V16QImode, op_false); + op_true = gen_lowpart (V16QImode, op_true); + cmp = gen_lowpart (V16QImode, cmp); + } + break; + case V8SFmode: + if (TARGET_AVX) + gen = gen_avx_blendvps256; + break; + case V4DFmode: + if (TARGET_AVX) + gen = gen_avx_blendvpd256; + break; + case V32QImode: + case V16HImode: + case V8SImode: + case V4DImode: + if (TARGET_AVX2) + { + gen = gen_avx2_pblendvb; + dest = gen_lowpart (V32QImode, dest); + op_false = gen_lowpart (V32QImode, op_false); + op_true = gen_lowpart (V32QImode, op_true); + cmp = gen_lowpart (V32QImode, cmp); + } + break; + default: + break; + } + + if (gen != NULL) + emit_insn (gen (dest, op_false, op_true, cmp)); else - t3 = dest; + { + op_true = force_reg (mode, op_true); - x = gen_rtx_AND (mode, op_true, cmp); - emit_insn (gen_rtx_SET (VOIDmode, t2, x)); + t2 = gen_reg_rtx (mode); + if (optimize) + t3 = gen_reg_rtx (mode); + else + t3 = dest; + + x = gen_rtx_AND (mode, op_true, cmp); + emit_insn (gen_rtx_SET (VOIDmode, t2, x)); - x = gen_rtx_NOT (mode, cmp); - x = gen_rtx_AND (mode, x, op_false); - emit_insn (gen_rtx_SET (VOIDmode, t3, x)); + x = gen_rtx_NOT (mode, cmp); + x = gen_rtx_AND (mode, x, op_false); + emit_insn (gen_rtx_SET (VOIDmode, t3, x)); - x = gen_rtx_IOR 
(mode, t3, t2); - emit_insn (gen_rtx_SET (VOIDmode, dest, x)); + x = gen_rtx_IOR (mode, t3, t2); + emit_insn (gen_rtx_SET (VOIDmode, dest, x)); + } } } --- gcc/testsuite/gcc.dg/vect/vect-cond-7.c.jj 2011-09-21 16:32:37.227546030 +0200 +++ gcc/testsuite/gcc.dg/vect/vect-cond-7.c 2011-09-21 16:32:37.227546030 +0200 @@ -0,0 +1,68 @@ +#include "tree-vect.h" + +extern void abort (void); +double ad[64], bd[64], cd[64], dd[64], ed[64]; +float af[64], bf[64], cf[64], df[64], ef[64]; +signed char ac[64], bc[64], cc[64], dc[64], ec[64]; +short as[64], bs[64], cs[64], ds[64], es[64]; +int ai[64], bi[64], ci[64], di[64], ei[64]; +long long all[64], bll[64], cll[64], dll[64], ell[64]; +unsigned char auc[64], buc[64], cuc[64], duc[64], euc[64]; +unsigned short aus[64], bus[64], cus[64], dus[64], eus[64]; +unsigned int au[64], bu[64], cu[64], du[64], eu[64]; +unsigned long long aull[64], bull[64], cull[64], dull[64], eull[64]; + +#define F(var) \ +__attribute__((noinline, noclone)) void \ +f##var (void) \ +{ \ + int i; \ + for (i = 0; i < 64; i++) \ + { \ + __typeof (a##var[0]) d = d##var[i], e = e##var[i]; \ + a##var[i] = b##var[i] > c##var[i] ? d : e; \ + } \ +} + +#define TESTS \ +F (d) F (f) F (c) F (s) F (i) F (ll) F (uc) F (us) F (u) F (ull) + +TESTS + +int +main () +{ + int i; + + check_vect (); + for (i = 0; i < 64; i++) + { +#undef F +#define F(var) \ + b##var[i] = i + 64; \ + switch (i % 3) \ + { \ + case 0: c##var[i] = i + 64; break; \ + case 1: c##var[i] = 127 - i; break; \ + case 2: c##var[i] = i; break; \ + } \ + d##var[i] = i / 2; \ + e##var[i] = i * 2; + TESTS + } +#undef F +#define F(var) f##var (); + TESTS + for (i = 0; i < 64; i++) + { + asm volatile ("" : : : "memory"); +#undef F +#define F(var) \ + if (a##var[i] != (b##var[i] > c##var[i] ? 
d##var[i] : e##var[i])) \ + abort (); + TESTS + } + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ --- gcc/testsuite/gcc.target/i386/sse4_1-cond-1.c.jj 2011-09-21 16:32:37.228590863 +0200 +++ gcc/testsuite/gcc.target/i386/sse4_1-cond-1.c 2011-09-21 16:32:37.228590863 +0200 @@ -0,0 +1,75 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O3 -msse4.1" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +extern void abort (void); +double ad[64], bd[64], cd[64], dd[64], ed[64]; +float af[64], bf[64], cf[64], df[64], ef[64]; +signed char ac[64], bc[64], cc[64], dc[64], ec[64]; +short as[64], bs[64], cs[64], ds[64], es[64]; +int ai[64], bi[64], ci[64], di[64], ei[64]; +long long all[64], bll[64], cll[64], dll[64], ell[64]; +unsigned char auc[64], buc[64], cuc[64], duc[64], euc[64]; +unsigned short aus[64], bus[64], cus[64], dus[64], eus[64]; +unsigned int au[64], bu[64], cu[64], du[64], eu[64]; +unsigned long long aull[64], bull[64], cull[64], dull[64], eull[64]; + +#define F(var) \ +__attribute__((noinline, noclone)) void \ +f##var (void) \ +{ \ + int i; \ + for (i = 0; i < 64; i++) \ + { \ + __typeof (a##var[0]) d = d##var[i], e = e##var[i]; \ + a##var[i] = b##var[i] > c##var[i] ? d : e; \ + } \ +} + +#define TESTS \ +F (d) F (f) F (c) F (s) F (i) F (ll) F (uc) F (us) F (u) F (ull) + +TESTS + +void +TEST () +{ + int i; + for (i = 0; i < 64; i++) + { +#undef F +#define F(var) \ + b##var[i] = i + 64; \ + switch (i % 3) \ + { \ + case 0: c##var[i] = i + 64; break; \ + case 1: c##var[i] = 127 - i; break; \ + case 2: c##var[i] = i; break; \ + } \ + d##var[i] = i / 2; \ + e##var[i] = i * 2; + TESTS + } +#undef F +#define F(var) f##var (); + TESTS + for (i = 0; i < 64; i++) + { + asm volatile ("" : : : "memory"); +#undef F +#define F(var) \ + if (a##var[i] != (b##var[i] > c##var[i] ? 
d##var[i] : e##var[i])) \ + abort (); + TESTS + } +} --- gcc/testsuite/gcc.target/i386/avx-cond-1.c.jj 2011-09-21 16:32:37.228590863 +0200 +++ gcc/testsuite/gcc.target/i386/avx-cond-1.c 2011-09-21 16:32:37.229545353 +0200 @@ -0,0 +1,13 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -mavx" } */ +/* { dg-require-effective-target avx_runtime } */ + +#ifndef CHECK_H +#define CHECK_H "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include "sse4_1-cond-1.c" Jakub ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] Use *blendv* for sse_movcc/vcond/vcondu if possible 2011-09-21 15:12 ` Jakub Jelinek @ 2011-09-21 16:04 ` Richard Henderson 0 siblings, 0 replies; 6+ messages in thread From: Richard Henderson @ 2011-09-21 16:04 UTC (permalink / raw) To: Jakub Jelinek; +Cc: Uros Bizjak, gcc-patches, H.J. Lu On 09/21/2011 07:40 AM, Jakub Jelinek wrote: > * config/i386/i386.c (ix86_expand_sse_movcc): Use > blendvps, blendvpd and pblendvb if possible. > > * gcc.dg/vect/vect-cond-7.c: New test. > * gcc.target/i386/sse4_1-cond-1.c: New test. > * gcc.target/i386/avx-cond-1.c: New test. Ok. r~ ^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2011-09-21 15:24 UTC | newest] Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2011-09-21 12:59 [PATCH] Use *blendv* for sse_movcc/vcond/vcondu if possible Jakub Jelinek 2011-09-21 13:00 ` Uros Bizjak 2011-09-21 13:35 ` Jakub Jelinek 2011-09-21 15:03 ` Richard Henderson 2011-09-21 15:12 ` Jakub Jelinek 2011-09-21 16:04 ` Richard Henderson
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).