public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH x86] Enable v64qi permutations.
@ 2014-12-04  9:50 Ilya Tocar
  2014-12-04 11:54 ` H.J. Lu
  2014-12-10 16:50 ` Richard Henderson
  0 siblings, 2 replies; 14+ messages in thread
From: Ilya Tocar @ 2014-12-04  9:50 UTC (permalink / raw)
  To: Uros Bizjak, Jakub Jelinek; +Cc: GCC Patches

Hi,

As discussed in https://gcc.gnu.org/ml/gcc-patches/2014-10/msg00473.html
This patch enables v64qi permutations.
I've checked  vshuf* tests from dg-torture.exp,
with avx512* options on sde and generated permutations are correct.

OK for trunk?

---
 gcc/config/i386/i386.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++
 gcc/config/i386/sse.md |  4 +--
 2 files changed, 87 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index eafc15a..f29f8ce 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21831,6 +21831,10 @@ ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
       if (TARGET_AVX512VL && TARGET_AVX512BW)
 	gen = gen_avx512vl_vpermi2varv16hi3;
       break;
+    case V64QImode:
+      if (TARGET_AVX512VBMI)
+	gen = gen_avx512bw_vpermi2varv64qi3;
+      break;
     case V32HImode:
       if (TARGET_AVX512BW)
 	gen = gen_avx512bw_vpermi2varv32hi3;
@@ -48872,6 +48876,7 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
 	emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
       return true;
 
+    case V64QImode:
     case V32QImode:
     case V16HImode:
     case V8SImode:
@@ -48905,6 +48910,78 @@ expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
   return expand_vec_perm_broadcast_1 (d);
 }
 
+/* Implement arbitrary permutations of two V64QImode operands
+   with 2 vpermi2w, 2 vpshufb and one vpor instruction.  */
+static bool
+expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
+{
+  if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
+    return false;
+
+  if (d->testing_p)
+    return true;
+
+  struct expand_vec_perm_d ds[2];
+  rtx rperm[128], vperm, target0, target1;
+  unsigned int i, nelt;
+  machine_mode vmode;
+
+  nelt = d->nelt;
+  vmode = V64QImode;
+
+  for (i = 0; i < 2; i++)
+    {
+      ds[i] = *d;
+      ds[i].vmode = V32HImode;
+      ds[i].nelt = 32;
+      ds[i].target = gen_reg_rtx (V32HImode);
+      ds[i].op0 = gen_lowpart (V32HImode, d->op0);
+      ds[i].op1 = gen_lowpart (V32HImode, d->op1);
+    }
+
+  /* Prepare permutations such that the first one takes care of
+     putting the even bytes into the right positions or one higher
+     positions (ds[0]) and the second one takes care of
+     putting the odd bytes into the right positions or one below
+     (ds[1]).  */
+
+  for (i = 0; i < nelt; i++)
+    {
+      ds[i & 1].perm[i / 2] = d->perm[i] / 2;
+      if (i & 1)
+	{
+	  rperm[i] = constm1_rtx;
+	  rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
+	}
+      else
+	{
+	  rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
+	  rperm[i + 64] = constm1_rtx;
+	}
+    }
+
+  bool ok = expand_vec_perm_1 (&ds[0]);
+  gcc_assert (ok);
+  ds[0].target = gen_lowpart (V64QImode, ds[0].target);
+
+  ok = expand_vec_perm_1 (&ds[1]);
+  gcc_assert (ok);
+  ds[1].target = gen_lowpart (V64QImode, ds[1].target);
+
+  vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
+  vperm = force_reg (vmode, vperm);
+  target0 = gen_reg_rtx (V64QImode);
+  emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
+
+  vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
+  vperm = force_reg (vmode, vperm);
+  target1 = gen_reg_rtx (V64QImode);
+  emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
+
+  emit_insn (gen_iorv64qi3 (d->target, target0, target1));
+  return true;
+}
+
 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
    with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
    all the shorter instruction sequences.  */
@@ -49079,6 +49156,9 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
   if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
     return true;
 
+  if (expand_vec_perm_vpermi2_vpshub2 (d))
+    return true;
+
   /* ??? Look for narrow permutations whose element orderings would
      allow the promotion to a wider mode.  */
 
@@ -49223,6 +49303,11 @@ ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
 	/* All implementable with a single vpermi2 insn.  */
 	return true;
       break;
+    case V64QImode:
+      if (TARGET_AVX512BW)
+	/* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn.  */
+	return true;
+      break;
     case V8SImode:
     case V8SFmode:
     case V4DFmode:
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ca5d720..6252e7e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -10678,7 +10678,7 @@
    (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
    (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
    (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
-   (V32HI "TARGET_AVX512BW")])
+   (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
 
 (define_expand "vec_perm<mode>"
   [(match_operand:VEC_PERM_AVX2 0 "register_operand")
@@ -10700,7 +10700,7 @@
    (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
    (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
    (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
-   (V32HI "TARGET_AVX512BW")])
+   (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
 
 (define_expand "vec_perm_const<mode>"
   [(match_operand:VEC_PERM_CONST 0 "register_operand")
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH x86] Enable v64qi permutations.
  2014-12-04  9:50 [PATCH x86] Enable v64qi permutations Ilya Tocar
@ 2014-12-04 11:54 ` H.J. Lu
  2014-12-04 11:57   ` Jakub Jelinek
  2014-12-10 16:50 ` Richard Henderson
  1 sibling, 1 reply; 14+ messages in thread
From: H.J. Lu @ 2014-12-04 11:54 UTC (permalink / raw)
  To: Ilya Tocar; +Cc: Uros Bizjak, Jakub Jelinek, GCC Patches

On Thu, Dec 4, 2014 at 1:49 AM, Ilya Tocar <tocarip.intel@gmail.com> wrote:
> Hi,
>
> As discussed in https://gcc.gnu.org/ml/gcc-patches/2014-10/msg00473.html
> This patch enables v64qi permutations.
> I've checked  vshuf* tests from dg-torture.exp,
> with avx512* options on sde and generated permutations are correct.
>
> OK for trunk?
>

Can you add a few testcases?


-- 
H.J.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH x86] Enable v64qi permutations.
  2014-12-04 11:54 ` H.J. Lu
@ 2014-12-04 11:57   ` Jakub Jelinek
  2014-12-04 12:00     ` H.J. Lu
  0 siblings, 1 reply; 14+ messages in thread
From: Jakub Jelinek @ 2014-12-04 11:57 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Ilya Tocar, Uros Bizjak, GCC Patches

On Thu, Dec 04, 2014 at 03:54:25AM -0800, H.J. Lu wrote:
> On Thu, Dec 4, 2014 at 1:49 AM, Ilya Tocar <tocarip.intel@gmail.com> wrote:
> > Hi,
> >
> > As discussed in https://gcc.gnu.org/ml/gcc-patches/2014-10/msg00473.html
> > This patch enables v64qi permutations.
> > I've checked  vshuf* tests from dg-torture.exp,
> > with avx512* options on sde and generated permutations are correct.
> >
> > OK for trunk?
> >
> 
> Can you add a few testcases?

Isn't it already covered by gcc.dg/torture/vshuf* ?

	Jakub

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH x86] Enable v64qi permutations.
  2014-12-04 11:57   ` Jakub Jelinek
@ 2014-12-04 12:00     ` H.J. Lu
  2014-12-04 12:04       ` Jakub Jelinek
  0 siblings, 1 reply; 14+ messages in thread
From: H.J. Lu @ 2014-12-04 12:00 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Ilya Tocar, Uros Bizjak, GCC Patches

On Thu, Dec 4, 2014 at 3:57 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Thu, Dec 04, 2014 at 03:54:25AM -0800, H.J. Lu wrote:
>> On Thu, Dec 4, 2014 at 1:49 AM, Ilya Tocar <tocarip.intel@gmail.com> wrote:
>> > Hi,
>> >
>> > As discussed in https://gcc.gnu.org/ml/gcc-patches/2014-10/msg00473.html
>> > This patch enables v64qi permutations.
>> > I've checked  vshuf* tests from dg-torture.exp,
>> > with avx512* options on sde and generated permutations are correct.
>> >
>> > OK for trunk?
>> >
>>
>> Can you add a few testcases?
>
> Isn't it already covered by gcc.dg/torture/vshuf* ?
>

I didn't see them fail on my machines today.


-- 
H.J.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH x86] Enable v64qi permutations.
  2014-12-04 12:00     ` H.J. Lu
@ 2014-12-04 12:04       ` Jakub Jelinek
  2014-12-04 12:45         ` Uros Bizjak
  2014-12-04 12:57         ` H.J. Lu
  0 siblings, 2 replies; 14+ messages in thread
From: Jakub Jelinek @ 2014-12-04 12:04 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Ilya Tocar, Uros Bizjak, GCC Patches

On Thu, Dec 04, 2014 at 04:00:27AM -0800, H.J. Lu wrote:
> >> Can you add a few testcases?
> >
> > Isn't it already covered by gcc.dg/torture/vshuf* ?
> >
> 
> I didn't see them fail on my machines today.

Those are executable testcases, those better should not fail.
The patch just improved code generation and the testcases test
if the improved code generation works well.
Did you mean some scan-assembler test that verifies the better code
generation?  Guess it is possible, though fragile.

	Jakub

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH x86] Enable v64qi permutations.
  2014-12-04 12:04       ` Jakub Jelinek
@ 2014-12-04 12:45         ` Uros Bizjak
  2014-12-04 12:51           ` Uros Bizjak
  2014-12-04 12:57         ` H.J. Lu
  1 sibling, 1 reply; 14+ messages in thread
From: Uros Bizjak @ 2014-12-04 12:45 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: H.J. Lu, Ilya Tocar, GCC Patches

On Thu, Dec 4, 2014 at 1:04 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Thu, Dec 04, 2014 at 04:00:27AM -0800, H.J. Lu wrote:
>> >> Can you add a few testcases?
>> >
>> > Isn't it already covered by gcc.dg/torture/vshuf* ?
>> >
>>
>> I didn't see them fail on my machines today.
>
> Those are executable testcases, those better should not fail.
> The patch just improved code generation and the testcases test
> if the improved code generation works well.
> Did you mean some scan-assembler test that verifies the better code
> generation?  Guess it is possible, though fragile.

I think that existing executable testcases adequately cover the
functionality of the patch.

The patch is OK.

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH x86] Enable v64qi permutations.
  2014-12-04 12:45         ` Uros Bizjak
@ 2014-12-04 12:51           ` Uros Bizjak
  2014-12-04 13:58             ` Ilya Tocar
  0 siblings, 1 reply; 14+ messages in thread
From: Uros Bizjak @ 2014-12-04 12:51 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: H.J. Lu, Ilya Tocar, GCC Patches

On Thu, Dec 4, 2014 at 1:45 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Thu, Dec 4, 2014 at 1:04 PM, Jakub Jelinek <jakub@redhat.com> wrote:
>> On Thu, Dec 04, 2014 at 04:00:27AM -0800, H.J. Lu wrote:
>>> >> Can you add a few testcases?
>>> >
>>> > Isn't it already covered by gcc.dg/torture/vshuf* ?
>>> >
>>>
>>> I didn't see them fail on my machines today.
>>
>> Those are executable testcases, those better should not fail.
>> The patch just improved code generation and the testcases test
>> if the improved code generation works well.
>> Did you mean some scan-assembler test that verifies the better code
>> generation?  Guess it is possible, though fragile.
>
> I think that existing executable testcases adequately cover the
> functionality of the patch.
>
> The patch is OK.

BTW, the ChangeLog is missing.

index ca5d720..6252e7e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -10678,7 +10678,7 @@
    (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
    (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
    (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
-   (V32HI "TARGET_AVX512BW")])
+   (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])

I don't think change for VBMI target belongs in this patch.

Uros.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH x86] Enable v64qi permutations.
  2014-12-04 12:04       ` Jakub Jelinek
  2014-12-04 12:45         ` Uros Bizjak
@ 2014-12-04 12:57         ` H.J. Lu
  1 sibling, 0 replies; 14+ messages in thread
From: H.J. Lu @ 2014-12-04 12:57 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Ilya Tocar, Uros Bizjak, GCC Patches

On Thu, Dec 4, 2014 at 4:04 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Thu, Dec 04, 2014 at 04:00:27AM -0800, H.J. Lu wrote:
>> >> Can you add a few testcases?
>> >
>> > Isn't it already covered by gcc.dg/torture/vshuf* ?
>> >
>>
>> I didn't see them fail on my machines today.
>
> Those are executable testcases, those better should not fail.
> The patch just improved code generation and the testcases test
> if the improved code generation works well.
> Did you mean some scan-assembler test that verifies the better code
> generation?  Guess it is possible, though fragile.

Well, we will never be sure that the better code is really generated
unless we visually examine the assembly code.  Any changes in
the future may disable the better code generation and we won't
notice until much later.


-- 
H.J.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH x86] Enable v64qi permutations.
  2014-12-04 12:51           ` Uros Bizjak
@ 2014-12-04 13:58             ` Ilya Tocar
  2014-12-04 14:16               ` Uros Bizjak
  0 siblings, 1 reply; 14+ messages in thread
From: Ilya Tocar @ 2014-12-04 13:58 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: Jakub Jelinek, H.J. Lu, GCC Patches

On 04 Dec 13:51, Uros Bizjak wrote:
> On Thu, Dec 4, 2014 at 1:45 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> > On Thu, Dec 4, 2014 at 1:04 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> >> On Thu, Dec 04, 2014 at 04:00:27AM -0800, H.J. Lu wrote:
> >>> >> Can you add a few testcases?
> >>> >
> >>> > Isn't it already covered by gcc.dg/torture/vshuf* ?
> >>> >
> >>>
> >>> I didn't see them fail on my machines today.
> >>
> >> Those are executable testcases, those better should not fail.
> >> The patch just improved code generation and the testcases test
> >> if the improved code generation works well.
> >> Did you mean some scan-assembler test that verifies the better code
> >> generation?  Guess it is possible, though fragile.
> >
> > I think that existing executable testcases adequately cover the
> > functionality of the patch.
> >
> > The patch is OK.
> 
> BTW, the ChangeLog is missing.
> 
	* config/i386/i386.c (ix86_expand_vec_perm_vpermi2): Handle v64qi.
	(expand_vec_perm_broadcast_1): Ditto.
	(expand_vec_perm_vpermi2_vpshub2): New.
	(ix86_expand_vec_perm_const_1): Use it.
	(ix86_vectorize_vec_perm_const_ok): Handle v64qi.
	* config/i386/sse.md (VEC_PERM_AVX2): Add v64qi.
	(VEC_PERM_CONST): Ditto.
> index ca5d720..6252e7e 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -10678,7 +10678,7 @@
>     (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
>     (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
>     (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
> -   (V32HI "TARGET_AVX512BW")])
> +   (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
> 
> I don't think change for VBMI target belongs in this patch.
>
Those changes enable non-const v64qi permutes
(via single vpermi2b insn), should I split them into separate patch?

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH x86] Enable v64qi permutations.
  2014-12-04 13:58             ` Ilya Tocar
@ 2014-12-04 14:16               ` Uros Bizjak
  2014-12-05 16:33                 ` Ilya Tocar
  0 siblings, 1 reply; 14+ messages in thread
From: Uros Bizjak @ 2014-12-04 14:16 UTC (permalink / raw)
  To: Ilya Tocar; +Cc: Jakub Jelinek, H.J. Lu, GCC Patches

On Thu, Dec 4, 2014 at 2:53 PM, Ilya Tocar <tocarip.intel@gmail.com> wrote:

>> >>> >> Can you add a few testcases?
>> >>> >
>> >>> > Isn't it already covered by gcc.dg/torture/vshuf* ?
>> >>> >
>> >>>
>> >>> I didn't see them fail on my machines today.
>> >>
>> >> Those are executable testcases, those better should not fail.
>> >> The patch just improved code generation and the testcases test
>> >> if the improved code generation works well.
>> >> Did you mean some scan-assembler test that verifies the better code
>> >> generation?  Guess it is possible, though fragile.
>> >
>> > I think that existing executable testcases adequately cover the
>> > functionality of the patch.
>> >
>> > The patch is OK.
>>
>> BTW, the ChangeLog is missing.
>>
>         * config/i386/i386.c (ix86_expand_vec_perm_vpermi2): Handle v64qi.
>         (expand_vec_perm_broadcast_1): Ditto.
>         (expand_vec_perm_vpermi2_vpshub2): New.
>         (ix86_expand_vec_perm_const_1): Use it.
>         (ix86_vectorize_vec_perm_const_ok): Handle v64qi.
>         * config/i386/sse.md (VEC_PERM_AVX2): Add v64qi.
>         (VEC_PERM_CONST): Ditto.
>> index ca5d720..6252e7e 100644
>> --- a/gcc/config/i386/sse.md
>> +++ b/gcc/config/i386/sse.md
>> @@ -10678,7 +10678,7 @@
>>     (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
>>     (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
>>     (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
>> -   (V32HI "TARGET_AVX512BW")])
>> +   (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
>>
>> I don't think change for VBMI target belongs in this patch.
>>
> Those changes enable non-const v64qi permutes
> (via single vpermi2b insn), should I split them into separate patch?

If they are not on the same topic, then please yes. Please don't mix
separate issues together.

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH x86] Enable v64qi permutations.
  2014-12-04 14:16               ` Uros Bizjak
@ 2014-12-05 16:33                 ` Ilya Tocar
  2014-12-06  8:57                   ` Uros Bizjak
  0 siblings, 1 reply; 14+ messages in thread
From: Ilya Tocar @ 2014-12-05 16:33 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: Jakub Jelinek, H.J. Lu, GCC Patches

On 04 Dec 15:16, Uros Bizjak wrote:
> On Thu, Dec 4, 2014 at 2:53 PM, Ilya Tocar <tocarip.intel@gmail.com> wrote:
> 
> >> >>> >> Can you add a few testcases?
> >> >>> >
> >> >>> > Isn't it already covered by gcc.dg/torture/vshuf* ?
> >> >>> >
> >> >>>
> >> >>> I didn't see them fail on my machines today.
> >> >>
> >> >> Those are executable testcases, those better should not fail.
> >> >> The patch just improved code generation and the testcases test
> >> >> if the improved code generation works well.
> >> >> Did you mean some scan-assembler test that verifies the better code
> >> >> generation?  Guess it is possible, though fragile.
> >> >
> >> > I think that existing executable testcases adequately cover the
> >> > functionality of the patch.
> >> >
> >> > The patch is OK.
> >>
> >> BTW, the ChangeLog is missing.
> >>
> >         * config/i386/i386.c (ix86_expand_vec_perm_vpermi2): Handle v64qi.
> >         (expand_vec_perm_broadcast_1): Ditto.
> >         (expand_vec_perm_vpermi2_vpshub2): New.
> >         (ix86_expand_vec_perm_const_1): Use it.
> >         (ix86_vectorize_vec_perm_const_ok): Handle v64qi.
> >         * config/i386/sse.md (VEC_PERM_AVX2): Add v64qi.
> >         (VEC_PERM_CONST): Ditto.
> >> index ca5d720..6252e7e 100644
> >> --- a/gcc/config/i386/sse.md
> >> +++ b/gcc/config/i386/sse.md
> >> @@ -10678,7 +10678,7 @@
> >>     (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
> >>     (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
> >>     (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
> >> -   (V32HI "TARGET_AVX512BW")])
> >> +   (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
> >>
> >> I don't think change for VBMI target belongs in this patch.
> >>
> > Those changes enable non-const v64qi permutes
> > (via single vpermi2b insn), should I split them into separate patch?
> 
> If they are not on the same topic, then please yes. Please don't mix
> separate issues together.
>
OK.
Patch below adds variable v64qi permutations.
OK for trunk?
(I plan to commit both of them simultaneously, if this part is approved)

         * config/i386/i386.c (ix86_expand_vec_perm_vpermi2): Handle v64qi.
         * config/i386/sse.md (VEC_PERM_AVX2): Add v64qi.
---
 gcc/config/i386/i386.c | 4 ++++
 gcc/config/i386/sse.md | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ce5dfad..c4dbf78 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21831,6 +21831,10 @@ ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
       if (TARGET_AVX512VL && TARGET_AVX512BW)
 	gen = gen_avx512vl_vpermi2varv16hi3;
       break;
+    case V64QImode:
+      if (TARGET_AVX512VBMI)
+	gen = gen_avx512bw_vpermi2varv64qi3;
+      break;
     case V32HImode:
       if (TARGET_AVX512BW)
 	gen = gen_avx512bw_vpermi2varv32hi3;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 734e6b4..cfbe40c 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -10691,7 +10691,7 @@
    (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
    (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
    (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
-   (V32HI "TARGET_AVX512BW")])
+   (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
 
 (define_expand "vec_perm<mode>"
   [(match_operand:VEC_PERM_AVX2 0 "register_operand")
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH x86] Enable v64qi permutations.
  2014-12-05 16:33                 ` Ilya Tocar
@ 2014-12-06  8:57                   ` Uros Bizjak
  0 siblings, 0 replies; 14+ messages in thread
From: Uros Bizjak @ 2014-12-06  8:57 UTC (permalink / raw)
  To: Ilya Tocar; +Cc: Jakub Jelinek, H.J. Lu, GCC Patches

On Fri, Dec 5, 2014 at 5:33 PM, Ilya Tocar <tocarip.intel@gmail.com> wrote:
> On 04 Dec 15:16, Uros Bizjak wrote:
>> On Thu, Dec 4, 2014 at 2:53 PM, Ilya Tocar <tocarip.intel@gmail.com> wrote:
>>
>> >> >>> >> Can you add a few testcases?
>> >> >>> >
>> >> >>> > Isn't it already covered by gcc.dg/torture/vshuf* ?
>> >> >>> >
>> >> >>>
>> >> >>> I didn't see them fail on my machines today.
>> >> >>
>> >> >> Those are executable testcases, those better should not fail.
>> >> >> The patch just improved code generation and the testcases test
>> >> >> if the improved code generation works well.
>> >> >> Did you mean some scan-assembler test that verifies the better code
>> >> >> generation?  Guess it is possible, though fragile.
>> >> >
>> >> > I think that existing executable testcases adequately cover the
>> >> > functionality of the patch.
>> >> >
>> >> > The patch is OK.
>> >>
>> >> BTW, the ChangeLog is missing.
>> >>
>> >         * config/i386/i386.c (ix86_expand_vec_perm_vpermi2): Handle v64qi.
>> >         (expand_vec_perm_broadcast_1): Ditto.
>> >         (expand_vec_perm_vpermi2_vpshub2): New.
>> >         (ix86_expand_vec_perm_const_1): Use it.
>> >         (ix86_vectorize_vec_perm_const_ok): Handle v64qi.
>> >         * config/i386/sse.md (VEC_PERM_AVX2): Add v64qi.
>> >         (VEC_PERM_CONST): Ditto.
>> >> index ca5d720..6252e7e 100644
>> >> --- a/gcc/config/i386/sse.md
>> >> +++ b/gcc/config/i386/sse.md
>> >> @@ -10678,7 +10678,7 @@
>> >>     (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
>> >>     (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
>> >>     (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
>> >> -   (V32HI "TARGET_AVX512BW")])
>> >> +   (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
>> >>
>> >> I don't think change for VBMI target belongs in this patch.
>> >>
>> > Those changes enable non-const v64qi permutes
>> > (via single vpermi2b insn), should I split them into separate patch?
>>
>> If they are not on the same topic, then please yes. Please don't mix
>> separate issues together.
>>
> OK.
> Patch below adds variable v64qi permutations.
> OK for trunk?
>
> (I plan to commit both of them simultaneously, if this part is approved)
>
>          * config/i386/i386.c (ix86_expand_vec_perm_vpermi2): Handle v64qi.
>          * config/i386/sse.md (VEC_PERM_AVX2): Add v64qi.

OK.

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH x86] Enable v64qi permutations.
  2014-12-04  9:50 [PATCH x86] Enable v64qi permutations Ilya Tocar
  2014-12-04 11:54 ` H.J. Lu
@ 2014-12-10 16:50 ` Richard Henderson
  2014-12-10 16:52   ` Robert Dewar
  1 sibling, 1 reply; 14+ messages in thread
From: Richard Henderson @ 2014-12-10 16:50 UTC (permalink / raw)
  To: Ilya Tocar, Uros Bizjak, Jakub Jelinek; +Cc: GCC Patches

On 12/04/2014 01:49 AM, Ilya Tocar wrote:
> +  if (!TARGET_AVX512BW || !(d->vmode == V64QImode))

Please don't over-complicate the expression.
Use x != y instead of !(x == y).


r~

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH x86] Enable v64qi permutations.
  2014-12-10 16:50 ` Richard Henderson
@ 2014-12-10 16:52   ` Robert Dewar
  0 siblings, 0 replies; 14+ messages in thread
From: Robert Dewar @ 2014-12-10 16:52 UTC (permalink / raw)
  To: Richard Henderson, Ilya Tocar, Uros Bizjak, Jakub Jelinek; +Cc: GCC Patches

On 12/10/2014 11:49 AM, Richard Henderson wrote:
> On 12/04/2014 01:49 AM, Ilya Tocar wrote:
>> +  if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
>
> Please don't over-complicate the expression.
> Use x != y instead of !(x == y).

To me the original reads more clearly, since it
is of the parallel form !X or !Y, I don't see it
as somehow more complicated???
>
>
> r~
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2014-12-10 16:52 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-12-04  9:50 [PATCH x86] Enable v64qi permutations Ilya Tocar
2014-12-04 11:54 ` H.J. Lu
2014-12-04 11:57   ` Jakub Jelinek
2014-12-04 12:00     ` H.J. Lu
2014-12-04 12:04       ` Jakub Jelinek
2014-12-04 12:45         ` Uros Bizjak
2014-12-04 12:51           ` Uros Bizjak
2014-12-04 13:58             ` Ilya Tocar
2014-12-04 14:16               ` Uros Bizjak
2014-12-05 16:33                 ` Ilya Tocar
2014-12-06  8:57                   ` Uros Bizjak
2014-12-04 12:57         ` H.J. Lu
2014-12-10 16:50 ` Richard Henderson
2014-12-10 16:52   ` Robert Dewar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).