* [PATCH] Fix up AVX512 andnot (PR target/70525)
@ 2016-04-04 17:44 Jakub Jelinek
2016-04-05 7:54 ` Uros Bizjak
0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2016-04-04 17:44 UTC (permalink / raw)
To: Uros Bizjak, Kirill Yukhin; +Cc: gcc-patches
Hi!
This patch fixes various *andnot<mode>3* issues. There are two issues on
the ISA side that make stuff harder for andnot: there are no VPANDNB and
VPANDNW instructions, and while there used to be just VPANDN instruction
in AVX/AVX2, there is only VPANDND and VPANDNQ in EVEX.
The patch changes:
1) simplifies asserts, TARGET_AVX512VL implies both TARGET_AVX2 and
TARGET_SSE2, so asserts like TARGET_AVX2 || TARGET_AVX512VL make no
sense
2) for V32HImode/V64QImode it emits vpandnq instruction, rather than
vpandn that fails to assemble
3) the *andnot<mode>3 pattern clearly wasn't expecting subst, but
as it used (copy-paste?) <mask_operand3_1> in the template, it actually
was substed, which is wrong - we can't implement V64QImode or V32HImode
masking of andnot (well, not in a single instruction); checked
this was the only case of <mask_operand3_1> used in define_insn
without <mask_name>; for V*[SD]Imode *andnot<mode>3_mask pattern
should DTRT
4) the *andnot<mode>3_mask pattern makes no sense, for similar reasons
- VPANDNB and VPANDNW are not in the ISA, not even with AVX512-BW
5) formatting fixes
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2016-04-04 Jakub Jelinek <jakub@redhat.com>
PR target/70525
* config/i386/sse.md (*andnot<mode>3): Simplify assertions.
Use vpandn<ssemodesuffix> for V16SI/V8DImode, vpandnq for
V32HI/V64QImode, don't use <mask_operand3_1>, fix up formatting.
(*andnot<mode>3_mask): Remove insn with VI12_AVX512VL iterator.
* gcc.target/i386/pr70525.c: New test.
--- gcc/config/i386/sse.md.jj 2016-04-01 17:21:31.000000000 +0200
+++ gcc/config/i386/sse.md 2016-04-04 14:42:06.296867515 +0200
@@ -11377,45 +11377,46 @@ (define_insn "*andnot<mode>3"
case MODE_XI:
gcc_assert (TARGET_AVX512F);
case MODE_OI:
- gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
+ gcc_assert (TARGET_AVX2);
case MODE_TI:
- gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
+ gcc_assert (TARGET_SSE2);
switch (<MODE>mode)
- {
- case V16SImode:
- case V8DImode:
- if (TARGET_AVX512F)
- {
- tmp = "pandn<ssemodesuffix>";
- break;
- }
- case V8SImode:
- case V4DImode:
- case V4SImode:
- case V2DImode:
- if (TARGET_AVX512VL)
- {
- tmp = "pandn<ssemodesuffix>";
- break;
- }
- default:
- tmp = TARGET_AVX512VL ? "pandnq" : "pandn";
- }
+ {
+ case V64QImode:
+ case V32HImode:
+ /* There is no vpandnb or vpandnw instruction, nor vpandn for
+ 512-bit vectors. Use vpandnq instead. */
+ tmp = "pandnq";
+ break;
+ case V16SImode:
+ case V8DImode:
+ tmp = "pandn<ssemodesuffix>";
+ break;
+ case V8SImode:
+ case V4DImode:
+ case V4SImode:
+ case V2DImode:
+ tmp = TARGET_AVX512VL ? "pandn<ssemodesuffix>" : "pandn";
+ break;
+ default:
+ tmp = TARGET_AVX512VL ? "pandnq" : "pandn";
+ break;
+ }
break;
- case MODE_V16SF:
+ case MODE_V16SF:
gcc_assert (TARGET_AVX512F);
- case MODE_V8SF:
+ case MODE_V8SF:
gcc_assert (TARGET_AVX);
- case MODE_V4SF:
+ case MODE_V4SF:
gcc_assert (TARGET_SSE);
tmp = "andnps";
break;
- default:
+ default:
gcc_unreachable ();
- }
+ }
switch (which_alternative)
{
@@ -11423,7 +11424,7 @@ (define_insn "*andnot<mode>3"
ops = "%s\t{%%2, %%0|%%0, %%2}";
break;
case 1:
- ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
+ ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
break;
default:
gcc_unreachable ();
@@ -11471,21 +11472,6 @@ (define_insn "*andnot<mode>3_mask"
"vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "*andnot<mode>3_mask"
- [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
- (vec_merge:VI12_AVX512VL
- (and:VI12_AVX512VL
- (not:VI12_AVX512VL
- (match_operand:VI12_AVX512VL 1 "register_operand" "v"))
- (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
- (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
- (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
- "TARGET_AVX512BW"
- "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
- [(set_attr "type" "sselog")
- (set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
(define_expand "<code><mode>3"
--- gcc/testsuite/gcc.target/i386/pr70525.c.jj 2016-04-04 15:13:23.417615588 +0200
+++ gcc/testsuite/gcc.target/i386/pr70525.c 2016-04-04 15:13:04.000000000 +0200
@@ -0,0 +1,32 @@
+/* PR target/70525 */
+/* { dg-do assemble { target avx512bw } } */
+/* { dg-options "-O2 -mavx512bw -mno-avx512vl" } */
+
+typedef char v64qi __attribute__ ((vector_size (64)));
+typedef short v32hi __attribute__ ((vector_size (64)));
+typedef int v16si __attribute__ ((vector_size (64)));
+typedef long long v8di __attribute__ ((vector_size (64)));
+
+v64qi
+f1 (v64qi x, v64qi y)
+{
+ return x & ~y;
+}
+
+v32hi
+f2 (v32hi x, v32hi y)
+{
+ return x & ~y;
+}
+
+v16si
+f3 (v16si x, v16si y)
+{
+ return x & ~y;
+}
+
+v8di
+f4 (v8di x, v8di y)
+{
+ return x & ~y;
+}
Jakub
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] Fix up AVX512 andnot (PR target/70525)
2016-04-04 17:44 [PATCH] Fix up AVX512 andnot (PR target/70525) Jakub Jelinek
@ 2016-04-05 7:54 ` Uros Bizjak
0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2016-04-05 7:54 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: Kirill Yukhin, gcc-patches
On Mon, Apr 4, 2016 at 7:44 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> This patch fixes various *andnot<mode>3* issues. There are two issues on
> the ISA side that makes stuff harder for andnot: there are no VPANDNB and
> VPANDNW instructions, and while there used to be just VPANDN instruction
> in AVX/AVX2, there is only VPANDND and VPANDNQ in EVEX.
> The patch changes:
> 1) simplifies asserts, TARGET_AVX512VL implies both TARGET_AVX2 and
> TARGET_SSE2, so asserts like TARGET_AVX2 || TARGET_AVX512VL make no
> sense
> 2) for V32HImode/V64QImode it emits vpandnq instruction, rather than
> vpandn that fails to assemble
> 3) the *andnot<mode>3 pattern clearly wasn't expecting subst, but
> as it used (copy-paste?) <mask_operand3_1> in the template, it actually
> was substed, which is wrong - we can't implement V64QImode or V32HImode
> masking of andnot (well, not in a single instruction); checked
> this was the only case of <mask_oeprand3_1> used in define_insn
> without <mask_name>; for V*[SD]Imode *andnot<mode>3_mask pattern
> should DTRT
> 4) the *andnot<mode>3_mask pattern makes no sense, for similar reasons
> - VPANDNB and VPANDNW are not in the ISA, not even with AVX512-BW
> 5) formatting fixes
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2016-04-04 Jakub Jelinek <jakub@redhat.com>
>
> PR target/70525
> * config/i386/sse.md (*andnot<mode>3): Simplify assertions.
> Use vpandn<ssemodesuffix> for V16SI/V8DImode, vpandnq for
> V32HI/V64QImode, don't use <mask_operand3_1>, fix up formatting.
> (*andnot<mode>3_mask): Remove insn with VI12_AVX512VL iterator.
>
> * gcc.target/i386/pr70525.c: New test.
OK.
Thanks,
Uros.
> --- gcc/config/i386/sse.md.jj 2016-04-01 17:21:31.000000000 +0200
> +++ gcc/config/i386/sse.md 2016-04-04 14:42:06.296867515 +0200
> @@ -11377,45 +11377,46 @@ (define_insn "*andnot<mode>3"
> case MODE_XI:
> gcc_assert (TARGET_AVX512F);
> case MODE_OI:
> - gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
> + gcc_assert (TARGET_AVX2);
> case MODE_TI:
> - gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
> + gcc_assert (TARGET_SSE2);
> switch (<MODE>mode)
> - {
> - case V16SImode:
> - case V8DImode:
> - if (TARGET_AVX512F)
> - {
> - tmp = "pandn<ssemodesuffix>";
> - break;
> - }
> - case V8SImode:
> - case V4DImode:
> - case V4SImode:
> - case V2DImode:
> - if (TARGET_AVX512VL)
> - {
> - tmp = "pandn<ssemodesuffix>";
> - break;
> - }
> - default:
> - tmp = TARGET_AVX512VL ? "pandnq" : "pandn";
> - }
> + {
> + case V64QImode:
> + case V32HImode:
> + /* There is no vpandnb or vpandnw instruction, nor vpandn for
> + 512-bit vectors. Use vpandnq instead. */
> + tmp = "pandnq";
> + break;
> + case V16SImode:
> + case V8DImode:
> + tmp = "pandn<ssemodesuffix>";
> + break;
> + case V8SImode:
> + case V4DImode:
> + case V4SImode:
> + case V2DImode:
> + tmp = TARGET_AVX512VL ? "pandn<ssemodesuffix>" : "pandn";
> + break;
> + default:
> + tmp = TARGET_AVX512VL ? "pandnq" : "pandn";
> + break;
> + }
> break;
>
> - case MODE_V16SF:
> + case MODE_V16SF:
> gcc_assert (TARGET_AVX512F);
> - case MODE_V8SF:
> + case MODE_V8SF:
> gcc_assert (TARGET_AVX);
> - case MODE_V4SF:
> + case MODE_V4SF:
> gcc_assert (TARGET_SSE);
>
> tmp = "andnps";
> break;
>
> - default:
> + default:
> gcc_unreachable ();
> - }
> + }
>
> switch (which_alternative)
> {
> @@ -11423,7 +11424,7 @@ (define_insn "*andnot<mode>3"
> ops = "%s\t{%%2, %%0|%%0, %%2}";
> break;
> case 1:
> - ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
> + ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
> break;
> default:
> gcc_unreachable ();
> @@ -11471,21 +11472,6 @@ (define_insn "*andnot<mode>3_mask"
> "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
> [(set_attr "type" "sselog")
> (set_attr "prefix" "evex")
> - (set_attr "mode" "<sseinsnmode>")])
> -
> -(define_insn "*andnot<mode>3_mask"
> - [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
> - (vec_merge:VI12_AVX512VL
> - (and:VI12_AVX512VL
> - (not:VI12_AVX512VL
> - (match_operand:VI12_AVX512VL 1 "register_operand" "v"))
> - (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
> - (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
> - (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
> - "TARGET_AVX512BW"
> - "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
> - [(set_attr "type" "sselog")
> - (set_attr "prefix" "evex")
> (set_attr "mode" "<sseinsnmode>")])
>
> (define_expand "<code><mode>3"
> --- gcc/testsuite/gcc.target/i386/pr70525.c.jj 2016-04-04 15:13:23.417615588 +0200
> +++ gcc/testsuite/gcc.target/i386/pr70525.c 2016-04-04 15:13:04.000000000 +0200
> @@ -0,0 +1,32 @@
> +/* PR target/70525 */
> +/* { dg-do assemble { target avx512bw } } */
> +/* { dg-options "-O2 -mavx512bw -mno-avx512vl" } */
> +
> +typedef char v64qi __attribute__ ((vector_size (64)));
> +typedef short v32hi __attribute__ ((vector_size (64)));
> +typedef int v16si __attribute__ ((vector_size (64)));
> +typedef long long v8di __attribute__ ((vector_size (64)));
> +
> +v64qi
> +f1 (v64qi x, v64qi y)
> +{
> + return x & ~y;
> +}
> +
> +v32hi
> +f2 (v32hi x, v32hi y)
> +{
> + return x & ~y;
> +}
> +
> +v16si
> +f3 (v16si x, v16si y)
> +{
> + return x & ~y;
> +}
> +
> +v8di
> +f4 (v8di x, v8di y)
> +{
> + return x & ~y;
> +}
>
> Jakub
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2016-04-05 7:54 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-04 17:44 [PATCH] Fix up AVX512 andnot (PR target/70525) Jakub Jelinek
2016-04-05 7:54 ` Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).