* [PATCH] Fix avx512 vpermq (PR target/79812)
@ 2017-03-03 19:42 Jakub Jelinek
2017-03-06 10:03 ` Uros Bizjak
0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2017-03-03 19:42 UTC (permalink / raw)
To: Uros Bizjak, Kirill Yukhin; +Cc: gcc-patches
Hi!
The vpermq/vpermpd instruction patterns for 512-bit vectors use bogus RTL, and
if simplify-rtx.c happens to process that RTL, we ICE.
The problem is that for V8D[IF]mode VEC_SELECT we need to use a PARALLEL
with 8 elements, not 4.
The <avx512>_vec_dup<mode>_1 change is unrelated to this, spotted
first by manual inspection and verified by the genrecog.c verify_pattern
patch; the broadcast wants to broadcast the first element, so it should be
a scalar vec_select in vec_duplicate, rather than same size vector as
vec_select's operand.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2017-03-03 Jakub Jelinek <jakub@redhat.com>
PR target/79812
* config/i386/sse.md (VI8F_256_512): Remove mode iterator.
(<avx2_avx512>_perm<mode>): Rename to ...
(avx2_perm<mode>): ... this. Use VI8F_256 iterator instead
of VI8F_256_512.
(<avx512>_perm<mode>_mask): Rename to ...
(avx512vl_perm<mode>_mask): ... this. Use VI8F_256 iterator instead
of VI8F_256_512.
(<avx2_avx512>_perm<mode>_1<mask_name>): Rename to ...
(avx2_perm<mode>_1<mask_name>): ... this. Use VI8F_256 iterator
instead of VI8F_256_512.
(avx512f_perm<mode>): New define_expand.
(avx512f_perm<mode>_mask): Likewise.
(avx512f_perm<mode>_1<mask_name>): New define_insn.
(<avx512>_vec_dup<mode>_1): Fix up vec_select mode.
* gcc.target/i386/avx512f-vpermq-imm-3.c: New test.
--- gcc/config/i386/sse.md.jj 2017-03-02 10:19:07.000000000 +0100
+++ gcc/config/i386/sse.md 2017-03-03 16:10:42.317111636 +0100
@@ -549,8 +549,6 @@ (define_mode_iterator VI4F_128 [V4SI V4S
(define_mode_iterator VI8F_128 [V2DI V2DF])
(define_mode_iterator VI4F_256 [V8SI V8SF])
(define_mode_iterator VI8F_256 [V4DI V4DF])
-(define_mode_iterator VI8F_256_512
- [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
(define_mode_iterator VI48F_256_512
[V8SI V8SF
(V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
@@ -17306,43 +17304,43 @@ (define_insn "<avx512>_permvar<mode><mas
(set_attr "prefix" "<mask_prefix2>")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "<avx2_avx512>_perm<mode>"
- [(match_operand:VI8F_256_512 0 "register_operand")
- (match_operand:VI8F_256_512 1 "nonimmediate_operand")
+(define_expand "avx2_perm<mode>"
+ [(match_operand:VI8F_256 0 "register_operand")
+ (match_operand:VI8F_256 1 "nonimmediate_operand")
(match_operand:SI 2 "const_0_to_255_operand")]
"TARGET_AVX2"
{
int mask = INTVAL (operands[2]);
- emit_insn (gen_<avx2_avx512>_perm<mode>_1 (operands[0], operands[1],
- GEN_INT ((mask >> 0) & 3),
- GEN_INT ((mask >> 2) & 3),
- GEN_INT ((mask >> 4) & 3),
- GEN_INT ((mask >> 6) & 3)));
+ emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3)));
DONE;
})
-(define_expand "<avx512>_perm<mode>_mask"
- [(match_operand:VI8F_256_512 0 "register_operand")
- (match_operand:VI8F_256_512 1 "nonimmediate_operand")
+(define_expand "avx512vl_perm<mode>_mask"
+ [(match_operand:VI8F_256 0 "register_operand")
+ (match_operand:VI8F_256 1 "nonimmediate_operand")
(match_operand:SI 2 "const_0_to_255_operand")
- (match_operand:VI8F_256_512 3 "vector_move_operand")
+ (match_operand:VI8F_256 3 "vector_move_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
- "TARGET_AVX512F"
+ "TARGET_AVX512VL"
{
int mask = INTVAL (operands[2]);
emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
- GEN_INT ((mask >> 0) & 3),
- GEN_INT ((mask >> 2) & 3),
- GEN_INT ((mask >> 4) & 3),
- GEN_INT ((mask >> 6) & 3),
- operands[3], operands[4]));
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3),
+ operands[3], operands[4]));
DONE;
})
-(define_insn "<avx2_avx512>_perm<mode>_1<mask_name>"
- [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
- (vec_select:VI8F_256_512
- (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
+(define_insn "avx2_perm<mode>_1<mask_name>"
+ [(set (match_operand:VI8F_256 0 "register_operand" "=v")
+ (vec_select:VI8F_256
+ (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
@@ -17361,6 +17359,77 @@ (define_insn "<avx2_avx512>_perm<mode>_1
(set_attr "prefix" "<mask_prefix2>")
(set_attr "mode" "<sseinsnmode>")])
+(define_expand "avx512f_perm<mode>"
+ [(match_operand:V8FI 0 "register_operand")
+ (match_operand:V8FI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")]
+ "TARGET_AVX512F"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3),
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4)));
+ DONE;
+})
+
+(define_expand "avx512f_perm<mode>_mask"
+ [(match_operand:V8FI 0 "register_operand")
+ (match_operand:V8FI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")
+ (match_operand:V8FI 3 "vector_move_operand")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")]
+ "TARGET_AVX512F"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3),
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4),
+ operands[3], operands[4]));
+ DONE;
+})
+
+(define_insn "avx512f_perm<mode>_1<mask_name>"
+ [(set (match_operand:V8FI 0 "register_operand" "=v")
+ (vec_select:V8FI
+ (match_operand:V8FI 1 "nonimmediate_operand" "vm")
+ (parallel [(match_operand 2 "const_0_to_3_operand")
+ (match_operand 3 "const_0_to_3_operand")
+ (match_operand 4 "const_0_to_3_operand")
+ (match_operand 5 "const_0_to_3_operand")
+ (match_operand 6 "const_4_to_7_operand")
+ (match_operand 7 "const_4_to_7_operand")
+ (match_operand 8 "const_4_to_7_operand")
+ (match_operand 9 "const_4_to_7_operand")])))]
+ "TARGET_AVX512F && <mask_mode512bit_condition>
+ && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
+ && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
+ && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
+ && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[2]) << 0;
+ mask |= INTVAL (operands[3]) << 2;
+ mask |= INTVAL (operands[4]) << 4;
+ mask |= INTVAL (operands[5]) << 6;
+ operands[2] = GEN_INT (mask);
+ return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "<mask_prefix2>")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "avx2_permv2ti"
[(set (match_operand:V4DI 0 "register_operand" "=x")
(unspec:V4DI
@@ -17389,7 +17458,7 @@ (define_insn "avx2_vec_dupv4df"
(define_insn "<avx512>_vec_dup<mode>_1"
[(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
(vec_duplicate:VI_AVX512BW
- (vec_select:VI_AVX512BW
+ (vec_select:<ssescalarmode>
(match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
(parallel [(const_int 0)]))))]
"TARGET_AVX512F"
--- gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-3.c.jj 2017-03-03 16:13:19.852037848 +0100
+++ gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-3.c 2017-03-03 16:14:14.952312508 +0100
@@ -0,0 +1,5 @@
+/* PR target/79812 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512f" } */
+
+#include "avx512f-vpermq-imm-2.c"
Jakub
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] Fix avx512 vpermq (PR target/79812)
2017-03-03 19:42 [PATCH] Fix avx512 vpermq (PR target/79812) Jakub Jelinek
@ 2017-03-06 10:03 ` Uros Bizjak
0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2017-03-06 10:03 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: Kirill Yukhin, gcc-patches
On Fri, Mar 3, 2017 at 8:42 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> vpermq/vpermpd instructions for 512-bit vectors use bogus RTL and if
> we happen to simplify-rtx.c it, we ICE.
> The problem is that for V8D[IF]mode VEC_SELECT we need to use a PARALLEL
> with 8 elements, not 4.
> The <avx512>_vec_dup<mode>_1 change is unrelated to this, spotted
> first by manual inspection and verified by the genrecog.c verify_pattern
> patch; the broadcast wants to broadcast the first element, so it should be
> a scalar vec_select in vec_duplicate, rather than same size vector as
> vec_select's operand.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2017-03-03 Jakub Jelinek <jakub@redhat.com>
>
> PR target/79812
> * config/i386/sse.md (VI8F_256_512): Remove mode iterator.
> (<avx2_avx512>_perm<mode>): Rename to ...
> (avx2_perm<mode>): ... this. Use VI8F_256 iterator instead
> of VI8F_256_512.
> (<avx512>_perm<mode>_mask): Rename to ...
> (avx512vl_perm<mode>_mask): ... this. Use VI8F_256 iterator instead
> of VI8F_256_512.
> (<avx2_avx512>_perm<mode>_1<mask_name>): Rename to ...
> (avx2_perm<mode>_1<mask_name): ... this. Use VI8F_256 iterator
> instead of VI8F_256_512.
> (avx512f_perm<mode>): New define_expand.
> (avx512f_perm<mode>_mask): Likewise.
> (avx512f_perm<mode>_1<mask_name>): New define_insn.
> (<avx512>_vec_dup<mode>_1): Fix up vec_select mode.
>
> * gcc.target/i386/avx512f-vpermq-imm-3.c: New test.
LGTM.
Thanks,
Uros.
> --- gcc/config/i386/sse.md.jj 2017-03-02 10:19:07.000000000 +0100
> +++ gcc/config/i386/sse.md 2017-03-03 16:10:42.317111636 +0100
> @@ -549,8 +549,6 @@ (define_mode_iterator VI4F_128 [V4SI V4S
> (define_mode_iterator VI8F_128 [V2DI V2DF])
> (define_mode_iterator VI4F_256 [V8SI V8SF])
> (define_mode_iterator VI8F_256 [V4DI V4DF])
> -(define_mode_iterator VI8F_256_512
> - [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
> (define_mode_iterator VI48F_256_512
> [V8SI V8SF
> (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
> @@ -17306,43 +17304,43 @@ (define_insn "<avx512>_permvar<mode><mas
> (set_attr "prefix" "<mask_prefix2>")
> (set_attr "mode" "<sseinsnmode>")])
>
> -(define_expand "<avx2_avx512>_perm<mode>"
> - [(match_operand:VI8F_256_512 0 "register_operand")
> - (match_operand:VI8F_256_512 1 "nonimmediate_operand")
> +(define_expand "avx2_perm<mode>"
> + [(match_operand:VI8F_256 0 "register_operand")
> + (match_operand:VI8F_256 1 "nonimmediate_operand")
> (match_operand:SI 2 "const_0_to_255_operand")]
> "TARGET_AVX2"
> {
> int mask = INTVAL (operands[2]);
> - emit_insn (gen_<avx2_avx512>_perm<mode>_1 (operands[0], operands[1],
> - GEN_INT ((mask >> 0) & 3),
> - GEN_INT ((mask >> 2) & 3),
> - GEN_INT ((mask >> 4) & 3),
> - GEN_INT ((mask >> 6) & 3)));
> + emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
> + GEN_INT ((mask >> 0) & 3),
> + GEN_INT ((mask >> 2) & 3),
> + GEN_INT ((mask >> 4) & 3),
> + GEN_INT ((mask >> 6) & 3)));
> DONE;
> })
>
> -(define_expand "<avx512>_perm<mode>_mask"
> - [(match_operand:VI8F_256_512 0 "register_operand")
> - (match_operand:VI8F_256_512 1 "nonimmediate_operand")
> +(define_expand "avx512vl_perm<mode>_mask"
> + [(match_operand:VI8F_256 0 "register_operand")
> + (match_operand:VI8F_256 1 "nonimmediate_operand")
> (match_operand:SI 2 "const_0_to_255_operand")
> - (match_operand:VI8F_256_512 3 "vector_move_operand")
> + (match_operand:VI8F_256 3 "vector_move_operand")
> (match_operand:<avx512fmaskmode> 4 "register_operand")]
> - "TARGET_AVX512F"
> + "TARGET_AVX512VL"
> {
> int mask = INTVAL (operands[2]);
> emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
> - GEN_INT ((mask >> 0) & 3),
> - GEN_INT ((mask >> 2) & 3),
> - GEN_INT ((mask >> 4) & 3),
> - GEN_INT ((mask >> 6) & 3),
> - operands[3], operands[4]));
> + GEN_INT ((mask >> 0) & 3),
> + GEN_INT ((mask >> 2) & 3),
> + GEN_INT ((mask >> 4) & 3),
> + GEN_INT ((mask >> 6) & 3),
> + operands[3], operands[4]));
> DONE;
> })
>
> -(define_insn "<avx2_avx512>_perm<mode>_1<mask_name>"
> - [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
> - (vec_select:VI8F_256_512
> - (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
> +(define_insn "avx2_perm<mode>_1<mask_name>"
> + [(set (match_operand:VI8F_256 0 "register_operand" "=v")
> + (vec_select:VI8F_256
> + (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
> (parallel [(match_operand 2 "const_0_to_3_operand")
> (match_operand 3 "const_0_to_3_operand")
> (match_operand 4 "const_0_to_3_operand")
> @@ -17361,6 +17359,77 @@ (define_insn "<avx2_avx512>_perm<mode>_1
> (set_attr "prefix" "<mask_prefix2>")
> (set_attr "mode" "<sseinsnmode>")])
>
> +(define_expand "avx512f_perm<mode>"
> + [(match_operand:V8FI 0 "register_operand")
> + (match_operand:V8FI 1 "nonimmediate_operand")
> + (match_operand:SI 2 "const_0_to_255_operand")]
> + "TARGET_AVX512F"
> +{
> + int mask = INTVAL (operands[2]);
> + emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
> + GEN_INT ((mask >> 0) & 3),
> + GEN_INT ((mask >> 2) & 3),
> + GEN_INT ((mask >> 4) & 3),
> + GEN_INT ((mask >> 6) & 3),
> + GEN_INT (((mask >> 0) & 3) + 4),
> + GEN_INT (((mask >> 2) & 3) + 4),
> + GEN_INT (((mask >> 4) & 3) + 4),
> + GEN_INT (((mask >> 6) & 3) + 4)));
> + DONE;
> +})
> +
> +(define_expand "avx512f_perm<mode>_mask"
> + [(match_operand:V8FI 0 "register_operand")
> + (match_operand:V8FI 1 "nonimmediate_operand")
> + (match_operand:SI 2 "const_0_to_255_operand")
> + (match_operand:V8FI 3 "vector_move_operand")
> + (match_operand:<avx512fmaskmode> 4 "register_operand")]
> + "TARGET_AVX512F"
> +{
> + int mask = INTVAL (operands[2]);
> + emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
> + GEN_INT ((mask >> 0) & 3),
> + GEN_INT ((mask >> 2) & 3),
> + GEN_INT ((mask >> 4) & 3),
> + GEN_INT ((mask >> 6) & 3),
> + GEN_INT (((mask >> 0) & 3) + 4),
> + GEN_INT (((mask >> 2) & 3) + 4),
> + GEN_INT (((mask >> 4) & 3) + 4),
> + GEN_INT (((mask >> 6) & 3) + 4),
> + operands[3], operands[4]));
> + DONE;
> +})
> +
> +(define_insn "avx512f_perm<mode>_1<mask_name>"
> + [(set (match_operand:V8FI 0 "register_operand" "=v")
> + (vec_select:V8FI
> + (match_operand:V8FI 1 "nonimmediate_operand" "vm")
> + (parallel [(match_operand 2 "const_0_to_3_operand")
> + (match_operand 3 "const_0_to_3_operand")
> + (match_operand 4 "const_0_to_3_operand")
> + (match_operand 5 "const_0_to_3_operand")
> + (match_operand 6 "const_4_to_7_operand")
> + (match_operand 7 "const_4_to_7_operand")
> + (match_operand 8 "const_4_to_7_operand")
> + (match_operand 9 "const_4_to_7_operand")])))]
> + "TARGET_AVX512F && <mask_mode512bit_condition>
> + && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
> + && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
> + && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
> + && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
> +{
> + int mask = 0;
> + mask |= INTVAL (operands[2]) << 0;
> + mask |= INTVAL (operands[3]) << 2;
> + mask |= INTVAL (operands[4]) << 4;
> + mask |= INTVAL (operands[5]) << 6;
> + operands[2] = GEN_INT (mask);
> + return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
> +}
> + [(set_attr "type" "sselog")
> + (set_attr "prefix" "<mask_prefix2>")
> + (set_attr "mode" "<sseinsnmode>")])
> +
> (define_insn "avx2_permv2ti"
> [(set (match_operand:V4DI 0 "register_operand" "=x")
> (unspec:V4DI
> @@ -17389,7 +17458,7 @@ (define_insn "avx2_vec_dupv4df"
> (define_insn "<avx512>_vec_dup<mode>_1"
> [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
> (vec_duplicate:VI_AVX512BW
> - (vec_select:VI_AVX512BW
> + (vec_select:<ssescalarmode>
> (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
> (parallel [(const_int 0)]))))]
> "TARGET_AVX512F"
> --- gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-3.c.jj 2017-03-03 16:13:19.852037848 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-3.c 2017-03-03 16:14:14.952312508 +0100
> @@ -0,0 +1,5 @@
> +/* PR target/79812 */
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mavx512f" } */
> +
> +#include "avx512f-vpermq-imm-2.c"
>
> Jakub
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2017-03-06 10:03 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-03 19:42 [PATCH] Fix avx512 vpermq (PR target/79812) Jakub Jelinek
2017-03-06 10:03 ` Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).