* [PATCH] Add zero extending patterns to vptest{,n}m instructions (PR target/88461)
@ 2018-12-12 22:36 Jakub Jelinek
2018-12-13 7:54 ` Uros Bizjak
0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2018-12-12 22:36 UTC (permalink / raw)
To: Uros Bizjak; +Cc: gcc-patches
Hi!
The following patch uses a new mode iterator to merge two nearly identical
vptestm patterns and two nearly identical vptestnm patterns into one pattern
each, and adds patterns that zero extend those results to wider k registers,
because that is what the instructions actually do.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2018-12-12 Jakub Jelinek <jakub@redhat.com>
PR target/88461
* config/i386/sse.md (VI1248_AVX512VLBW, AVX512ZEXTMASK): New
mode iterators.
(<avx512>_testm<mode>3<mask_scalar_merge_name>,
<avx512>_testnm<mode>3<mask_scalar_merge_name>): Merge patterns
with VI12_AVX512VL and VI48_AVX512VL iterators into ones with
VI1248_AVX512VLBW iterator.
(*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext,
*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask,
*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext,
*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask): New
define_insns.
* gcc.target/i386/pr88461.c: New test.
--- gcc/config/i386/sse.md.jj 2018-12-03 21:56:35.252575482 +0100
+++ gcc/config/i386/sse.md 2018-12-12 16:47:00.370433319 +0100
@@ -12322,22 +12322,22 @@ (define_insn "*<code><mode>3_bcst"
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
- [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
- (unspec:<avx512fmaskmode>
- [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
- (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
- UNSPEC_TESTM))]
- "TARGET_AVX512BW"
- "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
- [(set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
+(define_mode_iterator VI1248_AVX512VLBW
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
+ (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
+ (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
+ V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+
+(define_mode_iterator AVX512ZEXTMASK
+ [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
(unspec:<avx512fmaskmode>
- [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
- (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
+ [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
+ (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
UNSPEC_TESTM))]
"TARGET_AVX512F"
"vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
@@ -12347,24 +12347,73 @@ (define_insn "<avx512>_testm<mode>3<mask
(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
(unspec:<avx512fmaskmode>
- [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
- (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
+ [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
+ (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
UNSPEC_TESTNM))]
- "TARGET_AVX512BW"
+ "TARGET_AVX512F"
"vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
- [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
- (unspec:<avx512fmaskmode>
- [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
- (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
- UNSPEC_TESTNM))]
- "TARGET_AVX512F"
- "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
+(define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext"
+ [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk")
+ (zero_extend:AVX512ZEXTMASK
+ (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
+ [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
+ (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
+ UNSPEC_TESTM)))]
+ "TARGET_AVX512BW
+ && (<AVX512ZEXTMASK:MODE_SIZE>
+ > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
+ "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
+ (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
+
+(define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask"
+ [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk")
+ (zero_extend:AVX512ZEXTMASK
+ (and:<VI1248_AVX512VLBW:avx512fmaskmode>
+ (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
+ [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
+ (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
+ UNSPEC_TESTM)
+ (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
+ "TARGET_AVX512BW
+ && (<AVX512ZEXTMASK:MODE_SIZE>
+ > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
+ "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
+
+(define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext"
+ [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk")
+ (zero_extend:AVX512ZEXTMASK
+ (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
+ [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
+ (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
+ UNSPEC_TESTNM)))]
+ "TARGET_AVX512BW
+ && (<AVX512ZEXTMASK:MODE_SIZE>
+ > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
+ "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
+
+(define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask"
+ [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk")
+ (zero_extend:AVX512ZEXTMASK
+ (and:<VI1248_AVX512VLBW:avx512fmaskmode>
+ (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
+ [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
+ (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
+ UNSPEC_TESTNM)
+ (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
+ "TARGET_AVX512BW
+ && (<AVX512ZEXTMASK:MODE_SIZE>
+ > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
+ "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
--- gcc/testsuite/gcc.target/i386/pr88461.c.jj 2018-12-12 16:54:03.779529123 +0100
+++ gcc/testsuite/gcc.target/i386/pr88461.c 2018-12-12 16:53:42.308879227 +0100
@@ -0,0 +1,16 @@
+/* PR target/88461 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl -mavx512bw" } */
+/* { dg-final { scan-assembler-times "kmovw\[ \t]" 2 } } */
+
+#include <x86intrin.h>
+
+int
+foo (const __m128i *data, int a)
+{
+ __m128i v = _mm_load_si128 (data);
+ __mmask16 m = _mm_testn_epi16_mask (v, v);
+ m = _kshiftli_mask16 (m, 1);
+ m = _kandn_mask16 (m, a);
+ return m;
+}
Jakub
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] Add zero extending patterns to vptest{,n}m instructions (PR target/88461)
2018-12-12 22:36 [PATCH] Add zero extending patterns to vptest{,n}m instructions (PR target/88461) Jakub Jelinek
@ 2018-12-13 7:54 ` Uros Bizjak
0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2018-12-13 7:54 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: gcc-patches
On Wed, Dec 12, 2018 at 11:36 PM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> The following patch uses a new mode iterator to merge two nearly identical
> vptestm patterns and two nearly identical vptestnm patterns into one pattern
> each, and adds patterns that zero extend those results to wider k registers,
> because that is what the instructions actually do.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2018-12-12 Jakub Jelinek <jakub@redhat.com>
>
> PR target/88461
> * config/i386/sse.md (VI1248_AVX512VLBW, AVX512ZEXTMASK): New
> mode iterators.
> (<avx512>_testm<mode>3<mask_scalar_merge_name>,
> <avx512>_testnm<mode>3<mask_scalar_merge_name>): Merge patterns
> with VI12_AVX512VL and VI48_AVX512VL iterators into ones with
> VI1248_AVX512VLBW iterator.
> (*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext,
> *<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask,
> *<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext,
> *<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask): New
> define_insns.
>
> * gcc.target/i386/pr88461.c: New test.
LGTM.
Thanks,
Uros.
> --- gcc/config/i386/sse.md.jj 2018-12-03 21:56:35.252575482 +0100
> +++ gcc/config/i386/sse.md 2018-12-12 16:47:00.370433319 +0100
> @@ -12322,22 +12322,22 @@ (define_insn "*<code><mode>3_bcst"
> (set_attr "prefix" "evex")
> (set_attr "mode" "<sseinsnmode>")])
>
> -(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
> - [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
> - (unspec:<avx512fmaskmode>
> - [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
> - (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
> - UNSPEC_TESTM))]
> - "TARGET_AVX512BW"
> - "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
> - [(set_attr "prefix" "evex")
> - (set_attr "mode" "<sseinsnmode>")])
> +(define_mode_iterator VI1248_AVX512VLBW
> + [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
> + (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
> + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
> + (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
> + V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
> + V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
> +
> +(define_mode_iterator AVX512ZEXTMASK
> + [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
>
> (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
> [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
> (unspec:<avx512fmaskmode>
> - [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
> - (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
> + [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
> + (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
> UNSPEC_TESTM))]
> "TARGET_AVX512F"
> "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
> @@ -12347,24 +12347,73 @@ (define_insn "<avx512>_testm<mode>3<mask
> (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
> [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
> (unspec:<avx512fmaskmode>
> - [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
> - (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
> + [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
> + (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
> UNSPEC_TESTNM))]
> - "TARGET_AVX512BW"
> + "TARGET_AVX512F"
> "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
> [(set_attr "prefix" "evex")
> (set_attr "mode" "<sseinsnmode>")])
>
> -(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
> - [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
> - (unspec:<avx512fmaskmode>
> - [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
> - (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
> - UNSPEC_TESTNM))]
> - "TARGET_AVX512F"
> - "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
> +(define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext"
> + [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk")
> + (zero_extend:AVX512ZEXTMASK
> + (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
> + [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
> + (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
> + UNSPEC_TESTM)))]
> + "TARGET_AVX512BW
> + && (<AVX512ZEXTMASK:MODE_SIZE>
> + > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
> + "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
> [(set_attr "prefix" "evex")
> - (set_attr "mode" "<sseinsnmode>")])
> + (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
> +
> +(define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask"
> + [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk")
> + (zero_extend:AVX512ZEXTMASK
> + (and:<VI1248_AVX512VLBW:avx512fmaskmode>
> + (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
> + [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
> + (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
> + UNSPEC_TESTM)
> + (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
> + "TARGET_AVX512BW
> + && (<AVX512ZEXTMASK:MODE_SIZE>
> + > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
> + "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
> + [(set_attr "prefix" "evex")
> + (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
> +
> +(define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext"
> + [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk")
> + (zero_extend:AVX512ZEXTMASK
> + (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
> + [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
> + (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
> + UNSPEC_TESTNM)))]
> + "TARGET_AVX512BW
> + && (<AVX512ZEXTMASK:MODE_SIZE>
> + > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
> + "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
> + [(set_attr "prefix" "evex")
> + (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
> +
> +(define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask"
> + [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk")
> + (zero_extend:AVX512ZEXTMASK
> + (and:<VI1248_AVX512VLBW:avx512fmaskmode>
> + (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
> + [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
> + (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
> + UNSPEC_TESTNM)
> + (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
> + "TARGET_AVX512BW
> + && (<AVX512ZEXTMASK:MODE_SIZE>
> + > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
> + "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
> + [(set_attr "prefix" "evex")
> + (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
>
> ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> ;;
> --- gcc/testsuite/gcc.target/i386/pr88461.c.jj 2018-12-12 16:54:03.779529123 +0100
> +++ gcc/testsuite/gcc.target/i386/pr88461.c 2018-12-12 16:53:42.308879227 +0100
> @@ -0,0 +1,16 @@
> +/* PR target/88461 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512vl -mavx512bw" } */
> +/* { dg-final { scan-assembler-times "kmovw\[ \t]" 2 } } */
> +
> +#include <x86intrin.h>
> +
> +int
> +foo (const __m128i *data, int a)
> +{
> + __m128i v = _mm_load_si128 (data);
> + __mmask16 m = _mm_testn_epi16_mask (v, v);
> + m = _kshiftli_mask16 (m, 1);
> + m = _kandn_mask16 (m, a);
> + return m;
> +}
>
> Jakub
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2018-12-13 7:54 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-12 22:36 [PATCH] Add zero extending patterns to vptest{,n}m instructions (PR target/88461) Jakub Jelinek
2018-12-13 7:54 ` Uros Bizjak
This is a public inbox. See the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).