* [PATCH] Improve whole vector right shift
@ 2016-05-04 19:51 Jakub Jelinek
2016-05-06 11:50 ` Kirill Yukhin
0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2016-05-04 19:51 UTC (permalink / raw)
To: Uros Bizjak, Kirill Yukhin; +Cc: gcc-patches
Hi!
In this case the situation is more complicated, because for
V*HI we need both avx512bw and avx512vl, while for V*SI only avx512vl
is needed, and both modes are handled by the same pattern. But we already
have a pattern that does the right thing, placed right after the
"ashr<mode>3" pattern - and because it comes after it, "ashr<mode>3"
will win during recog and will limit RA decisions.
The testcase shows that moving the pattern earlier improves the generated code.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2016-05-04 Jakub Jelinek <jakub@redhat.com>
* config/i386/sse.md (<mask_codefor>ashr<mode>3<mask_name>): Move
before the ashr<mode>3 pattern.
* gcc.target/i386/avx512bw-vpsraw-3.c: New test.
* gcc.target/i386/avx512vl-vpsrad-3.c: New test.
--- gcc/config/i386/sse.md.jj 2016-05-04 16:54:31.000000000 +0200
+++ gcc/config/i386/sse.md 2016-05-04 16:55:31.155848054 +0200
@@ -10088,6 +10088,20 @@ (define_expand "usadv32qi"
DONE;
})
+(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
+ [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
+ (ashiftrt:VI24_AVX512BW_1
+ (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
+ (match_operand:SI 2 "nonmemory_operand" "v,N")))]
+ "TARGET_AVX512VL"
+ "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "ashr<mode>3"
[(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
(ashiftrt:VI24_AVX2
@@ -10107,20 +10121,6 @@ (define_insn "ashr<mode>3"
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
- [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
- (ashiftrt:VI24_AVX512BW_1
- (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
- (match_operand:SI 2 "nonmemory_operand" "v,N")))]
- "TARGET_AVX512VL"
- "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "type" "sseishft")
- (set (attr "length_immediate")
- (if_then_else (match_operand 2 "const_int_operand")
- (const_string "1")
- (const_string "0")))
- (set_attr "mode" "<sseinsnmode>")])
-
(define_insn "<mask_codefor>ashrv2di3<mask_name>"
[(set (match_operand:V2DI 0 "register_operand" "=v,v")
(ashiftrt:V2DI
--- gcc/testsuite/gcc.target/i386/avx512bw-vpsraw-3.c.jj 2016-05-04 17:01:52.332810541 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-vpsraw-3.c 2016-05-04 17:02:56.104966537 +0200
@@ -0,0 +1,44 @@
+/* { dg-do assemble { target { avx512bw && { avx512vl && { ! ia32 } } } } } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+
+#include <x86intrin.h>
+
+void
+f1 (__m128i x, int y)
+{
+ register __m128i a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = _mm_srai_epi16 (a, y);
+ asm volatile ("" : "+v" (a));
+}
+
+void
+f2 (__m128i x)
+{
+ register __m128i a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = _mm_srai_epi16 (a, 16);
+ asm volatile ("" : "+v" (a));
+}
+
+void
+f3 (__m256i x, int y)
+{
+ register __m256i a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = _mm256_srai_epi16 (a, y);
+ asm volatile ("" : "+v" (a));
+}
+
+void
+f4 (__m256i x)
+{
+ register __m256i a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = _mm256_srai_epi16 (a, 16);
+ asm volatile ("" : "+v" (a));
+}
--- gcc/testsuite/gcc.target/i386/avx512vl-vpsrad-3.c.jj 2016-05-04 17:01:58.770725338 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpsrad-3.c 2016-05-04 17:00:16.000000000 +0200
@@ -0,0 +1,44 @@
+/* { dg-do assemble { target { avx512vl && { ! ia32 } } } } */
+/* { dg-options "-O2 -mavx512vl" } */
+
+#include <x86intrin.h>
+
+void
+f1 (__m128i x, int y)
+{
+ register __m128i a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = _mm_srai_epi32 (a, y);
+ asm volatile ("" : "+v" (a));
+}
+
+void
+f2 (__m128i x)
+{
+ register __m128i a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = _mm_srai_epi32 (a, 16);
+ asm volatile ("" : "+v" (a));
+}
+
+void
+f3 (__m256i x, int y)
+{
+ register __m256i a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = _mm256_srai_epi32 (a, y);
+ asm volatile ("" : "+v" (a));
+}
+
+void
+f4 (__m256i x)
+{
+ register __m256i a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = _mm256_srai_epi32 (a, 16);
+ asm volatile ("" : "+v" (a));
+}
Jakub
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] Improve whole vector right shift
2016-05-04 19:51 [PATCH] Improve whole vector right shift Jakub Jelinek
@ 2016-05-06 11:50 ` Kirill Yukhin
0 siblings, 0 replies; 2+ messages in thread
From: Kirill Yukhin @ 2016-05-06 11:50 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: Uros Bizjak, gcc-patches
On 04 May 21:51, Jakub Jelinek wrote:
> Hi!
>
> In this case the situation is more complicated, because for
> V*HI we need avx512bw and avx512vl, while for V*SI only avx512vl
> is needed and both are in the same pattern. But we already have
> a pattern that does the right thing right after the "ashr<mode>3"
> - but as it is after it, the "ashr<mode>3" will win during recog
> and will limit RA decisions.
>
> The testcase shows that moving the pattern improves it.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK for trunk.
--
Thanks, K
>
> 2016-05-04 Jakub Jelinek <jakub@redhat.com>
>
> * config/i386/sse.md (<mask_codefor>ashr<mode>3<mask_name>): Move
> before the ashr<mode>3 pattern.
>
> * gcc.target/i386/avx512bw-vpsraw-3.c: New test.
> * gcc.target/i386/avx512vl-vpsrad-3.c: New test.
>
> --- gcc/config/i386/sse.md.jj 2016-05-04 16:54:31.000000000 +0200
> +++ gcc/config/i386/sse.md 2016-05-04 16:55:31.155848054 +0200
> @@ -10088,6 +10088,20 @@ (define_expand "usadv32qi"
> DONE;
> })
>
> +(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
> + [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
> + (ashiftrt:VI24_AVX512BW_1
> + (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
> + (match_operand:SI 2 "nonmemory_operand" "v,N")))]
> + "TARGET_AVX512VL"
> + "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
> + [(set_attr "type" "sseishft")
> + (set (attr "length_immediate")
> + (if_then_else (match_operand 2 "const_int_operand")
> + (const_string "1")
> + (const_string "0")))
> + (set_attr "mode" "<sseinsnmode>")])
> +
> (define_insn "ashr<mode>3"
> [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
> (ashiftrt:VI24_AVX2
> @@ -10107,20 +10121,6 @@ (define_insn "ashr<mode>3"
> (set_attr "prefix" "orig,vex")
> (set_attr "mode" "<sseinsnmode>")])
>
> -(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
> - [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
> - (ashiftrt:VI24_AVX512BW_1
> - (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
> - (match_operand:SI 2 "nonmemory_operand" "v,N")))]
> - "TARGET_AVX512VL"
> - "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
> - [(set_attr "type" "sseishft")
> - (set (attr "length_immediate")
> - (if_then_else (match_operand 2 "const_int_operand")
> - (const_string "1")
> - (const_string "0")))
> - (set_attr "mode" "<sseinsnmode>")])
> -
> (define_insn "<mask_codefor>ashrv2di3<mask_name>"
> [(set (match_operand:V2DI 0 "register_operand" "=v,v")
> (ashiftrt:V2DI
> --- gcc/testsuite/gcc.target/i386/avx512bw-vpsraw-3.c.jj 2016-05-04 17:01:52.332810541 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512bw-vpsraw-3.c 2016-05-04 17:02:56.104966537 +0200
> @@ -0,0 +1,44 @@
> +/* { dg-do assemble { target { avx512bw && { avx512vl && { ! ia32 } } } } } */
> +/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
> +
> +#include <x86intrin.h>
> +
> +void
> +f1 (__m128i x, int y)
> +{
> + register __m128i a __asm ("xmm16");
> + a = x;
> + asm volatile ("" : "+v" (a));
> + a = _mm_srai_epi16 (a, y);
> + asm volatile ("" : "+v" (a));
> +}
> +
> +void
> +f2 (__m128i x)
> +{
> + register __m128i a __asm ("xmm16");
> + a = x;
> + asm volatile ("" : "+v" (a));
> + a = _mm_srai_epi16 (a, 16);
> + asm volatile ("" : "+v" (a));
> +}
> +
> +void
> +f3 (__m256i x, int y)
> +{
> + register __m256i a __asm ("xmm16");
> + a = x;
> + asm volatile ("" : "+v" (a));
> + a = _mm256_srai_epi16 (a, y);
> + asm volatile ("" : "+v" (a));
> +}
> +
> +void
> +f4 (__m256i x)
> +{
> + register __m256i a __asm ("xmm16");
> + a = x;
> + asm volatile ("" : "+v" (a));
> + a = _mm256_srai_epi16 (a, 16);
> + asm volatile ("" : "+v" (a));
> +}
> --- gcc/testsuite/gcc.target/i386/avx512vl-vpsrad-3.c.jj 2016-05-04 17:01:58.770725338 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512vl-vpsrad-3.c 2016-05-04 17:00:16.000000000 +0200
> @@ -0,0 +1,44 @@
> +/* { dg-do assemble { target { avx512vl && { ! ia32 } } } } */
> +/* { dg-options "-O2 -mavx512vl" } */
> +
> +#include <x86intrin.h>
> +
> +void
> +f1 (__m128i x, int y)
> +{
> + register __m128i a __asm ("xmm16");
> + a = x;
> + asm volatile ("" : "+v" (a));
> + a = _mm_srai_epi32 (a, y);
> + asm volatile ("" : "+v" (a));
> +}
> +
> +void
> +f2 (__m128i x)
> +{
> + register __m128i a __asm ("xmm16");
> + a = x;
> + asm volatile ("" : "+v" (a));
> + a = _mm_srai_epi32 (a, 16);
> + asm volatile ("" : "+v" (a));
> +}
> +
> +void
> +f3 (__m256i x, int y)
> +{
> + register __m256i a __asm ("xmm16");
> + a = x;
> + asm volatile ("" : "+v" (a));
> + a = _mm256_srai_epi32 (a, y);
> + asm volatile ("" : "+v" (a));
> +}
> +
> +void
> +f4 (__m256i x)
> +{
> + register __m256i a __asm ("xmm16");
> + a = x;
> + asm volatile ("" : "+v" (a));
> + a = _mm256_srai_epi32 (a, 16);
> + asm volatile ("" : "+v" (a));
> +}
>
> Jakub
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2016-05-06 11:50 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-04 19:51 [PATCH] Improve whole vector right shift Jakub Jelinek
2016-05-06 11:50 ` Kirill Yukhin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).