* [PATCH] Improve 128-bit to 256-bit broadcasts
@ 2016-05-18 21:00 Jakub Jelinek
2016-05-22 7:41 ` Kirill Yukhin
0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2016-05-18 21:00 UTC (permalink / raw)
To: Uros Bizjak, Kirill Yukhin; +Cc: gcc-patches
Hi!
vbroadcast[fi]32x4 and vinsert[fi]32x4 are in AVX512VL,
vbroadcast[fi]64x2 and vinsert[fi]64x2 are in AVX512VL & AVX512DQ.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2016-05-18 Jakub Jelinek <jakub@redhat.com>
* config/i386/sse.md (i128vldq): New mode attribute.
(avx2_vbroadcasti128_<mode>, avx_vbroadcastf128_<mode>): Add
avx512dq and avx512vl alternatives.
* gcc.target/i386/avx512dq-vbroadcast-2.c: New test.
* gcc.target/i386/avx512vl-vbroadcast-2.c: New test.
--- gcc/config/i386/sse.md.jj 2016-05-18 12:30:50.000000000 +0200
+++ gcc/config/i386/sse.md 2016-05-18 13:21:35.339616623 +0200
@@ -778,6 +778,12 @@ (define_mode_attr i128
(V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
(V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
+;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ
+;; i32x4, f32x4, i64x2 or f64x2 suffixes.
+(define_mode_attr i128vldq
+ [(V8SF "f32x4") (V4DF "f64x2")
+ (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
+
;; Mix-n-match
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
(define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
@@ -17038,15 +17044,19 @@ (define_insn "*vec_dupv2di"
(set_attr "mode" "TI,TI,DF,V4SF")])
(define_insn "avx2_vbroadcasti128_<mode>"
- [(set (match_operand:VI_256 0 "register_operand" "=x")
+ [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
(vec_concat:VI_256
- (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
+ (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
(match_dup 1)))]
"TARGET_AVX2"
- "vbroadcasti128\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
+ "@
+ vbroadcasti128\t{%1, %0|%0, %1}
+ vbroadcast<i128vldq>\t{%1, %0|%0, %1}
+ vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
+ [(set_attr "isa" "*,avx512dq,avx512vl")
+ (set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "vex,evex,evex")
(set_attr "mode" "OI")])
;; Modes handled by AVX vec_dup patterns.
@@ -17123,19 +17133,24 @@ (define_split
"operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
(define_insn "avx_vbroadcastf128_<mode>"
- [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
+ [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
(vec_concat:V_256
- (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
+ (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
(match_dup 1)))]
"TARGET_AVX"
"@
vbroadcast<i128>\t{%1, %0|%0, %1}
vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
- vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
- [(set_attr "type" "ssemov,sselog1,sselog1")
+ vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
+ vbroadcast<i128vldq>\t{%1, %0|%0, %1}
+ vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
+ vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
+ vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
+ [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
+ (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
(set_attr "prefix_extra" "1")
- (set_attr "length_immediate" "0,1,1")
- (set_attr "prefix" "vex")
+ (set_attr "length_immediate" "0,1,1,0,1,0,1")
+ (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
(set_attr "mode" "<sseinsnmode>")])
;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
--- gcc/testsuite/gcc.target/i386/avx512dq-vbroadcast-2.c.jj 2016-05-18 13:46:05.757523635 +0200
+++ gcc/testsuite/gcc.target/i386/avx512dq-vbroadcast-2.c 2016-05-18 13:50:31.330891648 +0200
@@ -0,0 +1,49 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mavx512dq" } */
+
+#include <x86intrin.h>
+
+void
+f1 (__m128i x)
+{
+ register __m128i a __asm ("xmm16");
+ register __m256i c;
+ a = x;
+ asm volatile ("" : "+v" (a));
+ c = _mm256_broadcastsi128_si256 (a);
+ register __m256i b __asm ("xmm16");
+ b = c;
+ asm volatile ("" : "+v" (b));
+}
+
+/* { dg-final { scan-assembler "vinserti64x2\[^\n\r]*(xmm16\[^\n\r]*ymm16\[^\n\r]*ymm16|ymm16\[^\n\r]*ymm16\[^\n\r]*xmm16)" } } */
+
+void
+f2 (__m128i *x)
+{
+ register __m256i a __asm ("xmm16");
+ a = _mm256_broadcastsi128_si256 (*x);
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vbroadcasti64x2\[^\n\r]*ymm16" } } */
+
+void
+f3 (__m128 *x)
+{
+ register __m256 a __asm ("xmm16");
+ a = _mm256_broadcast_ps (x);
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vbroadcastf32x4\[^\n\r]*ymm16" } } */
+
+void
+f4 (__m128d *x)
+{
+ register __m256d a __asm ("xmm16");
+ a = _mm256_broadcast_pd (x);
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vbroadcastf64x2\[^\n\r]*ymm16" } } */
--- gcc/testsuite/gcc.target/i386/avx512vl-vbroadcast-2.c.jj 2016-05-18 13:45:40.449869743 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vbroadcast-2.c 2016-05-18 13:50:46.922678414 +0200
@@ -0,0 +1,47 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512dq" } */
+
+#include <x86intrin.h>
+
+void
+f1 (__m128i x)
+{
+ register __m128i a __asm ("xmm16");
+ register __m256i c;
+ a = x;
+ asm volatile ("" : "+v" (a));
+ c = _mm256_broadcastsi128_si256 (a);
+ register __m256i b __asm ("xmm16");
+ b = c;
+ asm volatile ("" : "+v" (b));
+}
+
+/* { dg-final { scan-assembler "vinserti32x4\[^\n\r]*(xmm16\[^\n\r]*ymm16\[^\n\r]*ymm16|ymm16\[^\n\r]*ymm16\[^\n\r]*xmm16)" } } */
+
+void
+f2 (__m128i *x)
+{
+ register __m256i a __asm ("xmm16");
+ a = _mm256_broadcastsi128_si256 (*x);
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vbroadcasti32x4\[^\n\r]*ymm16" } } */
+
+void
+f3 (__m128 *x)
+{
+ register __m256 a __asm ("xmm16");
+ a = _mm256_broadcast_ps (x);
+ asm volatile ("" : "+v" (a));
+}
+
+void
+f4 (__m128d *x)
+{
+ register __m256d a __asm ("xmm16");
+ a = _mm256_broadcast_pd (x);
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-times "vbroadcastf32x4\[^\n\r]*ymm16" 2 } } */
Jakub
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] Improve 128-bit to 256-bit broadcasts
2016-05-18 21:00 [PATCH] Improve 128-bit to 256-bit broadcasts Jakub Jelinek
@ 2016-05-22 7:41 ` Kirill Yukhin
0 siblings, 0 replies; 2+ messages in thread
From: Kirill Yukhin @ 2016-05-22 7:41 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: Uros Bizjak, gcc-patches
Hi Jakub,
On 18 May 23:00, Jakub Jelinek wrote:
> Hi!
>
> vbroadcast[fi]32x4 and vinsert[fi]32x4 are in AVX512VL,
> vbroadcast[fi]64x2 and vinsert[fi]64x2 are in AVX512VL & AVX512DQ.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK.
>
> 2016-05-18 Jakub Jelinek <jakub@redhat.com>
>
> * config/i386/sse.md (i128vldq): New mode iterator.
> (avx2_vbroadcasti128_<mode>, avx_vbroadcastf128_<mode>): Add
> avx512dq and avx512vl altenratives.
>
> * gcc.target/i386/avx512dq-vbroadcast-2.c: New test.
> * gcc.target/i386/avx512vl-vbroadcast-2.c: New test.
--
Thanks, K
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2016-05-22 7:41 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-18 21:00 [PATCH] Improve 128-bit to 256-bit broadcasts Jakub Jelinek
2016-05-22 7:41 ` Kirill Yukhin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).