* [PATCH] Improve XMM16+ handling in *vec_concatv2di
@ 2016-06-01 20:18 Jakub Jelinek
2016-06-02 8:24 ` Kirill Yukhin
0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2016-06-01 20:18 UTC (permalink / raw)
To: Uros Bizjak, Kirill Yukhin; +Cc: gcc-patches
Hi!
This is the last pattern I'm aware of that didn't have any v/Yv constraints
that ought to be changed (there perhaps are others which have v/Yv in some
of the alternatives, but not in all the ones that could use it).
The testcases show what the changes are useful for.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2016-06-01 Jakub Jelinek <jakub@redhat.com>
* config/i386/sse.md (*vec_concatv2di): Add x64_avx512dq v=Yv,rm
alternative. Change x=xm,C alternative to v=vm,C, x=x,x alternative
to v=Yv,Yv and x=x,m to v=v,m. Use maybe_evex prefix attribute
instead of vex for the last two above mentioned alternatives.
* gcc.target/i386/avx512dq-concatv2di-1.c: New test.
* gcc.target/i386/avx512vl-concatv2di-1.c: New test.
* gcc.target/i386/sse2-init-v2di-2.c: Adjust expected vec_concatv2di
alternative number.
--- gcc/config/i386/sse.md.jj 2016-06-01 14:17:18.000000000 +0200
+++ gcc/config/i386/sse.md 2016-06-01 18:11:35.058942131 +0200
@@ -13567,17 +13567,18 @@ (define_insn "*vec_concatv4si"
;; movd instead of movq is required to handle broken assemblers.
(define_insn "vec_concatv2di"
[(set (match_operand:V2DI 0 "register_operand"
- "=Yr,*x,x ,Yi,x ,!x,x,x,x,x,x")
+ "=Yr,*x,x ,v ,Yi,v ,!x,x,v ,x,x,v")
(vec_concat:V2DI
(match_operand:DI 1 "nonimmediate_operand"
- " 0, 0,x ,r ,xm,*y,0,x,0,0,x")
+ " 0, 0,x ,Yv,r ,vm,*y,0,Yv,0,0,v")
(match_operand:DI 2 "vector_move_operand"
- "*rm,rm,rm,C ,C ,C ,x,x,x,m,m")))]
+ "*rm,rm,rm,rm,C ,C ,C ,x,Yv,x,m,m")))]
"TARGET_SSE"
"@
pinsrq\t{$1, %2, %0|%0, %2, 1}
pinsrq\t{$1, %2, %0|%0, %2, 1}
vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
+ vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
* return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
%vmovq\t{%1, %0|%0, %1}
movq2dq\t{%1, %0|%0, %1}
@@ -13592,40 +13593,46 @@ (define_insn "vec_concatv2di"
(eq_attr "alternative" "2")
(const_string "x64_avx")
(eq_attr "alternative" "3")
+ (const_string "x64_avx512dq")
+ (eq_attr "alternative" "4")
(const_string "x64")
- (eq_attr "alternative" "4,5")
+ (eq_attr "alternative" "5,6")
(const_string "sse2")
- (eq_attr "alternative" "6")
+ (eq_attr "alternative" "7")
(const_string "sse2_noavx")
- (eq_attr "alternative" "7,10")
+ (eq_attr "alternative" "8,11")
(const_string "avx")
]
(const_string "noavx")))
(set (attr "type")
(if_then_else
- (eq_attr "alternative" "0,1,2,6,7")
+ (eq_attr "alternative" "0,1,2,3,7,8")
(const_string "sselog")
(const_string "ssemov")))
(set (attr "prefix_rex")
- (if_then_else (eq_attr "alternative" "0,1,2,3")
+ (if_then_else (eq_attr "alternative" "0,1,2,3,4")
(const_string "1")
(const_string "*")))
(set (attr "prefix_extra")
- (if_then_else (eq_attr "alternative" "0,1,2")
+ (if_then_else (eq_attr "alternative" "0,1,2,3")
(const_string "1")
(const_string "*")))
(set (attr "length_immediate")
- (if_then_else (eq_attr "alternative" "0,1,2")
+ (if_then_else (eq_attr "alternative" "0,1,2,3")
(const_string "1")
(const_string "*")))
(set (attr "prefix")
- (cond [(eq_attr "alternative" "2,7,10")
+ (cond [(eq_attr "alternative" "2")
(const_string "vex")
- (eq_attr "alternative" "3,4")
+ (eq_attr "alternative" "3")
+ (const_string "evex")
+ (eq_attr "alternative" "4,5")
(const_string "maybe_vex")
+ (eq_attr "alternative" "8,11")
+ (const_string "maybe_evex")
]
(const_string "orig")))
- (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
+ (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
(define_expand "vec_unpacks_lo_<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
--- gcc/testsuite/gcc.target/i386/avx512dq-concatv2di-1.c.jj 2016-06-01 18:58:33.037699493 +0200
+++ gcc/testsuite/gcc.target/i386/avx512dq-concatv2di-1.c 2016-06-01 18:53:29.000000000 +0200
@@ -0,0 +1,78 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mavx512dq -masm=att -mtune=haswell" } */
+
+typedef long long V __attribute__((vector_size (16)));
+
+void
+f1 (long long x, long long y)
+{
+ register long long a __asm ("xmm16");
+ register V c __asm ("xmm17");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ c = (V) { a, y };
+ asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vpinsrq\[^\n\r]*\\\$1\[^\n\r]*%rsi\[^\n\r]*%xmm16\[^\n\r]*%xmm17" } } */
+
+void
+f2 (long long x, long long *y)
+{
+ register long long a __asm ("xmm18");
+ register V c __asm ("xmm19");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ c = (V) { a, *y };
+ asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vpinsrq\[^\n\r]*\\\$1\[^\n\r]*%\[re]si\[^\n\r]*%xmm18\[^\n\r]*%xmm19" } } */
+
+void
+f3 (long long x)
+{
+ register V a __asm ("xmm20");
+ a = (V) { x, 0 };
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vmov\[dq]\[^\n\r]*%rdi\[^\n\r]*%xmm20" } } */
+
+void
+f4 (long long *x)
+{
+ register V a __asm ("xmm21");
+ a = (V) { *x, 0 };
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vmovq\[^\n\r]*%\[re]di\[^\n\r]*%xmm21" } } */
+
+void
+f5 (long long x)
+{
+ register long long a __asm ("xmm22");
+ register V c __asm ("xmm23");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ c = (V) { a, 0 };
+ asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vmovq\[^\n\r]*%xmm22\[^\n\r]*%xmm23" } } */
+
+void
+f6 (long long x, long long y)
+{
+ register long long a __asm ("xmm24");
+ register long long b __asm ("xmm25");
+ register V c __asm ("xmm26");
+ a = x;
+ b = y;
+ asm volatile ("" : "+v" (a), "+v" (b));
+ c = (V) { a, b };
+ asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vpunpcklqdq\[^\n\r]*%xmm25\[^\n\r]*%xmm24\[^\n\r]*%xmm26" } } */
--- gcc/testsuite/gcc.target/i386/avx512vl-concatv2di-1.c.jj 2016-06-01 18:58:45.761535711 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-concatv2di-1.c 2016-06-01 19:04:11.367344524 +0200
@@ -0,0 +1,79 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512dq -masm=att -mtune=haswell" } */
+
+typedef long long V __attribute__((vector_size (16)));
+
+void
+f1 (long long x, long long y)
+{
+ register long long a __asm ("xmm16");
+ register V c __asm ("xmm17");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ c = (V) { a, y };
+ asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler-not "vpinsrq\[^\n\r]*\[^\n\r]*%xmm1\[6-9]" } } */
+/* { dg-final { scan-assembler-not "vpinsrq\[^\n\r]*\[^\n\r]*%xmm\[23]\[0-9]" } } */
+
+void
+f2 (long long x, long long *y)
+{
+ register long long a __asm ("xmm18");
+ register V c __asm ("xmm19");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ c = (V) { a, *y };
+ asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vmovhps\[^\n\r]*%\[re]si\[^\n\r]*%xmm18\[^\n\r]*%xmm19" } } */
+
+void
+f3 (long long x)
+{
+ register V a __asm ("xmm20");
+ a = (V) { x, 0 };
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vmov\[dq]\[^\n\r]*%rdi\[^\n\r]*%xmm20" } } */
+
+void
+f4 (long long *x)
+{
+ register V a __asm ("xmm21");
+ a = (V) { *x, 0 };
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vmovq\[^\n\r]*%\[re]di\[^\n\r]*%xmm21" } } */
+
+void
+f5 (long long x)
+{
+ register long long a __asm ("xmm22");
+ register V c __asm ("xmm23");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ c = (V) { a, 0 };
+ asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vmovq\[^\n\r]*%xmm22\[^\n\r]*%xmm23" } } */
+
+void
+f6 (long long x, long long y)
+{
+ register long long a __asm ("xmm24");
+ register long long b __asm ("xmm25");
+ register V c __asm ("xmm26");
+ a = x;
+ b = y;
+ asm volatile ("" : "+v" (a), "+v" (b));
+ c = (V) { a, b };
+ asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vpunpcklqdq\[^\n\r]*%xmm25\[^\n\r]*%xmm24\[^\n\r]*%xmm26" } } */
--- gcc/testsuite/gcc.target/i386/sse2-init-v2di-2.c.jj 2015-12-31 01:11:11.000000000 +0100
+++ gcc/testsuite/gcc.target/i386/sse2-init-v2di-2.c 2016-06-01 21:23:02.455281080 +0200
@@ -10,4 +10,4 @@ test (long long b)
return _mm_cvtsi64_si128 (b);
}
-/* { dg-final { scan-assembler-times "vec_concatv2di/4" 1 } } */
+/* { dg-final { scan-assembler-times "vec_concatv2di/5" 1 } } */
Jakub
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] Improve XMM16+ handling in *vec_concatv2di
2016-06-01 20:18 [PATCH] Improve XMM16+ handling in *vec_concatv2di Jakub Jelinek
@ 2016-06-02 8:24 ` Kirill Yukhin
0 siblings, 0 replies; 2+ messages in thread
From: Kirill Yukhin @ 2016-06-02 8:24 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: Uros Bizjak, gcc-patches
Hello Jakub,
On 01 Jun 22:17, Jakub Jelinek wrote:
> Hi!
>
> This is the last pattern I'm aware of that didn't have any v/Yv constraints
> that ought to be changed (there perhaps are others which have v/Yv in some
> of the alternatives, but not in all the ones that could use it).
>
> The testcases show what are the changes useful for.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK.
--
Thanks, K
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2016-06-02 8:24 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-01 20:18 [PATCH] Improve XMM16+ handling in *vec_concatv2di Jakub Jelinek
2016-06-02 8:24 ` Kirill Yukhin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).