[PATCH] Improve *vec_concatv2si_sse4_1

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH] Improve *vec_concatv2si_sse4_1
@ 2016-05-26 18:00 Jakub Jelinek
  2016-05-27  2:14 ` Uros Bizjak
  0 siblings, 1 reply; 4+ messages in thread
From: Jakub Jelinek @ 2016-05-26 18:00 UTC (permalink / raw)
  To: Uros Bizjak, Kirill Yukhin; +Cc: gcc-patches

Hi!

This patch adds an avx512dq alternative (EVEX vpinsrd requires that) and
enables EVEX vmovd and vpunpckldq.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-05-26  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/sse.md (*vec_concatv2si_sse4_1): Add avx512dq v=Yv,rm
	alternative.  Change x=x,x alternative to v=Yv,Yv and x=rm,C
	alternative to v=rm,C.

	* gcc.target/i386/avx512dq-concatv2si-1.c: New test.
	* gcc.target/i386/avx512vl-concatv2si-1.c: New test.

--- gcc/config/i386/sse.md.jj	2016-05-26 10:44:25.000000000 +0200
+++ gcc/config/i386/sse.md	2016-05-26 14:22:26.819313220 +0200
@@ -13339,29 +13339,30 @@ (define_split
 
 (define_insn "*vec_concatv2si_sse4_1"
   [(set (match_operand:V2SI 0 "register_operand"
-	  "=Yr,*x,x, Yr,*x,x, x, *y,*y")
+	  "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
 	(vec_concat:V2SI
 	  (match_operand:SI 1 "nonimmediate_operand"
-	  "  0, 0,x,  0,0, x,rm,  0,rm")
+	  "  0, 0, x,Yv, 0, 0,Yv,rm,  0,rm")
 	  (match_operand:SI 2 "vector_move_operand"
-	  " rm,rm,rm,Yr,*x,x, C,*ym, C")))]
+	  " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
   "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "@
    pinsrd\t{$1, %2, %0|%0, %2, 1}
    pinsrd\t{$1, %2, %0|%0, %2, 1}
    vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
+   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
    punpckldq\t{%2, %0|%0, %2}
    punpckldq\t{%2, %0|%0, %2}
    vpunpckldq\t{%2, %1, %0|%0, %1, %2}
    %vmovd\t{%1, %0|%0, %1}
    punpckldq\t{%2, %0|%0, %2}
    movd\t{%1, %0|%0, %1}"
-  [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
-   (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
-   (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*")
-   (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*")
-   (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
-   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")])
+  [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
+   (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
+   (set_attr "prefix_extra" "1,1,1,1,*,*,*,*,*,*")
+   (set_attr "length_immediate" "1,1,1,1,*,*,*,*,*,*")
+   (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
+   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
 
 ;; ??? In theory we can match memory for the MMX alternative, but allowing
 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
--- gcc/testsuite/gcc.target/i386/avx512dq-concatv2si-1.c.jj	2016-05-26 15:14:55.853786550 +0200
+++ gcc/testsuite/gcc.target/i386/avx512dq-concatv2si-1.c	2016-05-26 15:13:57.000000000 +0200
@@ -0,0 +1,43 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mavx512dq -masm=att" } */
+
+typedef int V __attribute__((vector_size (8)));
+
+void
+f1 (int x, int y)
+{
+  register int a __asm ("xmm16");
+  register int b __asm ("xmm17");
+  register V c __asm ("xmm3");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  c = (V) { a, b };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vpunpckldq\[^\n\r]*%xmm17\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */
+
+void
+f2 (int x, int y)
+{
+  register int a __asm ("xmm16");
+  register V c __asm ("xmm3");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, y };
+  asm volatile ("" : "+v" (c));
+}
+
+void
+f3 (int x, int *y)
+{
+  register int a __asm ("xmm16");
+  register V c __asm ("xmm3");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, *y };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler-times "vpinsrd\[^\n\r]*\\\$1\[^\n\r]*%xmm16\[^\n\r]*%xmm3" 2 } } */
--- gcc/testsuite/gcc.target/i386/avx512vl-concatv2si-1.c.jj	2016-05-26 15:15:11.921574803 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-concatv2si-1.c	2016-05-26 15:16:24.936612585 +0200
@@ -0,0 +1,43 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512dq -masm=att" } */
+
+typedef int V __attribute__((vector_size (8)));
+
+void
+f1 (int x, int y)
+{
+  register int a __asm ("xmm16");
+  register int b __asm ("xmm17");
+  register V c __asm ("xmm3");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  c = (V) { a, b };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vpunpckldq\[^\n\r]*%xmm17\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */
+
+void
+f2 (int x, int y)
+{
+  register int a __asm ("xmm16");
+  register V c __asm ("xmm3");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, y };
+  asm volatile ("" : "+v" (c));
+}
+
+void
+f3 (int x, int *y)
+{
+  register int a __asm ("xmm16");
+  register V c __asm ("xmm3");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, *y };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler-not "vpinsrd\[^\n\r]*\\\$1\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */

	Jakub

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Improve *vec_concatv2si_sse4_1
  2016-05-26 18:00 [PATCH] Improve *vec_concatv2si_sse4_1 Jakub Jelinek
@ 2016-05-27  2:14 ` Uros Bizjak
  2016-05-27  8:23   ` Jakub Jelinek
  0 siblings, 1 reply; 4+ messages in thread
From: Uros Bizjak @ 2016-05-27  2:14 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Kirill Yukhin, gcc-patches

On Thu, May 26, 2016 at 7:05 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> This patch adds an avx512dq alternative (EVEX vpinsrd requires that) and
> enables EVEX vmovd and vpunpckldq.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2016-05-26  Jakub Jelinek  <jakub@redhat.com>
>
>         * config/i386/sse.md (*vec_concatv2si_sse4_1): Add avx512dq v=Yv,rm
>         alternative.  Change x=x,x alternative to v=Yv,Yv and x=rm,C
>         alternative to v=rm,C.
>
>         * gcc.target/i386/avx512dq-concatv2si-1.c: New test.
>         * gcc.target/i386/avx512vl-concatv2si-1.c: New test.

Ouch, I have just changed these mega strings in attribute definitions
to something more readable. Can you please redo the attribute part? It
should be a much more pleasant experience than counting all the
commas...

Uros.

> --- gcc/config/i386/sse.md.jj   2016-05-26 10:44:25.000000000 +0200
> +++ gcc/config/i386/sse.md      2016-05-26 14:22:26.819313220 +0200
> @@ -13339,29 +13339,30 @@ (define_split
>
>  (define_insn "*vec_concatv2si_sse4_1"
>    [(set (match_operand:V2SI 0 "register_operand"
> -         "=Yr,*x,x, Yr,*x,x, x, *y,*y")
> +         "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
>         (vec_concat:V2SI
>           (match_operand:SI 1 "nonimmediate_operand"
> -         "  0, 0,x,  0,0, x,rm,  0,rm")
> +         "  0, 0, x,Yv, 0, 0,Yv,rm,  0,rm")
>           (match_operand:SI 2 "vector_move_operand"
> -         " rm,rm,rm,Yr,*x,x, C,*ym, C")))]
> +         " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
>    "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
>    "@
>     pinsrd\t{$1, %2, %0|%0, %2, 1}
>     pinsrd\t{$1, %2, %0|%0, %2, 1}
>     vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
> +   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
>     punpckldq\t{%2, %0|%0, %2}
>     punpckldq\t{%2, %0|%0, %2}
>     vpunpckldq\t{%2, %1, %0|%0, %1, %2}
>     %vmovd\t{%1, %0|%0, %1}
>     punpckldq\t{%2, %0|%0, %2}
>     movd\t{%1, %0|%0, %1}"
> -  [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
> -   (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
> -   (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*")
> -   (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*")
> -   (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
> -   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")])
> +  [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
> +   (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
> +   (set_attr "prefix_extra" "1,1,1,1,*,*,*,*,*,*")
> +   (set_attr "length_immediate" "1,1,1,1,*,*,*,*,*,*")
> +   (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
> +   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
>
>  ;; ??? In theory we can match memory for the MMX alternative, but allowing
>  ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
> --- gcc/testsuite/gcc.target/i386/avx512dq-concatv2si-1.c.jj    2016-05-26 15:14:55.853786550 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512dq-concatv2si-1.c       2016-05-26 15:13:57.000000000 +0200
> @@ -0,0 +1,43 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512vl -mavx512dq -masm=att" } */
> +
> +typedef int V __attribute__((vector_size (8)));
> +
> +void
> +f1 (int x, int y)
> +{
> +  register int a __asm ("xmm16");
> +  register int b __asm ("xmm17");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  b = y;
> +  asm volatile ("" : "+v" (a), "+v" (b));
> +  c = (V) { a, b };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +/* { dg-final { scan-assembler "vpunpckldq\[^\n\r]*%xmm17\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */
> +
> +void
> +f2 (int x, int y)
> +{
> +  register int a __asm ("xmm16");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  c = (V) { a, y };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +void
> +f3 (int x, int *y)
> +{
> +  register int a __asm ("xmm16");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  c = (V) { a, *y };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +/* { dg-final { scan-assembler-times "vpinsrd\[^\n\r]*\\\$1\[^\n\r]*%xmm16\[^\n\r]*%xmm3" 2 } } */
> --- gcc/testsuite/gcc.target/i386/avx512vl-concatv2si-1.c.jj    2016-05-26 15:15:11.921574803 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512vl-concatv2si-1.c       2016-05-26 15:16:24.936612585 +0200
> @@ -0,0 +1,43 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512vl -mno-avx512dq -masm=att" } */
> +
> +typedef int V __attribute__((vector_size (8)));
> +
> +void
> +f1 (int x, int y)
> +{
> +  register int a __asm ("xmm16");
> +  register int b __asm ("xmm17");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  b = y;
> +  asm volatile ("" : "+v" (a), "+v" (b));
> +  c = (V) { a, b };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +/* { dg-final { scan-assembler "vpunpckldq\[^\n\r]*%xmm17\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */
> +
> +void
> +f2 (int x, int y)
> +{
> +  register int a __asm ("xmm16");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  c = (V) { a, y };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +void
> +f3 (int x, int *y)
> +{
> +  register int a __asm ("xmm16");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  c = (V) { a, *y };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +/* { dg-final { scan-assembler-not "vpinsrd\[^\n\r]*\\\$1\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */
>
>         Jakub

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Improve *vec_concatv2si_sse4_1
  2016-05-27  2:14 ` Uros Bizjak
@ 2016-05-27  8:23   ` Jakub Jelinek
  2016-06-02 10:49     ` Uros Bizjak
  0 siblings, 1 reply; 4+ messages in thread
From: Jakub Jelinek @ 2016-05-27  8:23 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: Kirill Yukhin, gcc-patches

On Thu, May 26, 2016 at 07:39:01PM +0200, Uros Bizjak wrote:
> On Thu, May 26, 2016 at 7:05 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> > Hi!
> >
> > This patch adds an avx512dq alternative (EVEX vpinsrd requires that) and
> > enables EVEX vmovd and vpunpckldq.
> >
> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> >
> > 2016-05-26  Jakub Jelinek  <jakub@redhat.com>
> >
> >         * config/i386/sse.md (*vec_concatv2si_sse4_1): Add avx512dq v=Yv,rm
> >         alternative.  Change x=x,x alternative to v=Yv,Yv and x=rm,C
> >         alternative to v=rm,C.
> >
> >         * gcc.target/i386/avx512dq-concatv2si-1.c: New test.
> >         * gcc.target/i386/avx512vl-concatv2si-1.c: New test.
> 
> Ouch, I have just changed these mega strings in attribute definitions
> to something more readable. Can you please redo the attribute part? It
> should be much more pleasant experience than counting all the
> commas...).

Here is an updated version of this patch (the other two pending sse.md patches
from me still apply cleanly):

2016-05-26  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/sse.md (*vec_concatv2si_sse4_1): Add avx512dq v=Yv,rm
	alternative.  Change x=x,x alternative to v=Yv,Yv and x=rm,C
	alternative to v=rm,C.

	* gcc.target/i386/avx512dq-concatv2si-1.c: New test.
	* gcc.target/i386/avx512vl-concatv2si-1.c: New test.

--- gcc/config/i386/sse.md.jj	2016-05-26 10:44:25.000000000 +0200
+++ gcc/config/i386/sse.md	2016-05-26 14:22:26.819313220 +0200
@@ -13488,43 +13488,44 @@
 
 (define_insn "*vec_concatv2si_sse4_1"
   [(set (match_operand:V2SI 0 "register_operand"
-	  "=Yr,*x,x, Yr,*x,x, x, *y,*y")
+	  "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
 	(vec_concat:V2SI
 	  (match_operand:SI 1 "nonimmediate_operand"
-	  "  0, 0,x,  0,0, x,rm,  0,rm")
+	  "  0, 0, x,Yv, 0, 0,Yv,rm,  0,rm")
 	  (match_operand:SI 2 "vector_move_operand"
-	  " rm,rm,rm,Yr,*x,x, C,*ym, C")))]
+	  " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
   "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "@
    pinsrd\t{$1, %2, %0|%0, %2, 1}
    pinsrd\t{$1, %2, %0|%0, %2, 1}
    vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
+   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
    punpckldq\t{%2, %0|%0, %2}
    punpckldq\t{%2, %0|%0, %2}
    vpunpckldq\t{%2, %1, %0|%0, %1, %2}
    %vmovd\t{%1, %0|%0, %1}
    punpckldq\t{%2, %0|%0, %2}
    movd\t{%1, %0|%0, %1}"
-  [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
+  [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
    (set (attr "type")
-     (cond [(eq_attr "alternative" "6")
+     (cond [(eq_attr "alternative" "7")
 	      (const_string "ssemov")
-	    (eq_attr "alternative" "7")
-	      (const_string "mmxcvt")
 	    (eq_attr "alternative" "8")
+	      (const_string "mmxcvt")
+	    (eq_attr "alternative" "9")
 	      (const_string "mmxmov")
 	   ]
 	   (const_string "sselog")))
    (set (attr "prefix_extra")
-     (if_then_else (eq_attr "alternative" "0,1,2")
+     (if_then_else (eq_attr "alternative" "0,1,2,3")
 		   (const_string "1")
 		   (const_string "*")))
    (set (attr "length_immediate")
-     (if_then_else (eq_attr "alternative" "0,1,2")
+     (if_then_else (eq_attr "alternative" "0,1,2,3")
 		   (const_string "1")
 		   (const_string "*")))
-   (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
-   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")])
+   (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
+   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
 
 ;; ??? In theory we can match memory for the MMX alternative, but allowing
 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
--- gcc/testsuite/gcc.target/i386/avx512dq-concatv2si-1.c.jj	2016-05-26 15:14:55.853786550 +0200
+++ gcc/testsuite/gcc.target/i386/avx512dq-concatv2si-1.c	2016-05-26 15:13:57.000000000 +0200
@@ -0,0 +1,43 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mavx512dq -masm=att" } */
+
+typedef int V __attribute__((vector_size (8)));
+
+void
+f1 (int x, int y)
+{
+  register int a __asm ("xmm16");
+  register int b __asm ("xmm17");
+  register V c __asm ("xmm3");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  c = (V) { a, b };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vpunpckldq\[^\n\r]*%xmm17\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */
+
+void
+f2 (int x, int y)
+{
+  register int a __asm ("xmm16");
+  register V c __asm ("xmm3");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, y };
+  asm volatile ("" : "+v" (c));
+}
+
+void
+f3 (int x, int *y)
+{
+  register int a __asm ("xmm16");
+  register V c __asm ("xmm3");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, *y };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler-times "vpinsrd\[^\n\r]*\\\$1\[^\n\r]*%xmm16\[^\n\r]*%xmm3" 2 } } */
--- gcc/testsuite/gcc.target/i386/avx512vl-concatv2si-1.c.jj	2016-05-26 15:15:11.921574803 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-concatv2si-1.c	2016-05-26 15:16:24.936612585 +0200
@@ -0,0 +1,43 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512dq -masm=att" } */
+
+typedef int V __attribute__((vector_size (8)));
+
+void
+f1 (int x, int y)
+{
+  register int a __asm ("xmm16");
+  register int b __asm ("xmm17");
+  register V c __asm ("xmm3");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  c = (V) { a, b };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vpunpckldq\[^\n\r]*%xmm17\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */
+
+void
+f2 (int x, int y)
+{
+  register int a __asm ("xmm16");
+  register V c __asm ("xmm3");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, y };
+  asm volatile ("" : "+v" (c));
+}
+
+void
+f3 (int x, int *y)
+{
+  register int a __asm ("xmm16");
+  register V c __asm ("xmm3");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, *y };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler-not "vpinsrd\[^\n\r]*\\\$1\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */


	Jakub

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Improve *vec_concatv2si_sse4_1
  2016-05-27  8:23   ` Jakub Jelinek
@ 2016-06-02 10:49     ` Uros Bizjak
  0 siblings, 0 replies; 4+ messages in thread
From: Uros Bizjak @ 2016-06-02 10:49 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Kirill Yukhin, gcc-patches

On Thu, May 26, 2016 at 9:24 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Thu, May 26, 2016 at 07:39:01PM +0200, Uros Bizjak wrote:
>> On Thu, May 26, 2016 at 7:05 PM, Jakub Jelinek <jakub@redhat.com> wrote:
>> > Hi!
>> >
>> > This patch adds an avx512dq alternative (EVEX vpinsrd requires that) and
>> > enables EVEX vmovd and vpunpckldq.
>> >
>> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>> >
>> > 2016-05-26  Jakub Jelinek  <jakub@redhat.com>
>> >
>> >         * config/i386/sse.md (*vec_concatv2si_sse4_1): Add avx512dq v=Yv,rm
>> >         alternative.  Change x=x,x alternative to v=Yv,Yv and x=rm,C
>> >         alternative to v=rm,C.
>> >
>> >         * gcc.target/i386/avx512dq-concatv2si-1.c: New test.
>> >         * gcc.target/i386/avx512vl-concatv2si-1.c: New test.
>>
>> Ouch, I have just changed these mega strings in attribute definitions
>> to something more readable. Can you please redo the attribute part? It
>> should be much more pleasant experience than counting all the
>> commas...).
>
> Here is updated version of this patch (the other two pending sse.md patches
> from me still apply cleanly):
>
> 2016-05-26  Jakub Jelinek  <jakub@redhat.com>
>
>         * config/i386/sse.md (*vec_concatv2si_sse4_1): Add avx512dq v=Yv,rm
>         alternative.  Change x=x,x alternative to v=Yv,Yv and x=rm,C
>         alternative to v=rm,C.
>
>         * gcc.target/i386/avx512dq-concatv2si-1.c: New test.
>         * gcc.target/i386/avx512vl-concatv2si-1.c: New test.

OK.

Thanks,
Uros.

> --- gcc/config/i386/sse.md.jj   2016-05-26 10:44:25.000000000 +0200
> +++ gcc/config/i386/sse.md      2016-05-26 14:22:26.819313220 +0200
> @@ -13488,43 +13488,44 @@
>
>  (define_insn "*vec_concatv2si_sse4_1"
>    [(set (match_operand:V2SI 0 "register_operand"
> -         "=Yr,*x,x, Yr,*x,x, x, *y,*y")
> +         "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
>         (vec_concat:V2SI
>           (match_operand:SI 1 "nonimmediate_operand"
> -         "  0, 0,x,  0,0, x,rm,  0,rm")
> +         "  0, 0, x,Yv, 0, 0,Yv,rm,  0,rm")
>           (match_operand:SI 2 "vector_move_operand"
> -         " rm,rm,rm,Yr,*x,x, C,*ym, C")))]
> +         " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
>    "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
>    "@
>     pinsrd\t{$1, %2, %0|%0, %2, 1}
>     pinsrd\t{$1, %2, %0|%0, %2, 1}
>     vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
> +   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
>     punpckldq\t{%2, %0|%0, %2}
>     punpckldq\t{%2, %0|%0, %2}
>     vpunpckldq\t{%2, %1, %0|%0, %1, %2}
>     %vmovd\t{%1, %0|%0, %1}
>     punpckldq\t{%2, %0|%0, %2}
>     movd\t{%1, %0|%0, %1}"
> -  [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
> +  [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
>     (set (attr "type")
> -     (cond [(eq_attr "alternative" "6")
> +     (cond [(eq_attr "alternative" "7")
>               (const_string "ssemov")
> -           (eq_attr "alternative" "7")
> -             (const_string "mmxcvt")
>             (eq_attr "alternative" "8")
> +             (const_string "mmxcvt")
> +           (eq_attr "alternative" "9")
>               (const_string "mmxmov")
>            ]
>            (const_string "sselog")))
>     (set (attr "prefix_extra")
> -     (if_then_else (eq_attr "alternative" "0,1,2")
> +     (if_then_else (eq_attr "alternative" "0,1,2,3")
>                    (const_string "1")
>                    (const_string "*")))
>     (set (attr "length_immediate")
> -     (if_then_else (eq_attr "alternative" "0,1,2")
> +     (if_then_else (eq_attr "alternative" "0,1,2,3")
>                    (const_string "1")
>                    (const_string "*")))
> -   (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
> -   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")])
> +   (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
> +   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
>
>  ;; ??? In theory we can match memory for the MMX alternative, but allowing
>  ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
> --- gcc/testsuite/gcc.target/i386/avx512dq-concatv2si-1.c.jj    2016-05-26 15:14:55.853786550 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512dq-concatv2si-1.c       2016-05-26 15:13:57.000000000 +0200
> @@ -0,0 +1,43 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512vl -mavx512dq -masm=att" } */
> +
> +typedef int V __attribute__((vector_size (8)));
> +
> +void
> +f1 (int x, int y)
> +{
> +  register int a __asm ("xmm16");
> +  register int b __asm ("xmm17");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  b = y;
> +  asm volatile ("" : "+v" (a), "+v" (b));
> +  c = (V) { a, b };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +/* { dg-final { scan-assembler "vpunpckldq\[^\n\r]*%xmm17\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */
> +
> +void
> +f2 (int x, int y)
> +{
> +  register int a __asm ("xmm16");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  c = (V) { a, y };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +void
> +f3 (int x, int *y)
> +{
> +  register int a __asm ("xmm16");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  c = (V) { a, *y };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +/* { dg-final { scan-assembler-times "vpinsrd\[^\n\r]*\\\$1\[^\n\r]*%xmm16\[^\n\r]*%xmm3" 2 } } */
> --- gcc/testsuite/gcc.target/i386/avx512vl-concatv2si-1.c.jj    2016-05-26 15:15:11.921574803 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512vl-concatv2si-1.c       2016-05-26 15:16:24.936612585 +0200
> @@ -0,0 +1,43 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512vl -mno-avx512dq -masm=att" } */
> +
> +typedef int V __attribute__((vector_size (8)));
> +
> +void
> +f1 (int x, int y)
> +{
> +  register int a __asm ("xmm16");
> +  register int b __asm ("xmm17");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  b = y;
> +  asm volatile ("" : "+v" (a), "+v" (b));
> +  c = (V) { a, b };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +/* { dg-final { scan-assembler "vpunpckldq\[^\n\r]*%xmm17\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */
> +
> +void
> +f2 (int x, int y)
> +{
> +  register int a __asm ("xmm16");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  c = (V) { a, y };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +void
> +f3 (int x, int *y)
> +{
> +  register int a __asm ("xmm16");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  c = (V) { a, *y };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +/* { dg-final { scan-assembler-not "vpinsrd\[^\n\r]*\\\$1\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */
>
>
>         Jakub

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-06-02 10:49 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-26 18:00 [PATCH] Improve *vec_concatv2si_sse4_1 Jakub Jelinek
2016-05-27  2:14 ` Uros Bizjak
2016-05-27  8:23   ` Jakub Jelinek
2016-06-02 10:49     ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).