[PATCH] Improve *pmaddwd

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH] Improve *pmaddwd
@ 2016-05-04 19:48 Jakub Jelinek
  2016-05-06 11:49 ` Kirill Yukhin
  0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2016-05-04 19:48 UTC (permalink / raw)
  To: Uros Bizjak, Kirill Yukhin; +Cc: gcc-patches

Hi!

As the testcase shows, the *pmaddwd patterns unnecessarily disallow
xmm16+ registers, even though we can use them with -mavx512bw.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-05-04  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/sse.md (*avx2_pmaddwd, *sse2_pmaddwd): Add an
	avx512bw alternative that uses v constraints instead of x and
	evex instead of vex in prefix.

	* gcc.target/i386/avx512bw-vpmaddwd-3.c: New test.

--- gcc/config/i386/sse.md.jj	2016-05-04 14:36:08.000000000 +0200
+++ gcc/config/i386/sse.md	2016-05-04 15:16:44.180894303 +0200
@@ -9803,19 +9817,19 @@ (define_expand "avx2_pmaddwd"
   "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
 
 (define_insn "*avx2_pmaddwd"
-  [(set (match_operand:V8SI 0 "register_operand" "=x")
+  [(set (match_operand:V8SI 0 "register_operand" "=x,v")
 	(plus:V8SI
 	  (mult:V8SI
 	    (sign_extend:V8SI
 	      (vec_select:V8HI
-		(match_operand:V16HI 1 "nonimmediate_operand" "%x")
+		(match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
 		(parallel [(const_int 0) (const_int 2)
 			   (const_int 4) (const_int 6)
 			   (const_int 8) (const_int 10)
 			   (const_int 12) (const_int 14)])))
 	    (sign_extend:V8SI
 	      (vec_select:V8HI
-		(match_operand:V16HI 2 "nonimmediate_operand" "xm")
+		(match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
 		(parallel [(const_int 0) (const_int 2)
 			   (const_int 4) (const_int 6)
 			   (const_int 8) (const_int 10)
@@ -9836,7 +9850,8 @@ (define_insn "*avx2_pmaddwd"
   "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
   "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseiadd")
-   (set_attr "prefix" "vex")
+   (set_attr "isa" "*,avx512bw")
+   (set_attr "prefix" "vex,evex")
    (set_attr "mode" "OI")])
 
 (define_expand "sse2_pmaddwd"
@@ -9866,17 +9881,17 @@ (define_expand "sse2_pmaddwd"
   "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
 
 (define_insn "*sse2_pmaddwd"
-  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+  [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
 	(plus:V4SI
 	  (mult:V4SI
 	    (sign_extend:V4SI
 	      (vec_select:V4HI
-		(match_operand:V8HI 1 "vector_operand" "%0,x")
+		(match_operand:V8HI 1 "vector_operand" "%0,x,v")
 		(parallel [(const_int 0) (const_int 2)
 			   (const_int 4) (const_int 6)])))
 	    (sign_extend:V4SI
 	      (vec_select:V4HI
-		(match_operand:V8HI 2 "vector_operand" "xBm,xm")
+		(match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
 		(parallel [(const_int 0) (const_int 2)
 			   (const_int 4) (const_int 6)]))))
 	  (mult:V4SI
@@ -9891,12 +9906,13 @@ (define_insn "*sse2_pmaddwd"
   "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
   "@
    pmaddwd\t{%2, %0|%0, %2}
+   vpmaddwd\t{%2, %1, %0|%0, %1, %2}
    vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,avx")
+  [(set_attr "isa" "noavx,avx,avx512bw")
    (set_attr "type" "sseiadd")
    (set_attr "atom_unit" "simul")
-   (set_attr "prefix_data16" "1,*")
-   (set_attr "prefix" "orig,vex")
+   (set_attr "prefix_data16" "1,*,*")
+   (set_attr "prefix" "orig,vex,evex")
    (set_attr "mode" "TI")])
 
 (define_insn "avx512dq_mul<mode>3<mask_name>"
--- gcc/testsuite/gcc.target/i386/avx512bw-vpmaddwd-3.c.jj	2016-05-04 16:37:21.196223424 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-vpmaddwd-3.c	2016-05-04 16:37:51.867819502 +0200
@@ -0,0 +1,24 @@
+/* { dg-do assemble { target { avx512bw && { avx512vl && { ! ia32 } } } } } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+
+#include <x86intrin.h>
+
+void
+f1 (__m128i x, __m128i y)
+{
+  register __m128i a __asm ("xmm16"), b __asm ("xmm17");
+  a = x; b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  a = _mm_madd_epi16 (a, b);
+  asm volatile ("" : "+v" (a));
+}
+
+void
+f2 (__m256i x, __m256i y)
+{
+  register __m256i a __asm ("xmm16"), b __asm ("xmm17");
+  a = x; b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  a = _mm256_madd_epi16 (a, b);
+  asm volatile ("" : "+v" (a));
+}

	Jakub

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] Improve *pmaddwd
  2016-05-04 19:48 [PATCH] Improve *pmaddwd Jakub Jelinek
@ 2016-05-06 11:49 ` Kirill Yukhin
  0 siblings, 0 replies; 2+ messages in thread
From: Kirill Yukhin @ 2016-05-06 11:49 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Uros Bizjak, gcc-patches

On 04 May 21:48, Jakub Jelinek wrote:
> Hi!
> 
> As the testcase shows, we unnecessarily disallow xmm16+, even when
> we can use them for -mavx512bw.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK for trunk.

--
Thanks, K
> 
> 2016-05-04  Jakub Jelinek  <jakub@redhat.com>
> 
> 	* config/i386/sse.md (*avx2_pmaddwd, *sse2_pmaddwd): Add an
> 	avx512bw alternative that uses v constraints instead of x and
> 	evex instead of vex in prefix.
> 
> 	* gcc.target/i386/avx512bw-vpmaddwd-3.c: New test.
> 
> --- gcc/config/i386/sse.md.jj	2016-05-04 14:36:08.000000000 +0200
> +++ gcc/config/i386/sse.md	2016-05-04 15:16:44.180894303 +0200
> @@ -9803,19 +9817,19 @@ (define_expand "avx2_pmaddwd"
>    "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
>  
>  (define_insn "*avx2_pmaddwd"
> -  [(set (match_operand:V8SI 0 "register_operand" "=x")
> +  [(set (match_operand:V8SI 0 "register_operand" "=x,v")
>  	(plus:V8SI
>  	  (mult:V8SI
>  	    (sign_extend:V8SI
>  	      (vec_select:V8HI
> -		(match_operand:V16HI 1 "nonimmediate_operand" "%x")
> +		(match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
>  		(parallel [(const_int 0) (const_int 2)
>  			   (const_int 4) (const_int 6)
>  			   (const_int 8) (const_int 10)
>  			   (const_int 12) (const_int 14)])))
>  	    (sign_extend:V8SI
>  	      (vec_select:V8HI
> -		(match_operand:V16HI 2 "nonimmediate_operand" "xm")
> +		(match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
>  		(parallel [(const_int 0) (const_int 2)
>  			   (const_int 4) (const_int 6)
>  			   (const_int 8) (const_int 10)
> @@ -9836,7 +9850,8 @@ (define_insn "*avx2_pmaddwd"
>    "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
>    "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
>    [(set_attr "type" "sseiadd")
> -   (set_attr "prefix" "vex")
> +   (set_attr "isa" "*,avx512bw")
> +   (set_attr "prefix" "vex,evex")
>     (set_attr "mode" "OI")])
>  
>  (define_expand "sse2_pmaddwd"
> @@ -9866,17 +9881,17 @@ (define_expand "sse2_pmaddwd"
>    "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
>  
>  (define_insn "*sse2_pmaddwd"
> -  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
> +  [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
>  	(plus:V4SI
>  	  (mult:V4SI
>  	    (sign_extend:V4SI
>  	      (vec_select:V4HI
> -		(match_operand:V8HI 1 "vector_operand" "%0,x")
> +		(match_operand:V8HI 1 "vector_operand" "%0,x,v")
>  		(parallel [(const_int 0) (const_int 2)
>  			   (const_int 4) (const_int 6)])))
>  	    (sign_extend:V4SI
>  	      (vec_select:V4HI
> -		(match_operand:V8HI 2 "vector_operand" "xBm,xm")
> +		(match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
>  		(parallel [(const_int 0) (const_int 2)
>  			   (const_int 4) (const_int 6)]))))
>  	  (mult:V4SI
> @@ -9891,12 +9906,13 @@ (define_insn "*sse2_pmaddwd"
>    "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
>    "@
>     pmaddwd\t{%2, %0|%0, %2}
> +   vpmaddwd\t{%2, %1, %0|%0, %1, %2}
>     vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
> -  [(set_attr "isa" "noavx,avx")
> +  [(set_attr "isa" "noavx,avx,avx512bw")
>     (set_attr "type" "sseiadd")
>     (set_attr "atom_unit" "simul")
> -   (set_attr "prefix_data16" "1,*")
> -   (set_attr "prefix" "orig,vex")
> +   (set_attr "prefix_data16" "1,*,*")
> +   (set_attr "prefix" "orig,vex,evex")
>     (set_attr "mode" "TI")])
>  
>  (define_insn "avx512dq_mul<mode>3<mask_name>"
> --- gcc/testsuite/gcc.target/i386/avx512bw-vpmaddwd-3.c.jj	2016-05-04 16:37:21.196223424 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512bw-vpmaddwd-3.c	2016-05-04 16:37:51.867819502 +0200
> @@ -0,0 +1,24 @@
> +/* { dg-do assemble { target { avx512bw && { avx512vl && { ! ia32 } } } } } */
> +/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
> +
> +#include <x86intrin.h>
> +
> +void
> +f1 (__m128i x, __m128i y)
> +{
> +  register __m128i a __asm ("xmm16"), b __asm ("xmm17");
> +  a = x; b = y;
> +  asm volatile ("" : "+v" (a), "+v" (b));
> +  a = _mm_madd_epi16 (a, b);
> +  asm volatile ("" : "+v" (a));
> +}
> +
> +void
> +f2 (__m256i x, __m256i y)
> +{
> +  register __m256i a __asm ("xmm16"), b __asm ("xmm17");
> +  a = x; b = y;
> +  asm volatile ("" : "+v" (a), "+v" (b));
> +  a = _mm256_madd_epi16 (a, b);
> +  asm volatile ("" : "+v" (a));
> +}
> 
> 	Jakub

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2016-05-06 11:49 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-04 19:48 [PATCH] Improve *pmaddwd Jakub Jelinek
2016-05-06 11:49 ` Kirill Yukhin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).