public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Allow XMM16-XMM31 in vpmaddubsw
@ 2016-05-13 17:15 Jakub Jelinek
  2016-05-18  8:48 ` Kirill Yukhin
  0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2016-05-13 17:15 UTC (permalink / raw)
  To: Uros Bizjak, Kirill Yukhin; +Cc: gcc-patches

Hi!

This instruction is either an AVX2 instruction or, in its EVEX-encoded form,
an AVX512BW (& AVX512VL) instruction, thus the patch adds the EVEX form as a
separate alternative guarded with the avx512bw isa attribute.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-05-13  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/sse.md (avx2_pmaddubsw256, ssse3_pmaddubsw128): Add
	avx512bw alternative.

	* gcc.target/i386/avx512bw-vpmaddubsw-3.c: New test.

--- gcc/config/i386/sse.md.jj	2016-05-13 13:58:12.384020131 +0200
+++ gcc/config/i386/sse.md	2016-05-13 14:46:03.563465879 +0200
@@ -13933,12 +13933,12 @@ (define_insn "ssse3_ph<plusminus_mnemoni
    (set_attr "mode" "DI")])
 
 (define_insn "avx2_pmaddubsw256"
-  [(set (match_operand:V16HI 0 "register_operand" "=x")
+  [(set (match_operand:V16HI 0 "register_operand" "=x,v")
 	(ss_plus:V16HI
 	  (mult:V16HI
 	    (zero_extend:V16HI
 	      (vec_select:V16QI
-		(match_operand:V32QI 1 "register_operand" "x")
+		(match_operand:V32QI 1 "register_operand" "x,v")
 		(parallel [(const_int 0) (const_int 2)
 			   (const_int 4) (const_int 6)
 			   (const_int 8) (const_int 10)
@@ -13949,7 +13949,7 @@ (define_insn "avx2_pmaddubsw256"
 			   (const_int 28) (const_int 30)])))
 	    (sign_extend:V16HI
 	      (vec_select:V16QI
-		(match_operand:V32QI 2 "nonimmediate_operand" "xm")
+		(match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
 		(parallel [(const_int 0) (const_int 2)
 			   (const_int 4) (const_int 6)
 			   (const_int 8) (const_int 10)
@@ -13981,9 +13981,10 @@ (define_insn "avx2_pmaddubsw256"
 			   (const_int 29) (const_int 31)]))))))]
   "TARGET_AVX2"
   "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "sseiadd")
+  [(set_attr "isa" "*,avx512bw")
+   (set_attr "type" "sseiadd")
    (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "vex")
+   (set_attr "prefix" "vex,evex")
    (set_attr "mode" "OI")])
 
 ;; The correct representation for this is absolutely enormous, and
@@ -14036,19 +14037,19 @@ (define_insn "avx512bw_umulhrswv32hi3<ma
    (set_attr "mode" "XI")])
 
 (define_insn "ssse3_pmaddubsw128"
-  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
+  [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
 	(ss_plus:V8HI
 	  (mult:V8HI
 	    (zero_extend:V8HI
 	      (vec_select:V8QI
-		(match_operand:V16QI 1 "register_operand" "0,x")
+		(match_operand:V16QI 1 "register_operand" "0,x,v")
 		(parallel [(const_int 0) (const_int 2)
 			   (const_int 4) (const_int 6)
 			   (const_int 8) (const_int 10)
 			   (const_int 12) (const_int 14)])))
 	    (sign_extend:V8HI
 	      (vec_select:V8QI
-		(match_operand:V16QI 2 "vector_operand" "xBm,xm")
+		(match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
 		(parallel [(const_int 0) (const_int 2)
 			   (const_int 4) (const_int 6)
 			   (const_int 8) (const_int 10)
@@ -14069,13 +14070,14 @@ (define_insn "ssse3_pmaddubsw128"
   "TARGET_SSSE3"
   "@
    pmaddubsw\t{%2, %0|%0, %2}
+   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
    vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,avx")
+  [(set_attr "isa" "noavx,avx,avx512bw")
    (set_attr "type" "sseiadd")
    (set_attr "atom_unit" "simul")
-   (set_attr "prefix_data16" "1,*")
+   (set_attr "prefix_data16" "1,*,*")
    (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "orig,vex")
+   (set_attr "prefix" "orig,vex,evex")
    (set_attr "mode" "TI")])
 
 (define_insn "ssse3_pmaddubsw"
--- gcc/testsuite/gcc.target/i386/avx512bw-vpmaddubsw-3.c.jj	2016-05-13 14:55:48.714665418 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-vpmaddubsw-3.c	2016-05-13 14:54:55.567374760 +0200
@@ -0,0 +1,30 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mavx512bw" } */
+
+#include <x86intrin.h>
+
+void
+f1 (__m128i x, __m128i y)
+{
+  register __m128i a __asm ("xmm16"), b __asm ("xmm17");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  a = _mm_maddubs_epi16 (a, b);
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpmaddubsw\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" } } */
+
+void
+f2 (__m256i x, __m256i y)
+{
+  register __m256i a __asm ("xmm16"), b __asm ("xmm17");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  a = _mm256_maddubs_epi16 (a, b);
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpmaddubsw\[^\n\r]*ymm1\[67]\[^\n\r]*ymm1\[67]\[^\n\r]*ymm1\[67]" } } */

	Jakub

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] Allow XMM16-XMM31 in vpmaddubsw
  2016-05-13 17:15 [PATCH] Allow XMM16-XMM31 in vpmaddubsw Jakub Jelinek
@ 2016-05-18  8:48 ` Kirill Yukhin
  0 siblings, 0 replies; 2+ messages in thread
From: Kirill Yukhin @ 2016-05-18  8:48 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Uros Bizjak, gcc-patches

Hello Jakub,
On 13 May 19:14, Jakub Jelinek wrote:
> Hi!
> 
> This is either AVX2 or for EVEX AVX512BW (& AVX512VL) instruction,
> thus the patch adds it as a separate alternative guarded with avx512bw
> isa attribute.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2016-05-13  Jakub Jelinek  <jakub@redhat.com>
> 
> 	* config/i386/sse.md (avx2_pmaddubsw256, ssse3_pmaddubsw128): Add
> 	avx512bw alternative.
> 
> 	* gcc.target/i386/avx512bw-vpmaddubsw-3.c: New test.
OK (sorry for delay).

--
Thanks, K

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2016-05-18  8:48 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-13 17:15 [PATCH] Allow XMM16-XMM31 in vpmaddubsw Jakub Jelinek
2016-05-18  8:48 ` Kirill Yukhin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).