public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [RFT PATCH, i386]: Optimize zero-extensions from mask registers
@ 2016-08-05 12:22 Uros Bizjak
  2016-08-22 15:25 ` Kirill Yukhin
  2016-09-05 17:11 ` [PATCH, i386]: Fix zero-extension optimizations from mask registers (PR target/77476) Jakub Jelinek
  0 siblings, 2 replies; 4+ messages in thread
From: Uros Bizjak @ 2016-08-05 12:22 UTC (permalink / raw)
  To: gcc-patches; +Cc: Kirill Yukhin

[-- Attachment #1: Type: text/plain, Size: 1303 bytes --]

Hello!

The attached patch was inspired by the assembly from the PR 72805 testcase.
Currently, the compiler generates:

test:
        vpternlogd      $0xFF, %zmm0, %zmm0, %zmm0
        vpxord  %zmm1, %zmm1, %zmm1
        vpcmpd  $1, %zmm1, %zmm0, %k1
        kmovw   %k1, %eax
        movzwl  %ax, %eax
        ret

Please note that kmovw already zero-extends from a mask register, so
the following movzwl is redundant.

The attached patch allows the ree pass to propagate mask registers into
the zext insn patterns, resulting in:

test:
        vpternlogd      $0xFF, %zmm0, %zmm0, %zmm0      # 24    movv16si_internal/2     [length = 6]
        vpxord  %zmm1, %zmm1, %zmm1     # 25    movv16si_internal/1     [length = 6]
        vpcmpd  $1, %zmm1, %zmm0, %k1   # 13    avx512f_cmpv16si3       [length = 7]
        kmovw   %k1, %eax       # 27    *zero_extendhisi2/2     [length = 4]
        ret     # 30    simple_return_internal  [length = 1]

2016-08-05  Uros Bizjak  <ubizjak@gmail.com>

    * config/i386/i386.md (*zero_extendsidi2): Add (*r,*k) alternative.
    (zero_extend<mode>di2): Ditto.
    (*zero_extend<mode>si2): Ditto.
    (*zero_extendqihi2): Ditto.

Patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

The patch is in RFT state, since I have no means to test AVX512 stuff.
Kirill, can someone from Intel please test the patch?

Uros.

[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 3606 bytes --]

Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 239166)
+++ config/i386/i386.md	(working copy)
@@ -3688,10 +3688,10 @@
 
 (define_insn "*zero_extendsidi2"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-			"=r,?r,?o,r   ,o,?*Ym,?!*y,?r ,?r,?*Yi,?*x")
+			"=r,?r,?o,r   ,o,?*Ym,?!*y,?r ,?r,?*Yi,?*x,*r")
 	(zero_extend:DI
 	 (match_operand:SI 1 "x86_64_zext_operand"
-	        	"0 ,rm,r ,rmWz,0,r   ,m   ,*Yj,*x,r   ,m")))]
+	        	"0 ,rm,r ,rmWz,0,r   ,m   ,*Yj,*x,r   ,m  ,*k")))]
   ""
 {
   switch (get_attr_type (insn))
@@ -3717,6 +3717,9 @@
 
       return "%vmovd\t{%1, %0|%0, %1}";
 
+    case TYPE_MSKMOV:
+      return "kmovd\t{%1, %k0|%k0, %1}";
+
     default:
       gcc_unreachable ();
     }
@@ -3724,7 +3727,7 @@
   [(set (attr "isa")
      (cond [(eq_attr "alternative" "0,1,2")
 	      (const_string "nox64")
-	    (eq_attr "alternative" "3,7")
+	    (eq_attr "alternative" "3,7,11")
 	      (const_string "x64")
 	    (eq_attr "alternative" "8")
 	      (const_string "x64_sse4")
@@ -3741,6 +3744,8 @@
 	      (const_string "ssemov")
 	    (eq_attr "alternative" "8")
 	      (const_string "sselog1")
+	    (eq_attr "alternative" "11")
+	      (const_string "mskmov")
 	   ]
 	   (const_string "imovx")))
    (set (attr "prefix_extra")
@@ -3792,12 +3797,14 @@
   "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
 
 (define_insn "zero_extend<mode>di2"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "register_operand" "=r,*r")
 	(zero_extend:DI
-	 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))]
+	 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k")))]
   "TARGET_64BIT"
-  "movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}"
-  [(set_attr "type" "imovx")
+  "@
+   movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}
+   kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}"
+  [(set_attr "type" "imovx,mskmov")
    (set_attr "mode" "SI")])
 
 (define_expand "zero_extend<mode>si2"
@@ -3841,13 +3848,15 @@
    (set_attr "mode" "SI")])
 
 (define_insn "*zero_extend<mode>si2"
-  [(set (match_operand:SI 0 "register_operand" "=r")
+  [(set (match_operand:SI 0 "register_operand" "=r,*r")
 	(zero_extend:SI
-	  (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))]
+	  (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k")))]
   "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
-  "movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "imovx")
-   (set_attr "mode" "SI")])
+  "@
+   movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}
+   kmov<mskmodesuffix>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "imovx,mskmov")
+   (set_attr "mode" "SI,<MODE>")])
 
 (define_expand "zero_extendqihi2"
   [(set (match_operand:HI 0 "register_operand")
@@ -3890,12 +3899,14 @@
 
 ; zero extend to SImode to avoid partial register stalls
 (define_insn "*zero_extendqihi2"
-  [(set (match_operand:HI 0 "register_operand" "=r")
-	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
+  [(set (match_operand:HI 0 "register_operand" "=r,*r")
+	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,*k")))]
   "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
-  "movz{bl|x}\t{%1, %k0|%k0, %1}"
-  [(set_attr "type" "imovx")
-   (set_attr "mode" "SI")])
+  "@
+   movz{bl|x}\t{%1, %k0|%k0, %1}
+   kmovb\t{%1, %k0|%k0, %1}"
+  [(set_attr "type" "imovx,mskmov")
+   (set_attr "mode" "SI,QI")])
 
 (define_insn_and_split "*zext<mode>_doubleword_and"
   [(set (match_operand:DI 0 "register_operand" "=&<r>")

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFT PATCH, i386]: Optimize zero-extensions from mask registers
  2016-08-05 12:22 [RFT PATCH, i386]: Optimize zero-extensions from mask registers Uros Bizjak
@ 2016-08-22 15:25 ` Kirill Yukhin
  2016-09-05 17:11 ` [PATCH, i386]: Fix zero-extension optimizations from mask registers (PR target/77476) Jakub Jelinek
  1 sibling, 0 replies; 4+ messages in thread
From: Kirill Yukhin @ 2016-08-22 15:25 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

Hello Uroš,
On 05 Aug 14:22, Uros Bizjak wrote:
> Hello!
> 
> Attached patch was inspired by assembly from PR 72805 testcase.
> Currently, the compiler generates:
> 
> test:
>         vpternlogd      $0xFF, %zmm0, %zmm0, %zmm0
>         vpxord  %zmm1, %zmm1, %zmm1
>         vpcmpd  $1, %zmm1, %zmm0, %k1
>         kmovw   %k1, %eax
>         movzwl  %ax, %eax
>         ret
> 
> Please note that kmovw already zero-extended from a mask register.
> 
> 2016-08-05  Uros Bizjak  <ubizjak@gmail.com>
> 
>     * config/i386/i386.md (*zero_extendsidi2): Add (*r,*k) alternative.
>     (zero_extend<mode>di2): Ditto.
>     (*zero_extend<mode>si2): Ditto.
>     (*zero_extendqihi2): Ditto.
> 
> Patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
> 
> The patch is in RFT state, since I have no means to test AVX512 stuff.
> Kirill, can someone from Intel please test the patch?
I gave your patch a try and saw no regressions or bootstrap failures on i386/x86_64 (run under SDE).

--
Thanks, K
> 
> Uros.

> Index: config/i386/i386.md
> ===================================================================
> --- config/i386/i386.md	(revision 239166)
> +++ config/i386/i386.md	(working copy)
> @@ -3688,10 +3688,10 @@
>  
>  (define_insn "*zero_extendsidi2"
>    [(set (match_operand:DI 0 "nonimmediate_operand"
> -			"=r,?r,?o,r   ,o,?*Ym,?!*y,?r ,?r,?*Yi,?*x")
> +			"=r,?r,?o,r   ,o,?*Ym,?!*y,?r ,?r,?*Yi,?*x,*r")
>  	(zero_extend:DI
>  	 (match_operand:SI 1 "x86_64_zext_operand"
> -	        	"0 ,rm,r ,rmWz,0,r   ,m   ,*Yj,*x,r   ,m")))]
> +	        	"0 ,rm,r ,rmWz,0,r   ,m   ,*Yj,*x,r   ,m  ,*k")))]
>    ""
>  {
>    switch (get_attr_type (insn))
> @@ -3717,6 +3717,9 @@
>  
>        return "%vmovd\t{%1, %0|%0, %1}";
>  
> +    case TYPE_MSKMOV:
> +      return "kmovd\t{%1, %k0|%k0, %1}";
> +
>      default:
>        gcc_unreachable ();
>      }
> @@ -3724,7 +3727,7 @@
>    [(set (attr "isa")
>       (cond [(eq_attr "alternative" "0,1,2")
>  	      (const_string "nox64")
> -	    (eq_attr "alternative" "3,7")
> +	    (eq_attr "alternative" "3,7,11")
>  	      (const_string "x64")
>  	    (eq_attr "alternative" "8")
>  	      (const_string "x64_sse4")
> @@ -3741,6 +3744,8 @@
>  	      (const_string "ssemov")
>  	    (eq_attr "alternative" "8")
>  	      (const_string "sselog1")
> +	    (eq_attr "alternative" "11")
> +	      (const_string "mskmov")
>  	   ]
>  	   (const_string "imovx")))
>     (set (attr "prefix_extra")
> @@ -3792,12 +3797,14 @@
>    "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
>  
>  (define_insn "zero_extend<mode>di2"
> -  [(set (match_operand:DI 0 "register_operand" "=r")
> +  [(set (match_operand:DI 0 "register_operand" "=r,*r")
>  	(zero_extend:DI
> -	 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))]
> +	 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k")))]
>    "TARGET_64BIT"
> -  "movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}"
> -  [(set_attr "type" "imovx")
> +  "@
> +   movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}
> +   kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}"
> +  [(set_attr "type" "imovx,mskmov")
>     (set_attr "mode" "SI")])
>  
>  (define_expand "zero_extend<mode>si2"
> @@ -3841,13 +3848,15 @@
>     (set_attr "mode" "SI")])
>  
>  (define_insn "*zero_extend<mode>si2"
> -  [(set (match_operand:SI 0 "register_operand" "=r")
> +  [(set (match_operand:SI 0 "register_operand" "=r,*r")
>  	(zero_extend:SI
> -	  (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))]
> +	  (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k")))]
>    "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
> -  "movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}"
> -  [(set_attr "type" "imovx")
> -   (set_attr "mode" "SI")])
> +  "@
> +   movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}
> +   kmov<mskmodesuffix>\t{%1, %0|%0, %1}"
> +  [(set_attr "type" "imovx,mskmov")
> +   (set_attr "mode" "SI,<MODE>")])
>  
>  (define_expand "zero_extendqihi2"
>    [(set (match_operand:HI 0 "register_operand")
> @@ -3890,12 +3899,14 @@
>  
>  ; zero extend to SImode to avoid partial register stalls
>  (define_insn "*zero_extendqihi2"
> -  [(set (match_operand:HI 0 "register_operand" "=r")
> -	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
> +  [(set (match_operand:HI 0 "register_operand" "=r,*r")
> +	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,*k")))]
>    "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
> -  "movz{bl|x}\t{%1, %k0|%k0, %1}"
> -  [(set_attr "type" "imovx")
> -   (set_attr "mode" "SI")])
> +  "@
> +   movz{bl|x}\t{%1, %k0|%k0, %1}
> +   kmovb\t{%1, %k0|%k0, %1}"
> +  [(set_attr "type" "imovx,mskmov")
> +   (set_attr "mode" "SI,QI")])
>  
>  (define_insn_and_split "*zext<mode>_doubleword_and"
>    [(set (match_operand:DI 0 "register_operand" "=&<r>")

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH, i386]: Fix zero-extension optimizations from mask registers (PR target/77476)
  2016-08-05 12:22 [RFT PATCH, i386]: Optimize zero-extensions from mask registers Uros Bizjak
  2016-08-22 15:25 ` Kirill Yukhin
@ 2016-09-05 17:11 ` Jakub Jelinek
  2016-09-05 18:40   ` Uros Bizjak
  1 sibling, 1 reply; 4+ messages in thread
From: Jakub Jelinek @ 2016-09-05 17:11 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches, Kirill Yukhin

Hi!

On Fri, Aug 05, 2016 at 02:22:39PM +0200, Uros Bizjak wrote:
> 2016-08-05  Uros Bizjak  <ubizjak@gmail.com>
> 
>     * config/i386/i386.md (*zero_extendsidi2): Add (*r,*k) alternative.
>     (zero_extend<mode>di2): Ditto.
>     (*zero_extend<mode>si2): Ditto.
>     (*zero_extendqihi2): Ditto.

As the PR says, unfortunately not all kmov instructions are available on
every AVX512F-capable target: kmovb requires AVX512DQ, kmovw requires
AVX512F, and kmovd and kmovq require AVX512BW.

Thus, the following patch enables those alternatives only when the
instructions are available.

Bootstrapped/regtested on x86_64-linux and i686-linux, tested also with sde
-knl on the avx512f-* testcase.  Ok for trunk?

2016-09-05  Jakub Jelinek  <jakub@redhat.com>

	PR target/77476
	* config/i386/i386.md (isa): Add x64_avx512bw.
	(*zero_extendsidi2): For alternative 11 use x64_avx512bw isa.
	(kmov_isa): New mode attr.
	(zero_extend<mode>di2): Use <kmov_isa> isa for the last alternative.
	(*zero_extend<mode>si2): Likewise.
	(*zero_extendqihi2): Use avx512dq isa for the last alternative.

	* gcc.target/i386/avx512f-pr77476.c: New test.
	* gcc.target/i386/avx512bw-pr77476.c: New test.
	* gcc.target/i386/avx512dq-pr77476.c: New test.

--- gcc/config/i386/i386.md.jj	2016-08-29 12:17:41.000000000 +0200
+++ gcc/config/i386/i386.md	2016-09-05 10:35:53.404313654 +0200
@@ -799,7 +799,7 @@ (define_attr "isa" "base,x64,x64_sse4,x6
 		    sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx,
 		    avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
 		    fma_avx512f,avx512bw,noavx512bw,avx512dq,noavx512dq,
-		    avx512vl,noavx512vl,x64_avx512dq"
+		    avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
   (const_string "base"))
 
 (define_attr "enabled" ""
@@ -812,6 +812,8 @@ (define_attr "enabled" ""
 	   (symbol_ref "TARGET_64BIT && TARGET_AVX")
 	 (eq_attr "isa" "x64_avx512dq")
 	   (symbol_ref "TARGET_64BIT && TARGET_AVX512DQ")
+	 (eq_attr "isa" "x64_avx512bw")
+	   (symbol_ref "TARGET_64BIT && TARGET_AVX512BW")
 	 (eq_attr "isa" "nox64") (symbol_ref "!TARGET_64BIT")
 	 (eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2")
 	 (eq_attr "isa" "sse2_noavx")
@@ -3735,12 +3737,14 @@ (define_insn "*zero_extendsidi2"
   [(set (attr "isa")
      (cond [(eq_attr "alternative" "0,1,2")
 	      (const_string "nox64")
-	    (eq_attr "alternative" "3,7,11")
+	    (eq_attr "alternative" "3,7")
 	      (const_string "x64")
 	    (eq_attr "alternative" "8")
 	      (const_string "x64_sse4")
 	    (eq_attr "alternative" "10")
 	      (const_string "sse2")
+	    (eq_attr "alternative" "11")
+	      (const_string "x64_avx512bw")
 	   ]
 	   (const_string "*")))
    (set (attr "type")
@@ -3804,6 +3808,9 @@ (define_split
    (set (match_dup 4) (const_int 0))]
   "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
 
+(define_mode_attr kmov_isa
+  [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")])
+
 (define_insn "zero_extend<mode>di2"
   [(set (match_operand:DI 0 "register_operand" "=r,*r")
 	(zero_extend:DI
@@ -3812,7 +3819,8 @@ (define_insn "zero_extend<mode>di2"
   "@
    movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}
    kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}"
-  [(set_attr "type" "imovx,mskmov")
+  [(set_attr "isa" "*,<kmov_isa>")
+   (set_attr "type" "imovx,mskmov")
    (set_attr "mode" "SI")])
 
 (define_expand "zero_extend<mode>si2"
@@ -3863,7 +3871,8 @@ (define_insn "*zero_extend<mode>si2"
   "@
    movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}
    kmov<mskmodesuffix>\t{%1, %0|%0, %1}"
-  [(set_attr "type" "imovx,mskmov")
+  [(set_attr "isa" "*,<kmov_isa>")
+   (set_attr "type" "imovx,mskmov")
    (set_attr "mode" "SI,<MODE>")])
 
 (define_expand "zero_extendqihi2"
@@ -3914,6 +3923,7 @@ (define_insn "*zero_extendqihi2"
    movz{bl|x}\t{%1, %k0|%k0, %1}
    kmovb\t{%1, %k0|%k0, %1}"
   [(set_attr "type" "imovx,mskmov")
+   (set_attr "isa" "*,avx512dq")
    (set_attr "mode" "SI,QI")])
 
 (define_insn_and_split "*zext<mode>_doubleword_and"
--- gcc/testsuite/gcc.target/i386/avx512f-pr77476.c.jj	2016-09-05 10:23:42.108364379 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-pr77476.c	2016-09-05 10:23:26.000000000 +0200
@@ -0,0 +1,76 @@
+/* PR target/77476 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#ifndef PR77476_TEST
+#include "avx512f-check.h"
+#define PR77476_TEST avx512f_test
+#endif
+
+unsigned short s;
+unsigned int i;
+unsigned long long l;
+
+void
+f1 (void)
+{
+  unsigned char a = 0xff;
+  asm volatile ("" : "+Yk" (a));
+  s = a;
+}
+
+void
+f2 (void)
+{
+  unsigned char a = 0xff;
+  asm volatile ("" : "+Yk" (a));
+  i = a;
+}
+
+void
+f3 (void)
+{
+  unsigned char a = 0xff;
+  asm volatile ("" : "+Yk" (a));
+  l = a;
+}
+
+void
+f4 (void)
+{
+  unsigned short a = 0xffff;
+  asm volatile ("" : "+Yk" (a));
+  i = a;
+}
+
+void
+f5 (void)
+{
+  unsigned short a = 0xffff;
+  asm volatile ("" : "+Yk" (a));
+  l = a;
+}
+
+#ifdef __AVX512BW__
+void
+f6 (void)
+{
+  unsigned int a = 0xffffffff;
+  asm volatile ("" : "+Yk" (a));
+  l = a;
+}
+#endif
+
+static void
+PR77476_TEST ()
+{
+  f1 (); if (s != 0xff) __builtin_abort (); s = 0;
+  f2 (); if (i != 0xff) __builtin_abort (); i = 0;
+  f3 (); if (l != 0xff) __builtin_abort (); l = 0;
+  f4 (); if (i != 0xffff) __builtin_abort (); i = 0;
+  f5 (); if (l != 0xffff) __builtin_abort (); l = 0;
+#ifdef __AVX512BW__
+  f6 (); if (l != 0xffffffff) __builtin_abort (); l = 0;
+#endif
+}
--- gcc/testsuite/gcc.target/i386/avx512bw-pr77476.c.jj	2016-09-05 10:24:07.078055576 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-pr77476.c	2016-09-05 10:24:37.870674759 +0200
@@ -0,0 +1,9 @@
+/* PR target/77476 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512bw-check.h"
+#define PR77476_TEST avx512bw_test
+
+#include "avx512f-pr77476.c"
--- gcc/testsuite/gcc.target/i386/avx512dq-pr77476.c.jj	2016-09-05 10:24:57.617430548 +0200
+++ gcc/testsuite/gcc.target/i386/avx512dq-pr77476.c	2016-09-05 10:25:26.406074515 +0200
@@ -0,0 +1,9 @@
+/* PR target/77476 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512dq-check.h"
+#define PR77476_TEST avx512dq_test
+
+#include "avx512f-pr77476.c"


	Jakub

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH, i386]: Fix zero-extension optimizations from mask registers (PR target/77476)
  2016-09-05 17:11 ` [PATCH, i386]: Fix zero-extension optimizations from mask registers (PR target/77476) Jakub Jelinek
@ 2016-09-05 18:40   ` Uros Bizjak
  0 siblings, 0 replies; 4+ messages in thread
From: Uros Bizjak @ 2016-09-05 18:40 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, Kirill Yukhin

On Mon, Sep 5, 2016 at 7:07 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> On Fri, Aug 05, 2016 at 02:22:39PM +0200, Uros Bizjak wrote:
>> 2016-08-05  Uros Bizjak  <ubizjak@gmail.com>
>>
>>     * config/i386/i386.md (*zero_extendsidi2): Add (*r,*k) alternative.
>>     (zero_extend<mode>di2): Ditto.
>>     (*zero_extend<mode>si2): Ditto.
>>     (*zero_extendqihi2): Ditto.
>
> As the PR says, unfortunately not all kmov instructions are supported by all
> AVX512F supporting ISAs, kmovb is AVX512DQ, kmovw is AVX512F and kmovd and
> kmovq are AVX512BW.
>
> Thus, the following patch enables those alternatives only when the
> instructions are available.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, tested also with sde
> -knl on the avx512f-* testcase.  Ok for trunk?
>
> 2016-09-05  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/77476
>         * config/i386/i386.md (isa): Add x64_avx512bw.
>         (*zero_extendsidi2): For alternative 11 use x64_avx512bw isa.
>         (kmov_isa): New mode attr.
>         (zero_extend<mode>di2): Use <kmov_isa> isa for the last alternative.
>         (*zero_extend<mode>si2): Likewise.
>         (*zero_extendqihi2): Use avx512dq isa for the last alternative.
>
>         * gcc.target/i386/avx512f-pr77476.c: New test.
>         * gcc.target/i386/avx512bw-pr77476.c: New test.
>         * gcc.target/i386/avx512dq-pr77476.c: New test.

OK.

Thanks,
Uros.

> --- gcc/config/i386/i386.md.jj  2016-08-29 12:17:41.000000000 +0200
> +++ gcc/config/i386/i386.md     2016-09-05 10:35:53.404313654 +0200
> @@ -799,7 +799,7 @@ (define_attr "isa" "base,x64,x64_sse4,x6
>                     sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx,
>                     avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
>                     fma_avx512f,avx512bw,noavx512bw,avx512dq,noavx512dq,
> -                   avx512vl,noavx512vl,x64_avx512dq"
> +                   avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
>    (const_string "base"))
>
>  (define_attr "enabled" ""
> @@ -812,6 +812,8 @@ (define_attr "enabled" ""
>            (symbol_ref "TARGET_64BIT && TARGET_AVX")
>          (eq_attr "isa" "x64_avx512dq")
>            (symbol_ref "TARGET_64BIT && TARGET_AVX512DQ")
> +        (eq_attr "isa" "x64_avx512bw")
> +          (symbol_ref "TARGET_64BIT && TARGET_AVX512BW")
>          (eq_attr "isa" "nox64") (symbol_ref "!TARGET_64BIT")
>          (eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2")
>          (eq_attr "isa" "sse2_noavx")
> @@ -3735,12 +3737,14 @@ (define_insn "*zero_extendsidi2"
>    [(set (attr "isa")
>       (cond [(eq_attr "alternative" "0,1,2")
>               (const_string "nox64")
> -           (eq_attr "alternative" "3,7,11")
> +           (eq_attr "alternative" "3,7")
>               (const_string "x64")
>             (eq_attr "alternative" "8")
>               (const_string "x64_sse4")
>             (eq_attr "alternative" "10")
>               (const_string "sse2")
> +           (eq_attr "alternative" "11")
> +             (const_string "x64_avx512bw")
>            ]
>            (const_string "*")))
>     (set (attr "type")
> @@ -3804,6 +3808,9 @@ (define_split
>     (set (match_dup 4) (const_int 0))]
>    "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
>
> +(define_mode_attr kmov_isa
> +  [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")])
> +
>  (define_insn "zero_extend<mode>di2"
>    [(set (match_operand:DI 0 "register_operand" "=r,*r")
>         (zero_extend:DI
> @@ -3812,7 +3819,8 @@ (define_insn "zero_extend<mode>di2"
>    "@
>     movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}
>     kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}"
> -  [(set_attr "type" "imovx,mskmov")
> +  [(set_attr "isa" "*,<kmov_isa>")
> +   (set_attr "type" "imovx,mskmov")
>     (set_attr "mode" "SI")])
>
>  (define_expand "zero_extend<mode>si2"
> @@ -3863,7 +3871,8 @@ (define_insn "*zero_extend<mode>si2"
>    "@
>     movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}
>     kmov<mskmodesuffix>\t{%1, %0|%0, %1}"
> -  [(set_attr "type" "imovx,mskmov")
> +  [(set_attr "isa" "*,<kmov_isa>")
> +   (set_attr "type" "imovx,mskmov")
>     (set_attr "mode" "SI,<MODE>")])
>
>  (define_expand "zero_extendqihi2"
> @@ -3914,6 +3923,7 @@ (define_insn "*zero_extendqihi2"
>     movz{bl|x}\t{%1, %k0|%k0, %1}
>     kmovb\t{%1, %k0|%k0, %1}"
>    [(set_attr "type" "imovx,mskmov")
> +   (set_attr "isa" "*,avx512dq")
>     (set_attr "mode" "SI,QI")])
>
>  (define_insn_and_split "*zext<mode>_doubleword_and"
> --- gcc/testsuite/gcc.target/i386/avx512f-pr77476.c.jj  2016-09-05 10:23:42.108364379 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512f-pr77476.c     2016-09-05 10:23:26.000000000 +0200
> @@ -0,0 +1,76 @@
> +/* PR target/77476 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mavx512f" } */
> +/* { dg-require-effective-target avx512f } */
> +
> +#ifndef PR77476_TEST
> +#include "avx512f-check.h"
> +#define PR77476_TEST avx512f_test
> +#endif
> +
> +unsigned short s;
> +unsigned int i;
> +unsigned long long l;
> +
> +void
> +f1 (void)
> +{
> +  unsigned char a = 0xff;
> +  asm volatile ("" : "+Yk" (a));
> +  s = a;
> +}
> +
> +void
> +f2 (void)
> +{
> +  unsigned char a = 0xff;
> +  asm volatile ("" : "+Yk" (a));
> +  i = a;
> +}
> +
> +void
> +f3 (void)
> +{
> +  unsigned char a = 0xff;
> +  asm volatile ("" : "+Yk" (a));
> +  l = a;
> +}
> +
> +void
> +f4 (void)
> +{
> +  unsigned short a = 0xffff;
> +  asm volatile ("" : "+Yk" (a));
> +  i = a;
> +}
> +
> +void
> +f5 (void)
> +{
> +  unsigned short a = 0xffff;
> +  asm volatile ("" : "+Yk" (a));
> +  l = a;
> +}
> +
> +#ifdef __AVX512BW__
> +void
> +f6 (void)
> +{
> +  unsigned int a = 0xffffffff;
> +  asm volatile ("" : "+Yk" (a));
> +  l = a;
> +}
> +#endif
> +
> +static void
> +PR77476_TEST ()
> +{
> +  f1 (); if (s != 0xff) __builtin_abort (); s = 0;
> +  f2 (); if (i != 0xff) __builtin_abort (); i = 0;
> +  f3 (); if (l != 0xff) __builtin_abort (); l = 0;
> +  f4 (); if (i != 0xffff) __builtin_abort (); i = 0;
> +  f5 (); if (l != 0xffff) __builtin_abort (); l = 0;
> +#ifdef __AVX512BW__
> +  f6 (); if (l != 0xffffffff) __builtin_abort (); l = 0;
> +#endif
> +}
> --- gcc/testsuite/gcc.target/i386/avx512bw-pr77476.c.jj 2016-09-05 10:24:07.078055576 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512bw-pr77476.c    2016-09-05 10:24:37.870674759 +0200
> @@ -0,0 +1,9 @@
> +/* PR target/77476 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mavx512bw" } */
> +/* { dg-require-effective-target avx512bw } */
> +
> +#include "avx512bw-check.h"
> +#define PR77476_TEST avx512bw_test
> +
> +#include "avx512f-pr77476.c"
> --- gcc/testsuite/gcc.target/i386/avx512dq-pr77476.c.jj 2016-09-05 10:24:57.617430548 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512dq-pr77476.c    2016-09-05 10:25:26.406074515 +0200
> @@ -0,0 +1,9 @@
> +/* PR target/77476 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mavx512dq" } */
> +/* { dg-require-effective-target avx512dq } */
> +
> +#include "avx512dq-check.h"
> +#define PR77476_TEST avx512dq_test
> +
> +#include "avx512f-pr77476.c"
>
>
>         Jakub

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-09-05 18:38 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-05 12:22 [RFT PATCH, i386]: Optimize zero-extensions from mask registers Uros Bizjak
2016-08-22 15:25 ` Kirill Yukhin
2016-09-05 17:11 ` [PATCH, i386]: Fix zero-extension optimizations from mask registers (PR target/77476) Jakub Jelinek
2016-09-05 18:40   ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).