public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATHC][x86] Scalar mask and round RTL templates
@ 2017-06-23  9:00 Peryt, Sebastian
  2017-07-04 17:44 ` Kirill Yukhin
  0 siblings, 1 reply; 6+ messages in thread
From: Peryt, Sebastian @ 2017-06-23  9:00 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak, kirill.yukhin

[-- Attachment #1: Type: text/plain, Size: 2240 bytes --]

Hi,

This patch adds three extra RTL meta-templates for scalar round and mask. Additionally, it fixes errors that I found in some of the intrinsics, caused by the previous mask and round usage.

2017-06-23  Sebastian Peryt  <sebastian.peryt@intel.com>

gcc/
	* config/i386/subst.md (mask_scalar, round_scalar, round_saeonly_scalar): New templates.
	(mask_scalar_name, mask_scalar_operand3, round_scalar_name,
	round_scalar_mask_operand3, round_scalar_mask_op3,
	round_scalar_constraint, round_scalar_prefix, round_saeonly_scalar_name,
	round_saeonly_scalar_mask_operand3, round_saeonly_scalar_mask_op3,
	round_saeonly_scalar_constraint, round_saeonly_scalar_prefix): New subst attribute.
	* config/i386/sse.md
	(<sse>_vm<plusminus_insn><mode>3<mask_name><round_name>): Renamed to ...
	<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name> ... this.
	(<sse>_vm<multdiv_mnemonic><mode>3<mask_name><round_name>): Renamed to ...
	<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name> ... this.
	(<sse>_vm<code><mode>3<mask_name><round_saeonly_name>): Renamed to ...
	<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name> ... this.
	(v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
	v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
	(v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
	v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
	(v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_saeonly_mask_op3>}): Changed to ...
	v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>} ... this.

Is it ok for trunk?

Thanks,
Sebastian

[-- Attachment #2: Scalar-templates.patch --]
[-- Type: application/octet-stream, Size: 6582 bytes --]

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index f61ae2b..cc23f1f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1568,21 +1568,21 @@
    (set_attr "prefix" "<mask_prefix3>")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sse>_vm<plusminus_insn><mode>3<mask_name><round_name>"
+(define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=x,v")
 	(vec_merge:VF_128
 	  (plusminus:VF_128
 	    (match_operand:VF_128 1 "register_operand" "0,v")
-	    (match_operand:VF_128 2 "vector_operand" "xBm,<round_constraint>"))
+	    (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_SSE"
   "@
    <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
-   v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}"
+   v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sseadd")
-   (set_attr "prefix" "<round_prefix>")
+   (set_attr "prefix" "<round_scalar_prefix>")
    (set_attr "mode" "<ssescalarmode>")])
 
 (define_expand "mul<mode>3<mask_name><round_name>"
@@ -1608,21 +1608,21 @@
    (set_attr "btver2_decode" "direct,double")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_name><round_name>"
+(define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=x,v")
 	(vec_merge:VF_128
 	  (multdiv:VF_128
 	    (match_operand:VF_128 1 "register_operand" "0,v")
-	    (match_operand:VF_128 2 "vector_operand" "xBm,<round_constraint>"))
+	    (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_SSE"
   "@
    <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
-   v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}"
+   v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sse<multdiv_mnemonic>")
-   (set_attr "prefix" "<round_prefix>")
+   (set_attr "prefix" "<round_scalar_prefix>")
    (set_attr "btver2_decode" "direct,double")
    (set_attr "mode" "<ssescalarmode>")])
 
@@ -1944,22 +1944,22 @@
    (set_attr "prefix" "<mask_prefix3>")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sse>_vm<code><mode>3<mask_name><round_saeonly_name>"
+(define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=x,v")
 	(vec_merge:VF_128
 	  (smaxmin:VF_128
 	    (match_operand:VF_128 1 "register_operand" "0,v")
-	    (match_operand:VF_128 2 "vector_operand" "xBm,<round_saeonly_constraint>"))
+	    (match_operand:VF_128 2 "vector_operand" "xBm,<round_saeonly_scalar_constraint>"))
 	 (match_dup 1)
 	 (const_int 1)))]
   "TARGET_SSE"
   "@
    <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
-   v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_saeonly_mask_op3>}"
+   v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sse")
    (set_attr "btver2_sse_attr" "maxmin")
-   (set_attr "prefix" "<round_saeonly_prefix>")
+   (set_attr "prefix" "<round_saeonly_scalar_prefix>")
    (set_attr "mode" "<ssescalarmode>")])
 
 (define_insn "avx_addsubv4df3"
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
index 4685db3..1c7306e 100644
--- a/gcc/config/i386/subst.md
+++ b/gcc/config/i386/subst.md
@@ -236,3 +236,66 @@
     (match_dup 3)
     (match_operand:SUBST_V 4 "vector_move_operand")
     (match_operand:<avx512fmaskmode> 5 "register_operand")])
+
+(define_subst_attr "mask_scalar_name" "mask_scalar" "" "_mask")
+(define_subst_attr "mask_scalar_operand3" "mask_scalar" "" "%{%4%}%N3")
+
+(define_subst "mask_scalar"
+  [(set (match_operand:SUBST_V 0)
+	(vec_merge:SUBST_V
+	  (match_operand:SUBST_V 1)
+	  (match_operand:SUBST_V 2)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  [(set (match_dup 0)
+	(vec_merge:SUBST_V
+	  (vec_merge:SUBST_V
+	    (match_dup 1)
+	    (match_operand:SUBST_V 3 "vector_move_operand" "0C")
+	    (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
+	  (match_dup 2)
+	  (const_int 1)))])
+
+(define_subst_attr "round_scalar_name" "round_scalar" "" "_round")
+(define_subst_attr "round_scalar_mask_operand3" "mask_scalar" "%R3" "%R5")
+(define_subst_attr "round_scalar_mask_op3" "round_scalar" "" "<round_scalar_mask_operand3>")
+(define_subst_attr "round_scalar_constraint" "round_scalar" "vm" "v")
+(define_subst_attr "round_scalar_prefix" "round_scalar" "vex" "evex")
+
+(define_subst "round_scalar"
+  [(set (match_operand:SUBST_V 0)
+        (vec_merge:SUBST_V
+          (match_operand:SUBST_V 1)
+          (match_operand:SUBST_V 2)
+          (const_int 1)))]
+  "TARGET_AVX512F"
+  [(set (match_dup 0)
+	(unspec:SUBST_V [
+	     (vec_merge:SUBST_V
+		(match_dup 1)
+		(match_dup 2)
+		(const_int 1))
+	     (match_operand:SI 3 "const_4_or_8_to_11_operand")]
+		UNSPEC_EMBEDDED_ROUNDING))])
+
+(define_subst_attr "round_saeonly_scalar_name" "round_saeonly_scalar" "" "_round")
+(define_subst_attr "round_saeonly_scalar_mask_operand3" "mask_scalar" "%r3" "%r5")
+(define_subst_attr "round_saeonly_scalar_mask_op3" "round_saeonly_scalar" "" "<round_saeonly_scalar_mask_operand3>")
+(define_subst_attr "round_saeonly_scalar_constraint" "round_saeonly_scalar" "vm" "v")
+(define_subst_attr "round_saeonly_scalar_prefix" "round_saeonly_scalar" "vex" "evex")
+
+(define_subst "round_saeonly_scalar"
+  [(set (match_operand:SUBST_V 0)
+        (vec_merge:SUBST_V
+          (match_operand:SUBST_V 1)
+          (match_operand:SUBST_V 2)
+          (const_int 1)))]
+  "TARGET_AVX512F"
+  [(set (match_dup 0)
+	(unspec:SUBST_V [
+	     (vec_merge:SUBST_V
+		(match_dup 1)
+		(match_dup 2)
+		(const_int 1))
+	     (match_operand:SI 3 "const48_operand")]
+		UNSPEC_EMBEDDED_ROUNDING))])

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATHC][x86] Scalar mask and round RTL templates
  2017-06-23  9:00 [PATHC][x86] Scalar mask and round RTL templates Peryt, Sebastian
@ 2017-07-04 17:44 ` Kirill Yukhin
  2017-07-05  6:38   ` Peryt, Sebastian
  0 siblings, 1 reply; 6+ messages in thread
From: Kirill Yukhin @ 2017-07-04 17:44 UTC (permalink / raw)
  To: Peryt, Sebastian; +Cc: gcc-patches, Uros Bizjak

Hello Sebastian,
On 23 Jun 09:00, Peryt, Sebastian wrote:
> Hi,
> 
> This patch adds three extra RTL meta-templates for scalar round and mask. Additionally fixes errors caused by previous mask and round usage in some of the intrinsics that I found.
Could you please point out which intrinsics you fixed (or which errors)?
I see only MD changes in your patch.

> 
> 2017-06-23  Sebastian Peryt  <sebastian.peryt@intel.com>
> 
> gcc/
> 	* config/i386/subst.md (mask_scalar, round_scalar, round_saeonly_scalar): New templates.
I'd call it meta-templates.
> 	(mask_scalar_name, mask_scalar_operand3, round_scalar_name,
> 	round_scalar_mask_operand3, round_scalar_mask_op3,
> 	round_scalar_constraint, round_scalar_prefix, round_saeonly_scalar_name,
> 	round_saeonly_scalar_mask_operand3, round_saeonly_scalar_mask_op3,
> 	round_saeonly_scalar_constraint, round_saeonly_scalar_prefix): New subst attribute.
> 	* config/i386/sse.md
> 	(<sse>_vm<plusminus_insn><mode>3<mask_name><round_name>): Renamed to ...
> 	<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name> ... this.
> 	(<sse>_vm<multdiv_mnemonic><mode>3<mask_name><round_name>): Renamed to ...
> 	<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name> ... this.
> 	(<sse>_vm<code><mode>3<mask_name><round_saeonly_name>): Renamed to ...
> 	<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name> ... this.
> 	(v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
> 	v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
> 	(v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
> 	v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
> 	(v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_saeonly_mask_op3>}): Changed to ...
> 	v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>} ... this.
We need to obey the ChangeLog conventions. Please break the long lines here.

--
Thanks, K
> 
> Is it ok for trunk?
> 
> Thanks,
> Sebastian


^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATHC][x86] Scalar mask and round RTL templates
  2017-07-04 17:44 ` Kirill Yukhin
@ 2017-07-05  6:38   ` Peryt, Sebastian
  2017-07-05 10:35     ` Kirill Yukhin
  0 siblings, 1 reply; 6+ messages in thread
From: Peryt, Sebastian @ 2017-07-05  6:38 UTC (permalink / raw)
  To: Kirill Yukhin; +Cc: gcc-patches

Hi Kirill,

Sorry for the confusion. I meant to write "MDs for intrinsics". Those intrinsics are all the masked ones for ADD[SD,SS], SUB[SD,SS], MUL[SD,SS], DIV[SD,SS],
MIN[SD,SS] and MAX[SD,SS]. What I found is that, for a mask equal to 0, they were producing wrong results when the old mask meta-template was used.

Modified changelog below.

2017-07-05  Sebastian Peryt  <sebastian.peryt@intel.com>

gcc/
	* config/i386/subst.md (mask_scalar, round_scalar, round_saeonly_scalar): New meta-templates.
	(mask_scalar_name, mask_scalar_operand3, round_scalar_name,
	round_scalar_mask_operand3, round_scalar_mask_op3,
	round_scalar_constraint, round_scalar_prefix, round_saeonly_scalar_name,
	round_saeonly_scalar_mask_operand3, round_saeonly_scalar_mask_op3,
	round_saeonly_scalar_constraint, round_saeonly_scalar_prefix): New subst attribute.
	* config/i386/sse.md
	(<sse>_vm<plusminus_insn><mode>3<mask_name><round_name>): Renamed to ...
	<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name> ... this.
	(<sse>_vm<multdiv_mnemonic><mode>3<mask_name><round_name>): Renamed to ...
	<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name> ... this.
	(<sse>_vm<code><mode>3<mask_name><round_saeonly_name>): Renamed to ...
	<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name> ... this.
	(v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|
	%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
	v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
	%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
	(v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|
	%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
	v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
	%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
	(v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|
	%0<mask_operand3>, %1, %<iptr>2<round_saeonly_mask_op3>}): Changed to ...
	v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
	%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>} ... this.

Is it ok for trunk?

Thanks,
Sebastian

-----Original Message-----
From: Kirill Yukhin [mailto:kirill.yukhin@gmail.com] 
Sent: Tuesday, July 4, 2017 7:45 PM
To: Peryt, Sebastian <sebastian.peryt@intel.com>
Cc: gcc-patches@gcc.gnu.org; Uros Bizjak <ubizjak@gmail.com>
Subject: Re: [PATHC][x86] Scalar mask and round RTL templates

Hello Sebastian,
On 23 Jun 09:00, Peryt, Sebastian wrote:
> Hi,
> 
> This patch adds three extra RTL meta-templates for scalar round and mask. Additionally fixes errors caused by previous mask and round usage in some of the intrinsics that I found.
Could you pls point which intrinsics did you fixed (or which errors)?
I see only MD changes in your patch.

> 
> 2017-06-23  Sebastian Peryt  <sebastian.peryt@intel.com>
> 
> gcc/
> 	* config/i386/subst.md (mask_scalar, round_scalar, round_saeonly_scalar): New templates.
I'd call it meta-templates.
> 	(mask_scalar_name, mask_scalar_operand3, round_scalar_name,
> 	round_scalar_mask_operand3, round_scalar_mask_op3,
> 	round_scalar_constraint, round_scalar_prefix, round_saeonly_scalar_name,
> 	round_saeonly_scalar_mask_operand3, round_saeonly_scalar_mask_op3,
> 	round_saeonly_scalar_constraint, round_saeonly_scalar_prefix): New subst attribute.
> 	* config/i386/sse.md
> 	(<sse>_vm<plusminus_insn><mode>3<mask_name><round_name>): Renamed to ...
> 	<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name> ... this.
> 	(<sse>_vm<multdiv_mnemonic><mode>3<mask_name><round_name>): Renamed to ...
> 	<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name> ... this.
> 	(<sse>_vm<code><mode>3<mask_name><round_saeonly_name>): Renamed to ...
> 	<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name> ... this.
> 	(v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
> 	v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
> 	(v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
> 	v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
> 	(v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_saeonly_mask_op3>}): Changed to ...
> 	v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>} ... this.
We need to obey conventions. Pls break long lines here.

--
Thanks, K
> 
> Is it ok for trunk?
> 
> Thanks,
> Sebastian



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATHC][x86] Scalar mask and round RTL templates
  2017-07-05  6:38   ` Peryt, Sebastian
@ 2017-07-05 10:35     ` Kirill Yukhin
  2017-07-05 13:51       ` Peryt, Sebastian
  0 siblings, 1 reply; 6+ messages in thread
From: Kirill Yukhin @ 2017-07-05 10:35 UTC (permalink / raw)
  To: Peryt, Sebastian; +Cc: gcc-patches

On 05 Jul 06:38, Peryt, Sebastian wrote:
> Hi Kirill,
> 
> Sorry for this confusion. I meant to write MDs for intrinsics. Those intrinsics are all masked ones for ADD[SD,SS], SUB[SD,SS], MUL[SD,SS], DIV[SD,SS],
> MIN[SD,SS] and MAX[SD,SS]. What I found is that for mask equal 0 they were producing wrong results when old mask meta-template was used.
What you're talking about looks like a bug. Could you please add a regression test
to your patch?

> Modified changelog below.
> 
> 2017-07-05  Sebastian Peryt  <sebastian.peryt@intel.com>
> 
> gcc/
> 	* config/i386/subst.md (mask_scalar, round_scalar, round_saeonly_scalar): New meta-templates.
> 	(mask_scalar_name, mask_scalar_operand3, round_scalar_name,
> 	round_scalar_mask_operand3, round_scalar_mask_op3,
> 	round_scalar_constraint, round_scalar_prefix, round_saeonly_scalar_name,
> 	round_saeonly_scalar_mask_operand3, round_saeonly_scalar_mask_op3,
> 	round_saeonly_scalar_constraint, round_saeonly_scalar_prefix): New subst attribute.
> 	* config/i386/sse.md
> 	(<sse>_vm<plusminus_insn><mode>3<mask_name><round_name>): Renamed to ...
> 	<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name> ... this.
> 	(<sse>_vm<multdiv_mnemonic><mode>3<mask_name><round_name>): Renamed to ...
> 	<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name> ... this.
> 	(<sse>_vm<code><mode>3<mask_name><round_saeonly_name>): Renamed to ...
> 	<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name> ... this.
> 	(v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|
> 	%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
> 	v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
> 	%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
> 	(v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|
> 	%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
> 	v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
> 	%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
> 	(v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|
> 	%0<mask_operand3>, %1, %<iptr>2<round_saeonly_mask_op3>}): Changed to ...
> 	v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
> 	%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>} ... this.
Max line length is 79 characters I suppose.

--
Thanks, K
> 
> Is it ok for trunk?
> 
> Thanks,
> Sebastian
> 
> -----Original Message-----
> From: Kirill Yukhin [mailto:kirill.yukhin@gmail.com] 
> Sent: Tuesday, July 4, 2017 7:45 PM
> To: Peryt, Sebastian <sebastian.peryt@intel.com>
> Cc: gcc-patches@gcc.gnu.org; Uros Bizjak <ubizjak@gmail.com>
> Subject: Re: [PATHC][x86] Scalar mask and round RTL templates
> 
> Hello Sebastian,
> On 23 Jun 09:00, Peryt, Sebastian wrote:
> > Hi,
> > 
> > This patch adds three extra RTL meta-templates for scalar round and mask. Additionally fixes errors caused by previous mask and round usage in some of the intrinsics that I found.
> Could you pls point which intrinsics did you fixed (or which errors)?
> I see only MD changes in your patch.
> 
> > 
> > 2017-06-23  Sebastian Peryt  <sebastian.peryt@intel.com>
> > 
> > gcc/
> > 	* config/i386/subst.md (mask_scalar, round_scalar, round_saeonly_scalar): New templates.
> I'd call it meta-templates.
> > 	(mask_scalar_name, mask_scalar_operand3, round_scalar_name,
> > 	round_scalar_mask_operand3, round_scalar_mask_op3,
> > 	round_scalar_constraint, round_scalar_prefix, round_saeonly_scalar_name,
> > 	round_saeonly_scalar_mask_operand3, round_saeonly_scalar_mask_op3,
> > 	round_saeonly_scalar_constraint, round_saeonly_scalar_prefix): New subst attribute.
> > 	* config/i386/sse.md
> > 	(<sse>_vm<plusminus_insn><mode>3<mask_name><round_name>): Renamed to ...
> > 	<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name> ... this.
> > 	(<sse>_vm<multdiv_mnemonic><mode>3<mask_name><round_name>): Renamed to ...
> > 	<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name> ... this.
> > 	(<sse>_vm<code><mode>3<mask_name><round_saeonly_name>): Renamed to ...
> > 	<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name> ... this.
> > 	(v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
> > 	v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
> > 	(v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
> > 	v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
> > 	(v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_saeonly_mask_op3>}): Changed to ...
> > 	v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>} ... this.
> We need to obey conventions. Pls break long lines here.
> 
> --
> Thanks, K
> > 
> > Is it ok for trunk?
> > 
> > Thanks,
> > Sebastian
> 
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATHC][x86] Scalar mask and round RTL templates
  2017-07-05 10:35     ` Kirill Yukhin
@ 2017-07-05 13:51       ` Peryt, Sebastian
  2017-07-05 16:45         ` Kirill Yukhin
  0 siblings, 1 reply; 6+ messages in thread
From: Peryt, Sebastian @ 2017-07-05 13:51 UTC (permalink / raw)
  To: Kirill Yukhin; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 8697 bytes --]

I added the tests. I also updated the ChangeLog and limited its lines to a maximum of 79 characters.

gcc/
	* config/i386/subst.md (mask_scalar, round_scalar,
	round_saeonly_scalar): New meta-templates.
	(mask_scalar_name, mask_scalar_operand3, round_scalar_name,
	round_scalar_mask_operand3, round_scalar_mask_op3,
	round_scalar_constraint, round_scalar_prefix, round_saeonly_scalar_name,
	round_saeonly_scalar_mask_operand3, round_saeonly_scalar_mask_op3,
	round_saeonly_scalar_constraint, 
	round_saeonly_scalar_prefix): New subst attributes.
	* config/i386/sse.md
	(<sse>_vm<plusminus_insn><mode>3<mask_name><round_name>): Renamed to ...
	<sse>_vm<plusminus_insn><mode>3<mask_scalar_name>
	<round_scalar_name> ... this.
	(<sse>_vm<multdiv_mnemonic><mode>3<mask_name><round_name>): Renamed to ...
	<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name>
	<round_scalar_name> ... this.
	(<sse>_vm<code><mode>3<mask_name><round_saeonly_name>): Renamed to ...
	<sse>_vm<code><mode>3<mask_scalar_name>
	<round_saeonly_scalar_name> ... this.
	(v<plusminus_mnemonic><ssescalarmodesuffix>
	\t{<round_mask_op3>%2, %1, %0<mask_operand3>|
	%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
	v<plusminus_mnemonic><ssescalarmodesuffix>
	\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
	%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
	(v<multdiv_mnemonic><ssescalarmodesuffix>
	\t{<round_mask_op3>%2, %1, %0<mask_operand3>|
	%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
	v<multdiv_mnemonic><ssescalarmodesuffix>
	\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
	%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
	(v<maxmin_float><ssescalarmodesuffix>
	\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|
	%0<mask_operand3>, %1, %<iptr>2<round_saeonly_mask_op3>}): Changed to ...
	v<maxmin_float><ssescalarmodesuffix>
	\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
	%0<mask_scalar_operand3>, %1, %<iptr>2
	<round_saeonly_scalar_mask_op3>} ... this.
	
gcc/testsuite
	* gcc.target/i386/avx512f-vaddsd-3.c: New test for mask 0 verification.
	* gcc.target/i386/avx512f-vaddss-3.c: Ditto.
	* gcc.target/i386/avx512f-vdivsd-3.c: Ditto.
	* gcc.target/i386/avx512f-vdivss-3.c: Ditto.
	* gcc.target/i386/avx512f-vmaxsd-3.c: Ditto.
	* gcc.target/i386/avx512f-vmaxss-3.c: Ditto.
	* gcc.target/i386/avx512f-vminsd-3.c: Ditto.
	* gcc.target/i386/avx512f-vminss-3.c: Ditto.
	* gcc.target/i386/avx512f-vmulsd-3.c: Ditto.
	* gcc.target/i386/avx512f-vmulss-3.c: Ditto.
	* gcc.target/i386/avx512f-vsubsd-3.c: Ditto.
	* gcc.target/i386/avx512f-vsubss-3.c: Ditto.

Is it ok for trunk?

Thanks,
Sebastian

-----Original Message-----
From: Kirill Yukhin [mailto:kirill.yukhin@gmail.com] 
Sent: Wednesday, July 5, 2017 12:36 PM
To: Peryt, Sebastian <sebastian.peryt@intel.com>
Cc: gcc-patches@gcc.gnu.org
Subject: Re: [PATHC][x86] Scalar mask and round RTL templates

On 05 Jul 06:38, Peryt, Sebastian wrote:
> Hi Kirill,
> 
> Sorry for this confusion. I meant to write MDs for intrinsics. Those 
> intrinsics are all masked ones for ADD[SD,SS], SUB[SD,SS], MUL[SD,SS], DIV[SD,SS], MIN[SD,SS] and MAX[SD,SS]. What I found is that for mask equal 0 they were producing wrong results when old mask meta-template was used.
What you're talking about looks like a bug. Could you pls add a regession test to your patch?

> Modified changelog below.
> 
> 2017-07-05  Sebastian Peryt  <sebastian.peryt@intel.com>
> 
> gcc/
> 	* config/i386/subst.md (mask_scalar, round_scalar, round_saeonly_scalar): New meta-templates.
> 	(mask_scalar_name, mask_scalar_operand3, round_scalar_name,
> 	round_scalar_mask_operand3, round_scalar_mask_op3,
> 	round_scalar_constraint, round_scalar_prefix, round_saeonly_scalar_name,
> 	round_saeonly_scalar_mask_operand3, round_saeonly_scalar_mask_op3,
> 	round_saeonly_scalar_constraint, round_saeonly_scalar_prefix): New subst attribute.
> 	* config/i386/sse.md
> 	(<sse>_vm<plusminus_insn><mode>3<mask_name><round_name>): Renamed to ...
> 	<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name> ... this.
> 	(<sse>_vm<multdiv_mnemonic><mode>3<mask_name><round_name>): Renamed to ...
> 	<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name> ... this.
> 	(<sse>_vm<code><mode>3<mask_name><round_saeonly_name>): Renamed to ...
> 	<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name> ... this.
> 	(v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|
> 	%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
> 	v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
> 	%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
> 	(v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|
> 	%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
> 	v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
> 	%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
> 	(v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|
> 	%0<mask_operand3>, %1, %<iptr>2<round_saeonly_mask_op3>}): Changed to ...
> 	v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
> 	%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>} ... this.
Max line length is 79 characters I suppose.

--
Thanks, K
> 
> Is it ok for trunk?
> 
> Thanks,
> Sebastian
> 
> -----Original Message-----
> From: Kirill Yukhin [mailto:kirill.yukhin@gmail.com]
> Sent: Tuesday, July 4, 2017 7:45 PM
> To: Peryt, Sebastian <sebastian.peryt@intel.com>
> Cc: gcc-patches@gcc.gnu.org; Uros Bizjak <ubizjak@gmail.com>
> Subject: Re: [PATHC][x86] Scalar mask and round RTL templates
> 
> Hello Sebastian,
> On 23 Jun 09:00, Peryt, Sebastian wrote:
> > Hi,
> > 
> > This patch adds three extra RTL meta-templates for scalar round and mask. Additionally fixes errors caused by previous mask and round usage in some of the intrinsics that I found.
> Could you pls point which intrinsics did you fixed (or which errors)?
> I see only MD changes in your patch.
> 
> > 
> > 2017-06-23  Sebastian Peryt  <sebastian.peryt@intel.com>
> > 
> > gcc/
> > 	* config/i386/subst.md (mask_scalar, round_scalar, round_saeonly_scalar): New templates.
> I'd call it meta-templates.
> > 	(mask_scalar_name, mask_scalar_operand3, round_scalar_name,
> > 	round_scalar_mask_operand3, round_scalar_mask_op3,
> > 	round_scalar_constraint, round_scalar_prefix, round_saeonly_scalar_name,
> > 	round_saeonly_scalar_mask_operand3, round_saeonly_scalar_mask_op3,
> > 	round_saeonly_scalar_constraint, round_saeonly_scalar_prefix): New subst attribute.
> > 	* config/i386/sse.md
> > 	(<sse>_vm<plusminus_insn><mode>3<mask_name><round_name>): Renamed to ...
> > 	<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name> ... this.
> > 	(<sse>_vm<multdiv_mnemonic><mode>3<mask_name><round_name>): Renamed to ...
> > 	<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name> ... this.
> > 	(<sse>_vm<code><mode>3<mask_name><round_saeonly_name>): Renamed to ...
> > 	<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name> ... this.
> > 	(v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
> > 	v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
> > 	(v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
> > 	v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
> > 	(v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_saeonly_mask_op3>}): Changed to ...
> > 	v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>} ... this.
> We need to obey conventions. Pls break long lines here.
> 
> --
> Thanks, K
> > 
> > Is it ok for trunk?
> > 
> > Thanks,
> > Sebastian
> 
> 

[-- Attachment #2: 0001-Scalar-templates-patch-with-extra-tests.patch --]
[-- Type: application/octet-stream, Size: 27334 bytes --]

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index f61ae2b..cc23f1f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1568,21 +1568,21 @@
    (set_attr "prefix" "<mask_prefix3>")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sse>_vm<plusminus_insn><mode>3<mask_name><round_name>"
+(define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=x,v")
 	(vec_merge:VF_128
 	  (plusminus:VF_128
 	    (match_operand:VF_128 1 "register_operand" "0,v")
-	    (match_operand:VF_128 2 "vector_operand" "xBm,<round_constraint>"))
+	    (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_SSE"
   "@
    <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
-   v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}"
+   v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sseadd")
-   (set_attr "prefix" "<round_prefix>")
+   (set_attr "prefix" "<round_scalar_prefix>")
    (set_attr "mode" "<ssescalarmode>")])
 
 (define_expand "mul<mode>3<mask_name><round_name>"
@@ -1608,21 +1608,21 @@
    (set_attr "btver2_decode" "direct,double")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_name><round_name>"
+(define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=x,v")
 	(vec_merge:VF_128
 	  (multdiv:VF_128
 	    (match_operand:VF_128 1 "register_operand" "0,v")
-	    (match_operand:VF_128 2 "vector_operand" "xBm,<round_constraint>"))
+	    (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_SSE"
   "@
    <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
-   v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}"
+   v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sse<multdiv_mnemonic>")
-   (set_attr "prefix" "<round_prefix>")
+   (set_attr "prefix" "<round_scalar_prefix>")
    (set_attr "btver2_decode" "direct,double")
    (set_attr "mode" "<ssescalarmode>")])
 
@@ -1944,22 +1944,22 @@
    (set_attr "prefix" "<mask_prefix3>")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sse>_vm<code><mode>3<mask_name><round_saeonly_name>"
+(define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=x,v")
 	(vec_merge:VF_128
 	  (smaxmin:VF_128
 	    (match_operand:VF_128 1 "register_operand" "0,v")
-	    (match_operand:VF_128 2 "vector_operand" "xBm,<round_saeonly_constraint>"))
+	    (match_operand:VF_128 2 "vector_operand" "xBm,<round_saeonly_scalar_constraint>"))
 	 (match_dup 1)
 	 (const_int 1)))]
   "TARGET_SSE"
   "@
    <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
-   v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_saeonly_mask_op3>}"
+   v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sse")
    (set_attr "btver2_sse_attr" "maxmin")
-   (set_attr "prefix" "<round_saeonly_prefix>")
+   (set_attr "prefix" "<round_saeonly_scalar_prefix>")
    (set_attr "mode" "<ssescalarmode>")])
 
 (define_insn "avx_addsubv4df3"
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
index 4685db3..1c7306e 100644
--- a/gcc/config/i386/subst.md
+++ b/gcc/config/i386/subst.md
@@ -236,3 +236,66 @@
     (match_dup 3)
     (match_operand:SUBST_V 4 "vector_move_operand")
     (match_operand:<avx512fmaskmode> 5 "register_operand")])
+
+(define_subst_attr "mask_scalar_name" "mask_scalar" "" "_mask")
+(define_subst_attr "mask_scalar_operand3" "mask_scalar" "" "%{%4%}%N3")
+
+(define_subst "mask_scalar"
+  [(set (match_operand:SUBST_V 0)
+	(vec_merge:SUBST_V
+	  (match_operand:SUBST_V 1)
+	  (match_operand:SUBST_V 2)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  [(set (match_dup 0)
+	(vec_merge:SUBST_V
+	  (vec_merge:SUBST_V
+	    (match_dup 1)
+	    (match_operand:SUBST_V 3 "vector_move_operand" "0C")
+	    (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
+	  (match_dup 2)
+	  (const_int 1)))])
+
+(define_subst_attr "round_scalar_name" "round_scalar" "" "_round")
+(define_subst_attr "round_scalar_mask_operand3" "mask_scalar" "%R3" "%R5")
+(define_subst_attr "round_scalar_mask_op3" "round_scalar" "" "<round_scalar_mask_operand3>")
+(define_subst_attr "round_scalar_constraint" "round_scalar" "vm" "v")
+(define_subst_attr "round_scalar_prefix" "round_scalar" "vex" "evex")
+
+(define_subst "round_scalar"
+  [(set (match_operand:SUBST_V 0)
+        (vec_merge:SUBST_V
+          (match_operand:SUBST_V 1)
+          (match_operand:SUBST_V 2)
+          (const_int 1)))]
+  "TARGET_AVX512F"
+  [(set (match_dup 0)
+	(unspec:SUBST_V [
+	     (vec_merge:SUBST_V
+		(match_dup 1)
+		(match_dup 2)
+		(const_int 1))
+	     (match_operand:SI 3 "const_4_or_8_to_11_operand")]
+		UNSPEC_EMBEDDED_ROUNDING))])
+
+(define_subst_attr "round_saeonly_scalar_name" "round_saeonly_scalar" "" "_round")
+(define_subst_attr "round_saeonly_scalar_mask_operand3" "mask_scalar" "%r3" "%r5")
+(define_subst_attr "round_saeonly_scalar_mask_op3" "round_saeonly_scalar" "" "<round_saeonly_scalar_mask_operand3>")
+(define_subst_attr "round_saeonly_scalar_constraint" "round_saeonly_scalar" "vm" "v")
+(define_subst_attr "round_saeonly_scalar_prefix" "round_saeonly_scalar" "vex" "evex")
+
+(define_subst "round_saeonly_scalar"
+  [(set (match_operand:SUBST_V 0)
+        (vec_merge:SUBST_V
+          (match_operand:SUBST_V 1)
+          (match_operand:SUBST_V 2)
+          (const_int 1)))]
+  "TARGET_AVX512F"
+  [(set (match_dup 0)
+	(unspec:SUBST_V [
+	     (vec_merge:SUBST_V
+		(match_dup 1)
+		(match_dup 2)
+		(const_int 1))
+	     (match_operand:SI 3 "const48_operand")]
+		UNSPEC_EMBEDDED_ROUNDING))])
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vaddsd-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vaddsd-3.c
new file mode 100644
index 0000000..fbe09b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vaddsd-3.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+static void
+calc_add (double *r, double *s1, double *s2)
+{
+  r[0] = s1[0] + s2[0];
+  r[1] = s1[1];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128d res1, res2, res3, res4, src1, src2;
+  MASK_TYPE mask = 0;
+  double res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      res1.a[i] = DEFAULT_VALUE;
+      res3.a[i] = DEFAULT_VALUE;
+      sign = sign * -1;
+    }
+
+  res1.x = _mm_mask_add_sd (res1.x, mask, src1.x, src2.x);
+  res2.x = _mm_maskz_add_sd (mask, src1.x, src2.x);
+  res3.x = _mm_mask_add_round_sd (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res4.x = _mm_maskz_add_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_add (res_ref, src1.a, src2.a);
+
+  MASK_MERGE (d) (res_ref, mask, 1);
+  if (check_union128d (res1, res_ref))
+    abort ();
+
+  MASK_ZERO (d) (res_ref, mask, 1);
+  if (check_union128d (res2, res_ref))
+    abort ();
+
+  calc_add (res_ref, src1.a, src2.a);
+
+  MASK_MERGE (d) (res_ref, mask, 1);
+  if (check_union128d (res3, res_ref))
+    abort ();
+
+  MASK_ZERO (d) (res_ref, mask, 1);
+  if (check_union128d (res4, res_ref))
+    abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vaddss-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vaddss-3.c
new file mode 100644
index 0000000..45fb295
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vaddss-3.c
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+static void
+calc_add (float *r, float *s1, float *s2)
+{
+  r[0] = s1[0] + s2[0];
+  
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128 res1, res2, res3, res4, src1, src2;
+  MASK_TYPE mask = 0;
+  float res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      res1.a[i] = DEFAULT_VALUE;
+      res3.a[i] = DEFAULT_VALUE;
+      sign = sign * -1;
+    }
+
+  res1.x = _mm_mask_add_ss (res1.x, mask, src1.x, src2.x);
+  res2.x = _mm_maskz_add_ss (mask, src1.x, src2.x);
+  res3.x = _mm_mask_add_round_ss (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res4.x = _mm_maskz_add_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_add (res_ref, src1.a, src2.a);
+
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res1, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res2, res_ref))
+    abort ();
+
+  calc_add (res_ref, src1.a, src2.a);
+  
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res3, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res4, res_ref))
+    abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vdivsd-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vdivsd-3.c
new file mode 100644
index 0000000..0baaf75
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vdivsd-3.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+static void
+calc_div (double *r, double *s1, double *s2)
+{
+  r[0] = s1[0] / s2[0];
+  r[1] = s1[1];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128d res1, res2, res3, res4, src1, src2;
+  MASK_TYPE mask = 0;
+  double res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      res1.a[i] = DEFAULT_VALUE;
+      res3.a[i] = DEFAULT_VALUE;
+      sign = sign * -1;
+    }
+
+  res1.x = _mm_mask_div_sd (res1.x, mask, src1.x, src2.x);
+  res2.x = _mm_maskz_div_sd (mask, src1.x, src2.x);
+  res3.x = _mm_mask_div_round_sd (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res4.x = _mm_maskz_div_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_div (res_ref, src1.a, src2.a);
+
+  MASK_MERGE (d) (res_ref, mask, 1);
+  if (check_union128d (res1, res_ref))
+    abort ();
+
+  MASK_ZERO (d) (res_ref, mask, 1);
+  if (check_union128d (res2, res_ref))
+    abort ();
+
+  calc_div (res_ref, src1.a, src2.a);
+
+  MASK_MERGE (d) (res_ref, mask, 1);
+  if (check_union128d (res3, res_ref))
+    abort ();
+
+  MASK_ZERO (d) (res_ref, mask, 1);
+  if (check_union128d (res4, res_ref))
+    abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vdivss-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vdivss-3.c
new file mode 100644
index 0000000..d337bb2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vdivss-3.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+static void
+calc_div (float *r, float *s1, float *s2)
+{
+  r[0] = s1[0] / s2[0];
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128 res1, res2, res3, res4, src1, src2;
+  MASK_TYPE mask = 0;
+  float res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      res1.a[i] = DEFAULT_VALUE;
+      res3.a[i] = DEFAULT_VALUE;
+      sign = sign * -1;
+    }
+
+  res1.x = _mm_mask_div_ss (res1.x, mask, src1.x, src2.x);
+  res2.x = _mm_maskz_div_ss (mask, src1.x, src2.x);
+  res3.x = _mm_mask_div_round_ss (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res4.x = _mm_maskz_div_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_div (res_ref, src1.a, src2.a);
+
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res1, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res2, res_ref))
+    abort ();
+
+  calc_div (res_ref, src1.a, src2.a);
+  
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res3, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res4, res_ref))
+    abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-3.c
new file mode 100644
index 0000000..95c9c63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-3.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+static void
+calc_max (double *r, double *s1, double *s2)
+{
+  r[0] = s1[0] > s2[0] ? s1[0] : s2[0];
+  r[1] = s1[1];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128d res1, res2, res3, res4, src1, src2;
+  MASK_TYPE mask = 0;
+  double res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      res1.a[i] = DEFAULT_VALUE;
+      res3.a[i] = DEFAULT_VALUE;
+      sign = sign * -1;
+    }
+
+  res1.x = _mm_mask_max_sd (res1.x, mask, src1.x, src2.x);
+  res2.x = _mm_maskz_max_sd (mask, src1.x, src2.x);
+  res3.x = _mm_mask_max_round_sd (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res4.x = _mm_maskz_max_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_max (res_ref, src1.a, src2.a);
+
+  MASK_MERGE (d) (res_ref, mask, 1);
+  if (check_union128d (res1, res_ref))
+    abort ();
+
+  MASK_ZERO (d) (res_ref, mask, 1);
+  if (check_union128d (res2, res_ref))
+    abort ();
+
+  calc_max (res_ref, src1.a, src2.a);
+
+  MASK_MERGE (d) (res_ref, mask, 1);
+  if (check_union128d (res3, res_ref))
+    abort ();
+
+  MASK_ZERO (d) (res_ref, mask, 1);
+  if (check_union128d (res4, res_ref))
+    abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-3.c
new file mode 100644
index 0000000..e40c891
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-3.c
@@ -0,0 +1,66 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+static void
+calc_max (float *r, float *s1, float *s2)
+{
+  r[0] = s1[0] > s2[0] ? s1[0] : s2[0];
+  int i;
+  for (i = 1; i < SIZE; i++)
+    {
+      r[i] = s1[i];
+    }
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128 res1, res2, res3, res4, src1, src2;
+  MASK_TYPE mask = 0;
+  float res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      res1.a[i] = DEFAULT_VALUE;
+      res3.a[i] = DEFAULT_VALUE;
+      sign = sign * -1;
+    }
+
+  res1.x = _mm_mask_max_ss (res1.x, mask, src1.x, src2.x);
+  res2.x = _mm_maskz_max_ss (mask, src1.x, src2.x);
+  res3.x = _mm_mask_max_round_ss (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res4.x = _mm_maskz_max_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_max (res_ref, src1.a, src2.a);
+
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res1, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res2, res_ref))
+    abort ();
+
+  calc_max (res_ref, src1.a, src2.a);
+
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res3, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res4, res_ref))
+    abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vminsd-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vminsd-3.c
new file mode 100644
index 0000000..eac806b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vminsd-3.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+static void
+calc_min (double *r, double *s1, double *s2)
+{
+  r[0] = s1[0] < s2[0] ? s1[0] : s2[0];
+  r[1] = s1[1];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128d res1, res2, res3, res4, src1, src2;
+  MASK_TYPE mask = 0;
+  double res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      res1.a[i] = DEFAULT_VALUE;
+      res3.a[i] = DEFAULT_VALUE;
+      sign = sign * -1;
+    }
+ 
+  res1.x = _mm_mask_min_sd (res1.x, mask, src1.x, src2.x);
+  res2.x = _mm_maskz_min_sd (mask, src1.x, src2.x);
+  res3.x = _mm_mask_min_round_sd (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res4.x = _mm_maskz_min_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_min (res_ref, src1.a, src2.a);
+
+  MASK_MERGE (d) (res_ref, mask, 1);
+  if (check_union128d (res1, res_ref))
+    abort ();
+
+  MASK_ZERO (d) (res_ref, mask, 1);
+  if (check_union128d (res2, res_ref))
+    abort ();
+
+  calc_min (res_ref, src1.a, src2.a);
+
+  MASK_MERGE (d) (res_ref, mask, 1);
+  if (check_union128d (res3, res_ref))
+    abort ();
+
+  MASK_ZERO (d) (res_ref, mask, 1);
+  if (check_union128d (res4, res_ref))
+    abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vminss-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vminss-3.c
new file mode 100644
index 0000000..0ecddd6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vminss-3.c
@@ -0,0 +1,66 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+static void
+calc_min (float *r, float *s1, float *s2)
+{
+  r[0] = s1[0] < s2[0] ? s1[0] : s2[0];
+  int i;
+  for (i = 1; i < SIZE; i++)
+    {
+      r[i] = s1[i];
+    }
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128 res1, res2, res3, res4, src1, src2;
+  MASK_TYPE mask = 0;
+  float res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      res1.a[i] = DEFAULT_VALUE;
+      res3.a[i] = DEFAULT_VALUE;
+      sign = sign * -1;
+    }
+
+  res1.x = _mm_mask_min_ss (res1.x, mask, src1.x, src2.x);
+  res2.x = _mm_maskz_min_ss (mask, src1.x, src2.x);
+  res3.x = _mm_mask_min_round_ss (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res4.x = _mm_maskz_min_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_min (res_ref, src1.a, src2.a);
+ 
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res1, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res2, res_ref))
+    abort ();
+
+  calc_min (res_ref, src1.a, src2.a);
+
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res3, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res4, res_ref))
+    abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmulsd-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vmulsd-3.c
new file mode 100644
index 0000000..f6afb69
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmulsd-3.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+static void
+calc_mul (double *r, double *s1, double *s2)
+{
+  r[0] = s1[0] * s2[0];
+  r[1] = s1[1];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128d res1, res2, res3, res4, src1, src2;
+  MASK_TYPE mask = 0;
+  double res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      res1.a[i] = DEFAULT_VALUE;
+      res3.a[i] = DEFAULT_VALUE;
+      sign = sign * -1;
+    }
+
+  res1.x = _mm_mask_mul_sd (res1.x, mask, src1.x, src2.x);
+  res2.x = _mm_maskz_mul_sd (mask, src1.x, src2.x);
+  res3.x = _mm_mask_mul_round_sd (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res4.x = _mm_maskz_mul_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_mul (res_ref, src1.a, src2.a);
+
+  MASK_MERGE (d) (res_ref, mask, 1);
+  if (check_union128d (res1, res_ref))
+    abort ();
+
+  MASK_ZERO (d) (res_ref, mask, 1);
+  if (check_union128d (res2, res_ref))
+    abort ();
+
+  calc_mul (res_ref, src1.a, src2.a);
+
+  MASK_MERGE (d) (res_ref, mask, 1);
+  if (check_union128d (res3, res_ref))
+    abort ();
+
+  MASK_ZERO (d) (res_ref, mask, 1);
+  if (check_union128d (res4, res_ref))
+    abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmulss-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vmulss-3.c
new file mode 100644
index 0000000..3d57990
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmulss-3.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+static void
+calc_mul (float *r, float *s1, float *s2)
+{
+  r[0] = s1[0] * s2[0];
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128 res1, res2, res3, res4, src1, src2;
+  MASK_TYPE mask = 0;
+  float res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      res1.a[i] = DEFAULT_VALUE;
+      res3.a[i] = DEFAULT_VALUE;
+      sign = sign * -1;
+    }
+
+  res1.x = _mm_mask_mul_ss (res1.x, mask, src1.x, src2.x);
+  res2.x = _mm_maskz_mul_ss (mask, src1.x, src2.x);
+  res3.x = _mm_mask_mul_round_ss (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res4.x = _mm_maskz_mul_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_mul (res_ref, src1.a, src2.a);
+
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res1, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res2, res_ref))
+    abort ();
+
+  calc_mul (res_ref, src1.a, src2.a);
+
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res3, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res4, res_ref))
+    abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsubsd-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vsubsd-3.c
new file mode 100644
index 0000000..d9f7913
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsubsd-3.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+static void
+calc_sub (double *r, double *s1, double *s2)
+{
+  r[0] = s1[0] - s2[0];
+  r[1] = s1[1];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128d res1, res2, res3, res4, src1, src2;
+  MASK_TYPE mask = 0;
+  double res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      res1.a[i] = DEFAULT_VALUE;
+      res3.a[i] = DEFAULT_VALUE;
+      sign = sign * -1;
+    }
+
+  res1.x = _mm_mask_sub_sd (res1.x, mask, src1.x, src2.x);
+  res2.x = _mm_maskz_sub_sd (mask, src1.x, src2.x);
+  res3.x = _mm_mask_sub_round_sd (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res4.x = _mm_maskz_sub_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_sub (res_ref, src1.a, src2.a);
+
+  MASK_MERGE (d) (res_ref, mask, 1);
+  if (check_union128d (res1, res_ref))
+    abort ();
+
+  MASK_ZERO (d) (res_ref, mask, 1);
+  if (check_union128d (res2, res_ref))
+    abort ();
+
+  calc_sub (res_ref, src1.a, src2.a);
+
+  MASK_MERGE (d) (res_ref, mask, 1);
+  if (check_union128d (res3, res_ref))
+    abort ();
+
+  MASK_ZERO (d) (res_ref, mask, 1);
+  if (check_union128d (res4, res_ref))
+    abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsubss-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vsubss-3.c
new file mode 100644
index 0000000..bd597c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsubss-3.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+static void
+calc_sub (float *r, float *s1, float *s2)
+{
+  r[0] = s1[0] - s2[0];
+  int i;
+  for (i = 1; i < SIZE; i++)
+    r[i] = s1[i];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128 res1, res2, res3, res4, src1, src2;
+  MASK_TYPE mask = 0;
+  float res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      res1.a[i] = DEFAULT_VALUE;
+      res3.a[i] = DEFAULT_VALUE;
+      sign = sign * -1;
+    }
+
+  res1.x = _mm_mask_sub_ss (res1.x, mask, src1.x, src2.x);
+  res2.x = _mm_maskz_sub_ss (mask, src1.x, src2.x);
+  res3.x = _mm_mask_sub_round_ss (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res4.x = _mm_maskz_sub_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_sub (res_ref, src1.a, src2.a);
+
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res1, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res2, res_ref))
+    abort ();
+
+  calc_sub (res_ref, src1.a, src2.a);
+
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res3, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res4, res_ref))
+    abort ();
+}
+

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATHC][x86] Scalar mask and round RTL templates
  2017-07-05 13:51       ` Peryt, Sebastian
@ 2017-07-05 16:45         ` Kirill Yukhin
  0 siblings, 0 replies; 6+ messages in thread
From: Kirill Yukhin @ 2017-07-05 16:45 UTC (permalink / raw)
  To: Peryt, Sebastian; +Cc: gcc-patches

On 05 Jul 13:51, Peryt, Sebastian wrote:
> Tests were added. I also updated Changelog and set the max line length to be equal to 79 characters.
Thanks!
> 
> Is it ok for trunk?
Your changes are OK for trunk. I've committed the patch.

--
Thanks, K
> 
> Thanks,
> Sebastian
> 
> -----Original Message-----
> From: Kirill Yukhin [mailto:kirill.yukhin@gmail.com] 
> Sent: Wednesday, July 5, 2017 12:36 PM
> To: Peryt, Sebastian <sebastian.peryt@intel.com>
> Cc: gcc-patches@gcc.gnu.org
> Subject: Re: [PATHC][x86] Scalar mask and round RTL templates
> 
> On 05 Jul 06:38, Peryt, Sebastian wrote:
> > Hi Kirill,
> > 
> > Sorry for this confusion. I meant to write MDs for intrinsics. Those 
> > intrinsics are all masked ones for ADD[SD,SS], SUB[SD,SS], MUL[SD,SS], DIV[SD,SS], MIN[SD,SS] and MAX[SD,SS]. What I found is that for a mask equal to 0 they were producing wrong results when the old mask meta-template was used.
> What you're talking about looks like a bug. Could you pls add a regression test to your patch?
> 
> > Modified changelog below.
> > 
> > 2017-07-05  Sebastian Peryt  <sebastian.peryt@intel.com>
> > 
> > gcc/
> > 	* config/i386/subst.md (mask_scalar, round_scalar, round_saeonly_scalar): New meta-templates.
> > 	(mask_scalar_name, mask_scalar_operand3, round_scalar_name,
> > 	round_scalar_mask_operand3, round_scalar_mask_op3,
> > 	round_scalar_constraint, round_scalar_prefix, round_saeonly_scalar_name,
> > 	round_saeonly_scalar_mask_operand3, round_saeonly_scalar_mask_op3,
> > 	round_saeonly_scalar_constraint, round_saeonly_scalar_prefix): New subst attribute.
> > 	* config/i386/sse.md
> > 	(<sse>_vm<plusminus_insn><mode>3<mask_name><round_name>): Renamed to ...
> > 	<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name> ... this.
> > 	(<sse>_vm<multdiv_mnemonic><mode>3<mask_name><round_name>): Renamed to ...
> > 	<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name> ... this.
> > 	(<sse>_vm<code><mode>3<mask_name><round_saeonly_name>): Renamed to ...
> > 	<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name> ... this.
> > 	(v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|
> > 	%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
> > 	v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
> > 	%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
> > 	(v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|
> > 	%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
> > 	v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
> > 	%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
> > 	(v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|
> > 	%0<mask_operand3>, %1, %<iptr>2<round_saeonly_mask_op3>}): Changed to ...
> > 	v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
> > 	%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>} ... this.
> Max line length is 79 characters I suppose.
> 
> --
> Thanks, K
> > 
> > Is it ok for trunk?
> > 
> > Thanks,
> > Sebastian
> > 
> > -----Original Message-----
> > From: Kirill Yukhin [mailto:kirill.yukhin@gmail.com]
> > Sent: Tuesday, July 4, 2017 7:45 PM
> > To: Peryt, Sebastian <sebastian.peryt@intel.com>
> > Cc: gcc-patches@gcc.gnu.org; Uros Bizjak <ubizjak@gmail.com>
> > Subject: Re: [PATHC][x86] Scalar mask and round RTL templates
> > 
> > Hello Sebastian,
> > On 23 Jun 09:00, Peryt, Sebastian wrote:
> > > Hi,
> > > 
> > > This patch adds three extra RTL meta-templates for scalar round and mask. Additionally fixes errors caused by previous mask and round usage in some of the intrinsics that I found.
> > Could you pls point out which intrinsics you fixed (or which errors)?
> > I see only MD changes in your patch.
> > 
> > > 
> > > 2017-06-23  Sebastian Peryt  <sebastian.peryt@intel.com>
> > > 
> > > gcc/
> > > 	* config/i386/subst.md (mask_scalar, round_scalar, round_saeonly_scalar): New templates.
> > I'd call it meta-templates.
> > > 	(mask_scalar_name, mask_scalar_operand3, round_scalar_name,
> > > 	round_scalar_mask_operand3, round_scalar_mask_op3,
> > > 	round_scalar_constraint, round_scalar_prefix, round_saeonly_scalar_name,
> > > 	round_saeonly_scalar_mask_operand3, round_saeonly_scalar_mask_op3,
> > > 	round_saeonly_scalar_constraint, round_saeonly_scalar_prefix): New subst attribute.
> > > 	* config/i386/sse.md
> > > 	(<sse>_vm<plusminus_insn><mode>3<mask_name><round_name>): Renamed to ...
> > > 	<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name> ... this.
> > > 	(<sse>_vm<multdiv_mnemonic><mode>3<mask_name><round_name>): Renamed to ...
> > > 	<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name> ... this.
> > > 	(<sse>_vm<code><mode>3<mask_name><round_saeonly_name>): Renamed to ...
> > > 	<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name> ... this.
> > > 	(v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
> > > 	v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
> > > 	(v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
> > > 	v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
> > > 	(v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_saeonly_mask_op3>}): Changed to ...
> > > 	v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>} ... this.
> > We need to obey conventions. Pls break long lines here.
> > 
> > --
> > Thanks, K
> > > 
> > > Is it ok for trunk?
> > > 
> > > Thanks,
> > > Sebastian
> > 
> > 


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-07-05 16:45 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-23  9:00 [PATHC][x86] Scalar mask and round RTL templates Peryt, Sebastian
2017-07-04 17:44 ` Kirill Yukhin
2017-07-05  6:38   ` Peryt, Sebastian
2017-07-05 10:35     ` Kirill Yukhin
2017-07-05 13:51       ` Peryt, Sebastian
2017-07-05 16:45         ` Kirill Yukhin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).