public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v2 3/8] [APX NF] Support APX NF for left shift insns
       [not found] ` <20240522073710.2039035-3-lingling.kong@intel.com>
@ 2024-05-22  8:41   ` Kong, Lingling
  0 siblings, 0 replies; 6+ messages in thread
From: Kong, Lingling @ 2024-05-22  8:41 UTC (permalink / raw)
  To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak

gcc/ChangeLog:

	* config/i386/i386.md (*ashl<mode>3_1_nf): New.
	(*ashlhi3_1_nf): Ditto.
	(*ashlqi3_1_nf): Ditto.
	* config/i386/sse.md: New define_split.
---
 gcc/config/i386/i386.md | 80 +++++++++++++++++++++++++++--------------
 gcc/config/i386/sse.md  | 13 +++++++
 2 files changed, 67 insertions(+), 26 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 099d7f35c8f..271d449d7c4 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -15012,12 +15012,12 @@
   [(set_attr "type" "ishiftx")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*ashl<mode>3_1"
+(define_insn "*ashl<mode>3_1<nf_name>"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r")
 	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k,rm")
-		      (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>,c<S>")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands, TARGET_APX_NDD)"
+		      (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>,c<S>")))]
+  "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands, TARGET_APX_NDD)
+   && <nf_condition>"
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
@@ -15030,7 +15030,7 @@
     case TYPE_ALU:
       gcc_assert (operands[2] == const1_rtx);
       gcc_assert (rtx_equal_p (operands[0], operands[1]));
-      return "add{<imodesuffix>}\t%0, %0";
+      return "<nf_prefix>add{<imodesuffix>}\t%0, %0";
 
     default:
       if (operands[2] == const1_rtx
@@ -15038,11 +15038,11 @@
 	  /* For NDD form instructions related to TARGET_SHIFT1, the $1
 	     immediate do not need to be omitted as assembler will map it
 	     to use shorter encoding. */
-	  && !use_ndd)
+	  && !use_ndd && !<nf_applied>)
 	return "sal{<imodesuffix>}\t%0";
       else
-	return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
-		       : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+	return use_ndd ? "<nf_prefix>sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+		       : "<nf_prefix>sal{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
   [(set_attr "isa" "*,*,bmi2,avx512bw,apx_ndd")
@@ -15073,6 +15073,17 @@
    (set_attr "mode" "<MODE>")])
 
 ;; Convert shift to the shiftx pattern to avoid flags dependency.
+;; The NF/NDD shift insns do not accept a shift count in a general register
+;; (r), only c<S>, but they do not clobber the flags.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
+		      (match_operand:QI 2 "register_operand")))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(ashift:SWI48 (match_dup 1) (match_dup 2)))]
+  "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
+
 (define_split
   [(set (match_operand:SWI48 0 "register_operand")
 	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
@@ -15159,12 +15170,12 @@
 	(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
   "operands[2] = gen_lowpart (SImode, operands[2]);")
 
-(define_insn "*ashlhi3_1"
+(define_insn "*ashlhi3_1<nf_name>"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r")
 	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm")
-		   (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFT, HImode, operands, TARGET_APX_NDD)"
+		   (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI")))]
+  "ix86_binary_operator_ok (ASHIFT, HImode, operands, TARGET_APX_NDD)
+   && <nf_condition>"
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
@@ -15175,16 +15186,16 @@
 
     case TYPE_ALU:
       gcc_assert (operands[2] == const1_rtx);
-      return "add{w}\t%0, %0";
+      return "<nf_prefix>add{w}\t%0, %0";
 
     default:
       if (operands[2] == const1_rtx
 	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-	  && !use_ndd)
+	  && !use_ndd && !<nf_applied>)
 	return "sal{w}\t%0";
       else
-	return use_ndd ? "sal{w}\t{%2, %1, %0|%0, %1, %2}"
-		       : "sal{w}\t{%2, %0|%0, %2}";
+	return use_ndd ? "<nf_prefix>sal{w}\t{%2, %1, %0|%0, %1, %2}"
+		       : "<nf_prefix>sal{w}\t{%2, %0|%0, %2}";
     }
 }
   [(set_attr "isa" "*,*,avx512f,apx_ndd")
@@ -15212,12 +15223,12 @@
        (const_string "*")))
    (set_attr "mode" "HI,SI,HI,HI")])
 
-(define_insn "*ashlqi3_1"
+(define_insn "*ashlqi3_1<nf_name>"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r")
 	(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm")
-		   (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFT, QImode, operands, TARGET_APX_NDD)"
+		   (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI")))]
+  "ix86_binary_operator_ok (ASHIFT, QImode, operands, TARGET_APX_NDD)
+   && <nf_condition>"
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
@@ -15229,14 +15240,14 @@
     case TYPE_ALU:
       gcc_assert (operands[2] == const1_rtx);
       if (REG_P (operands[1]) && !ANY_QI_REGNO_P (REGNO (operands[1])))
-        return "add{l}\t%k0, %k0";
+        return "<nf_prefix>add{l}\t%k0, %k0";
       else
-        return "add{b}\t%0, %0";
+        return "<nf_prefix>add{b}\t%0, %0";
 
     default:
       if (operands[2] == const1_rtx
 	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-	  && !use_ndd)
+	  && !use_ndd && !<nf_applied>)
 	{
 	  if (get_attr_mode (insn) == MODE_SI)
 	    return "sal{l}\t%k0";
@@ -15246,10 +15257,10 @@
       else
 	{
 	  if (get_attr_mode (insn) == MODE_SI)
-	    return "sal{l}\t{%2, %k0|%k0, %2}";
+	    return "<nf_prefix>sal{l}\t{%2, %k0|%k0, %2}";
 	  else
-	    return use_ndd ? "sal{b}\t{%2, %1, %0|%0, %1, %2}"
-			   : "sal{b}\t{%2, %0|%0, %2}";
+	    return use_ndd ? "<nf_prefix>sal{b}\t{%2, %1, %0|%0, %1, %2}"
+			   : "<nf_prefix>sal{b}\t{%2, %0|%0, %2}";
 	}
     }
 }
@@ -15334,6 +15345,23 @@
    (set_attr "mode" "<MODE>")])
 
 ;; Convert ashift to the lea pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:SWI 0 "general_reg_operand")
+	(ashift:SWI (match_operand:SWI 1 "index_reg_operand")
+		    (match_operand 2 "const_0_to_3_operand")))]
+  "reload_completed
+   && REGNO (operands[0]) != REGNO (operands[1])"
+  [(set (match_dup 0)
+	(mult:<LEAMODE> (match_dup 1) (match_dup 2)))]
+{
+  if (<MODE>mode != <LEAMODE>mode)
+    {
+      operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
+      operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
+    }
+  operands[2] = GEN_INT (1 << INTVAL (operands[2]));
+})
+
 (define_split
   [(set (match_operand:SWI 0 "general_reg_operand")
 	(ashift:SWI (match_operand:SWI 1 "index_reg_operand")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 72d4556f47d..498ca5e4d1b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2169,6 +2169,19 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "<MODE>")])
 
+(define_split
+  [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
+	(any_lshift:SWI1248_AVX512BW
+	  (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
+	  (match_operand 2 "const_int_operand")))]
+  "TARGET_AVX512F && reload_completed"
+  [(parallel
+     [(set (match_dup 0)
+	   (any_lshift:SWI1248_AVX512BW
+	     (match_dup 1)
+	     (match_dup 2)))
+      (unspec [(const_int 0)] UNSPEC_MASKOP)])])
+
 (define_split
   [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
 	(any_lshift:SWI1248_AVX512BW
--
2.31.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 4/8] [APX NF] Support APX NF for right shift insns
       [not found] ` <20240522073710.2039035-4-lingling.kong@intel.com>
@ 2024-05-22  8:41   ` Kong, Lingling
  0 siblings, 0 replies; 6+ messages in thread
From: Kong, Lingling @ 2024-05-22  8:41 UTC (permalink / raw)
  To: gcc-patches; +Cc: Liu, Hongtao, Uros Bizjak, Kong, Lingling

gcc/ChangeLog:

	* config/i386/i386.md (*ashr<mode>3_1_nf): New.
	(*lshr<mode>3_1_nf): Ditto.
	(*lshrqi3_1_nf): Ditto.
	(*lshrhi3_1_nf): Ditto.
---
 gcc/config/i386/i386.md | 82 +++++++++++++++++++++++------------------
 1 file changed, 46 insertions(+), 36 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 271d449d7c4..7f191749342 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -16308,13 +16308,13 @@
   [(set_attr "type" "ishiftx")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*ashr<mode>3_1"
+(define_insn "*ashr<mode>3_1<nf_name>"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
 	(ashiftrt:SWI48
 	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
-	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r,c<S>")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r,c<S>")))]
+  "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)
+   && <nf_condition>"
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
@@ -16325,11 +16325,11 @@
     default:
       if (operands[2] == const1_rtx
 	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-	  && !use_ndd)
+	  && !use_ndd && !<nf_applied>)
 	return "sar{<imodesuffix>}\t%0";
       else
-	return use_ndd ? "sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
-		       : "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
+	return use_ndd ? "<nf_prefix>sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+		       : "<nf_prefix>sar{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
   [(set_attr "isa" "*,bmi2,apx_ndd")
@@ -16369,14 +16369,13 @@
 }
 [(set_attr "isa" "*,*,*,apx_ndd")])
 
-
-(define_insn "*lshr<mode>3_1"
+(define_insn "*lshr<mode>3_1<nf_name>"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k,r")
 	(lshiftrt:SWI48
 	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k,rm")
-	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>,c<S>")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>,c<S>")))]
+  "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)
+   && <nf_condition>"
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
@@ -16388,11 +16387,11 @@
     default:
       if (operands[2] == const1_rtx
 	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-	  && !use_ndd)
+	  && !use_ndd && !<nf_applied>)
 	return "shr{<imodesuffix>}\t%0";
       else
-	return use_ndd ? "shr{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
-		       : "shr{<imodesuffix>}\t{%2, %0|%0, %2}";
+	return use_ndd ? "<nf_prefix>shr{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+		       : "<nf_prefix>shr{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
   [(set_attr "isa" "*,bmi2,avx512bw,apx_ndd")
@@ -16408,6 +16407,17 @@
    (set_attr "mode" "<MODE>")])
 
 ;; Convert shift to the shiftx pattern to avoid flags dependency.
+;; The NF/NDD shift insns do not accept a shift count in a general register
+;; (r), only c<S>, but they do not clobber the flags.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand")
+	(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
+			   (match_operand:QI 2 "register_operand")))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))]
+  "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
+
 (define_split
   [(set (match_operand:SWI48 0 "register_operand")
 	(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
@@ -16476,22 +16486,22 @@
 	(zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
   "operands[2] = gen_lowpart (SImode, operands[2]);")
 
-(define_insn "*ashr<mode>3_1"
+(define_insn "*ashr<mode>3_1<nf_name>"
   [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, r")
 	(ashiftrt:SWI12
 	  (match_operand:SWI12 1 "nonimmediate_operand" "0, rm")
-	  (match_operand:QI 2 "nonmemory_operand" "c<S>, c<S>")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>, c<S>")))]
+  "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)
+   && <nf_condition>"
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   if (operands[2] == const1_rtx
       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-      && !use_ndd)
+      && !use_ndd && !<nf_applied>)
     return "sar{<imodesuffix>}\t%0";
   else
-    return use_ndd ? "sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
-		   : "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
+    return use_ndd ? "<nf_prefix>sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+		   : "<nf_prefix>sar{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
   [(set_attr "isa" "*, apx_ndd")
    (set_attr "type" "ishift")
@@ -16504,13 +16514,13 @@
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*lshrqi3_1"
+(define_insn "*lshrqi3_1<nf_name>"
   [(set (match_operand:QI 0 "nonimmediate_operand"  "=qm,?k,r")
 	(lshiftrt:QI
 	  (match_operand:QI 1 "nonimmediate_operand" "0, k, rm")
-	  (match_operand:QI 2 "nonmemory_operand"    "cI,Wb,cI")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (LSHIFTRT, QImode, operands, TARGET_APX_NDD)"
+	  (match_operand:QI 2 "nonmemory_operand"    "cI,Wb,cI")))]
+  "ix86_binary_operator_ok (LSHIFTRT, QImode, operands, TARGET_APX_NDD)
+   && <nf_condition>"
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
@@ -16518,11 +16528,11 @@
     case TYPE_ISHIFT:
       if (operands[2] == const1_rtx
 	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-	  && !use_ndd)
+	  && !use_ndd && !<nf_applied>)
 	return "shr{b}\t%0";
       else
-	return use_ndd ? "shr{b}\t{%2, %1, %0|%0, %1, %2}"
-		       : "shr{b}\t{%2, %0|%0, %2}";
+	return use_ndd ? "<nf_prefix>shr{b}\t{%2, %1, %0|%0, %1, %2}"
+		       : "<nf_prefix>shr{b}\t{%2, %0|%0, %2}";
     case TYPE_MSKLOG:
       return "#";
     default:
@@ -16541,13 +16551,13 @@
        (const_string "*")))
    (set_attr "mode" "QI")])
 
-(define_insn "*lshrhi3_1"
+(define_insn "*lshrhi3_1<nf_name>"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k, r")
 	(lshiftrt:HI
 	  (match_operand:HI 1 "nonimmediate_operand" "0, k, rm")
-	  (match_operand:QI 2 "nonmemory_operand" "cI, Ww, cI")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (LSHIFTRT, HImode, operands, TARGET_APX_NDD)"
+	  (match_operand:QI 2 "nonmemory_operand" "cI, Ww, cI")))]
+  "ix86_binary_operator_ok (LSHIFTRT, HImode, operands, TARGET_APX_NDD)
+   && <nf_condition>"
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
@@ -16555,11 +16565,11 @@
     case TYPE_ISHIFT:
       if (operands[2] == const1_rtx
 	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-	  && !use_ndd)
+	  && !use_ndd && !<nf_applied>)
 	return "shr{w}\t%0";
       else
-	return use_ndd ? "shr{w}\t{%2, %1, %0|%0, %1, %2}"
-		       : "shr{w}\t{%2, %0|%0, %2}";
+	return use_ndd ? "<nf_prefix>shr{w}\t{%2, %1, %0|%0, %1, %2}"
+		       : "<nf_prefix>shr{w}\t{%2, %0|%0, %2}";
     case TYPE_MSKLOG:
       return "#";
     default:
--
2.31.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 5/8] [APX NF] Support APX NF for rotate insns
       [not found] ` <20240522073710.2039035-5-lingling.kong@intel.com>
@ 2024-05-22  8:42   ` Kong, Lingling
  0 siblings, 0 replies; 6+ messages in thread
From: Kong, Lingling @ 2024-05-22  8:42 UTC (permalink / raw)
  To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak

gcc/ChangeLog:

	* config/i386/i386.md (ashr<mode>3_cvt_nf): New define_insn.
	(*<insn><mode>3_1_nf): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/apx-nf.c: Add NF test for rotate insns.
---
 gcc/config/i386/i386.md                | 53 ++++++++++++++++----------
 gcc/testsuite/gcc.target/i386/apx-nf.c |  5 +++
 2 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 7f191749342..731eb12d13a 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -16230,19 +16230,19 @@
 (define_mode_attr cvt_mnemonic
   [(SI "{cltd|cdq}") (DI "{cqto|cqo}")])
 
-(define_insn "ashr<mode>3_cvt"
+(define_insn "ashr<mode>3_cvt<nf_name>"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm,r")
 	(ashiftrt:SWI48
 	  (match_operand:SWI48 1 "nonimmediate_operand" "*a,0,rm")
-	  (match_operand:QI 2 "const_int_operand")))
-   (clobber (reg:CC FLAGS_REG))]
+	  (match_operand:QI 2 "const_int_operand")))]
   "INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)-1
    && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
+   && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)
+   && <nf_condition>"
   "@
    <cvt_mnemonic>
-   sar{<imodesuffix>}\t{%2, %0|%0, %2}
-   sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+   <nf_prefix>sar{<imodesuffix>}\t{%2, %0|%0, %2}
+   <nf_prefix>sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "*,*,apx_ndd")
    (set_attr "type" "imovx,ishift,ishift")
    (set_attr "prefix_0f" "0,*,*")
@@ -17094,13 +17094,13 @@
   [(set_attr "type" "rotatex")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*<insn><mode>3_1"
+(define_insn "*<insn><mode>3_1<nf_name>"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
 	(any_rotate:SWI48
 	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
-	  (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>,c<S>")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>,c<S>")))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)
+   && <nf_condition>"
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
@@ -17111,11 +17111,11 @@
     default:
       if (operands[2] == const1_rtx
 	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-	  && !use_ndd)
+	  && !use_ndd && !<nf_applied>)
 	return "<rotate>{<imodesuffix>}\t%0";
       else
-	return use_ndd ? "<rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
-		       : "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+	return use_ndd ? "<nf_prefix><rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+		       : "<nf_prefix><rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
   [(set_attr "isa" "*,bmi2,apx_ndd")
@@ -17135,6 +17135,19 @@
    (set_attr "mode" "<MODE>")])
 
 ;; Convert rotate to the rotatex pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand")
+	(rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
+		      (match_operand:QI 2 "const_int_operand")))]
+  "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)"
+  [(set (match_dup 0)
+	(rotatert:SWI48 (match_dup 1) (match_dup 2)))]
+{
+  int bitsize = GET_MODE_BITSIZE (<MODE>mode);
+
+  operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize);
+})
+
 (define_split
   [(set (match_operand:SWI48 0 "register_operand")
 	(rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
@@ -17236,22 +17249,22 @@
   [(set (match_dup 0)
 	(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])
 
-(define_insn "*<insn><mode>3_1"
+(define_insn "*<insn><mode>3_1<nf_name>"
   [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m,r")
 	(any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0,rm")
-			  (match_operand:QI 2 "nonmemory_operand" "c<S>,c<S>")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
+			  (match_operand:QI 2 "nonmemory_operand" "c<S>,c<S>")))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)
+   && <nf_condition>"
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   if (operands[2] == const1_rtx
       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-      && !use_ndd)
+      && !use_ndd && !<nf_applied>)
     return "<rotate>{<imodesuffix>}\t%0";
   else
     return use_ndd
-	   ? "<rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
-	   : "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+	   ? "<nf_prefix><rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+	   : "<nf_prefix><rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
   [(set_attr "isa" "*,apx_ndd")
    (set_attr "type" "rotate")
diff --git a/gcc/testsuite/gcc.target/i386/apx-nf.c b/gcc/testsuite/gcc.target/i386/apx-nf.c
index 608dbf8f5f7..6e59803be64 100644
--- a/gcc/testsuite/gcc.target/i386/apx-nf.c
+++ b/gcc/testsuite/gcc.target/i386/apx-nf.c
@@ -3,6 +3,7 @@
 /* { dg-final { scan-assembler-times "\{nf\} add" 4 } } */
 /* { dg-final { scan-assembler-times "\{nf\} and" 1 } } */
 /* { dg-final { scan-assembler-times "\{nf\} or" 1 } } */
+/* { dg-final { scan-assembler-times "\{nf\} rol" 4 } } */
 
 #include "apx-ndd.c"
 
@@ -13,3 +14,7 @@ foo (struct B *b)
 {
     b->bit0 = b->bit0 | b->bit1;
 }
+long int f1 (int x) { return ~(1ULL << (x & 0x3f)); }
+long int f2 (int x) { return ~(1ULL << x); }
+long int f3 (unsigned char *x) { return ~(1ULL << (x[0] & 0x3f)); }
+long int f4 (unsigned char *x) { return ~(1ULL << x[0]); }
--
2.31.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 6/8] [APX NF] Support APX NF for shld/shrd
       [not found] ` <20240522073710.2039035-6-lingling.kong@intel.com>
@ 2024-05-22  8:43   ` Kong, Lingling
  0 siblings, 0 replies; 6+ messages in thread
From: Kong, Lingling @ 2024-05-22  8:43 UTC (permalink / raw)
  To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak

gcc/ChangeLog:

	* config/i386/i386.md (x86_64_shld_nf): New define_insn.
	(x86_64_shld_ndd_nf): Ditto.
	(x86_64_shld_1_nf): Ditto.
	(x86_64_shld_ndd_1_nf): Ditto.
	(*x86_64_shld_shrd_1_nozext_nf): Ditto.
	(x86_shld_nf): Ditto.
	(x86_shld_ndd_nf): Ditto.
	(x86_shld_1_nf): Ditto.
	(x86_shld_ndd_1_nf): Ditto.
	(*x86_shld_shrd_1_nozext_nf): Ditto.
	(<insn><dwi>3_doubleword_lowpart_nf): Ditto.
	(x86_64_shrd_nf): Ditto.
	(x86_64_shrd_ndd_nf): Ditto.
	(x86_64_shrd_1_nf): Ditto.
	(x86_64_shrd_ndd_1_nf): Ditto.
	(*x86_64_shrd_shld_1_nozext_nf): Ditto.
	(x86_shrd_nf): Ditto.
	(x86_shrd_ndd_nf): Ditto.
	(x86_shrd_1_nf): Ditto.
	(x86_shrd_ndd_1_nf): Ditto.
	(*x86_shrd_shld_1_nozext_nf): Ditto.
---
 gcc/config/i386/i386.md | 377 +++++++++++++++++++++++++++++++---------
 1 file changed, 296 insertions(+), 81 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 731eb12d13a..4d684e8d919 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14552,7 +14552,7 @@
   DONE;
 })
 
-(define_insn "x86_64_shld"
+(define_insn "x86_64_shld<nf_name>"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
         (ior:DI (ashift:DI (match_dup 0)
 		  (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
@@ -14562,10 +14562,9 @@
 		    (zero_extend:TI
 		      (match_operand:DI 1 "register_operand" "r"))
 		    (minus:QI (const_int 64)
-			      (and:QI (match_dup 2) (const_int 63)))) 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "shld{q}\t{%2, %1, %0|%0, %1, %2}"
+			      (and:QI (match_dup 2) (const_int 63)))) 0)))]
+  "TARGET_64BIT && <nf_condition>"
+  "<nf_prefix>shld{q}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "DI")
@@ -14573,7 +14572,7 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
-(define_insn "x86_64_shld_ndd"
+(define_insn "x86_64_shld_ndd<nf_name>"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
 		  (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
@@ -14583,14 +14582,13 @@
 		    (zero_extend:TI
 		      (match_operand:DI 2 "register_operand" "r"))
 		    (minus:QI (const_int 64)
-			      (and:QI (match_dup 3) (const_int 63)))) 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_APX_NDD"
-  "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+			      (and:QI (match_dup 3) (const_int 63)))) 0)))]
+  "TARGET_APX_NDD && <nf_condition>"
+  "<nf_prefix>shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "DI")])
 
-(define_insn "x86_64_shld_1"
+(define_insn "x86_64_shld_1<nf_name>"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
         (ior:DI (ashift:DI (match_dup 0)
 			   (match_operand:QI 2 "const_0_to_63_operand"))
@@ -14598,11 +14596,11 @@
 		  (lshiftrt:TI
 		    (zero_extend:TI
 		      (match_operand:DI 1 "register_operand" "r"))
-		    (match_operand:QI 3 "const_0_to_255_operand")) 0)))
-   (clobber (reg:CC FLAGS_REG))]
+		    (match_operand:QI 3 "const_0_to_255_operand")) 0)))]
   "TARGET_64BIT
-   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
-  "shld{q}\t{%2, %1, %0|%0, %1, %2}"
+   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+   && <nf_condition>"
+  "<nf_prefix>shld{q}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "DI")
@@ -14611,7 +14609,7 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
-(define_insn "x86_64_shld_ndd_1"
+(define_insn "x86_64_shld_ndd_1<nf_name>"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
 			   (match_operand:QI 3 "const_0_to_63_operand"))
@@ -14619,15 +14617,66 @@
 		  (lshiftrt:TI
 		    (zero_extend:TI
 		      (match_operand:DI 2 "register_operand" "r"))
-		    (match_operand:QI 4 "const_0_to_255_operand")) 0)))
-   (clobber (reg:CC FLAGS_REG))]
+		    (match_operand:QI 4 "const_0_to_255_operand")) 0)))]
   "TARGET_APX_NDD
-   && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
-  "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+   && INTVAL (operands[4]) == 64 - INTVAL (operands[3])
+   && <nf_condition>"
+  "<nf_prefix>shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "DI")
    (set_attr "length_immediate" "1")])
 
+(define_insn_and_split "*x86_64_shld_shrd_1_nozext_nf"
+  [(set (match_operand:DI 0 "nonimmediate_operand")
+	(ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand")
+			     (match_operand:QI 2 "const_0_to_63_operand"))
+		(lshiftrt:DI
+		  (match_operand:DI 1 "nonimmediate_operand")
+		  (match_operand:QI 3 "const_0_to_63_operand"))))]
+  "TARGET_64BIT && TARGET_APX_NF
+   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (rtx_equal_p (operands[4], operands[0]))
+    {
+      operands[1] = force_reg (DImode, operands[1]);
+      emit_insn (gen_x86_64_shld_1_nf (operands[0], operands[1], operands[2], operands[3]));
+    }
+  else if (rtx_equal_p (operands[1], operands[0]))
+    {
+      operands[4] = force_reg (DImode, operands[4]);
+      emit_insn (gen_x86_64_shrd_1_nf (operands[0], operands[4], operands[3], operands[2]));
+    }
+  else if (TARGET_APX_NDD)
+    {
+     rtx tmp = gen_reg_rtx (DImode);
+     if (MEM_P (operands[4]))
+       {
+	 operands[1] = force_reg (DImode, operands[1]);
+	 emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[4], operands[1],
+					   operands[2], operands[3]));
+       }
+     else if (MEM_P (operands[1]))
+       emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[1], operands[4],
+					 operands[3], operands[2]));
+     else
+       emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[4], operands[1],
+					 operands[2], operands[3]));
+     emit_move_insn (operands[0], tmp);
+    }
+  else
+   {
+     operands[1] = force_reg (DImode, operands[1]);
+     rtx tmp = gen_reg_rtx (DImode);
+     emit_move_insn (tmp, operands[4]);
+     emit_insn (gen_x86_64_shld_1_nf (tmp, operands[1], operands[2], operands[3]));
+     emit_move_insn (operands[0], tmp);
+   }
+   DONE;
+})
 
 (define_insn_and_split "*x86_64_shld_shrd_1_nozext"
   [(set (match_operand:DI 0 "nonimmediate_operand")
@@ -14730,7 +14779,7 @@
   emit_move_insn (operands[4], operands[0]);
 })
 
-(define_insn "x86_shld"
+(define_insn "x86_shld<nf_name>"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (ashift:SI (match_dup 0)
 		  (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
@@ -14740,10 +14789,9 @@
 		    (zero_extend:DI
 		      (match_operand:SI 1 "register_operand" "r"))
 		    (minus:QI (const_int 32)
-			      (and:QI (match_dup 2) (const_int 31)))) 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  ""
-  "shld{l}\t{%2, %1, %0|%0, %1, %2}"
+			      (and:QI (match_dup 2) (const_int 31)))) 0)))]
+  "<nf_condition>"
+  "<nf_prefix>shld{l}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "SI")
@@ -14752,7 +14800,7 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
-(define_insn "x86_shld_ndd"
+(define_insn "x86_shld_ndd<nf_name>"
   [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
         (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
 		  (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
@@ -14762,15 +14810,14 @@
 		    (zero_extend:DI
 		      (match_operand:SI 2 "register_operand" "r"))
 		    (minus:QI (const_int 32)
-			      (and:QI (match_dup 3) (const_int 31)))) 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_APX_NDD"
-  "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+			      (and:QI (match_dup 3) (const_int 31)))) 0)))]
+  "TARGET_APX_NDD && <nf_condition>"
+  "<nf_prefix>shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "SI")])
 
 
-(define_insn "x86_shld_1"
+(define_insn "x86_shld_1<nf_name>"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (ashift:SI (match_dup 0)
 			   (match_operand:QI 2 "const_0_to_31_operand"))
@@ -14778,10 +14825,10 @@
 		  (lshiftrt:DI
 		    (zero_extend:DI
 		      (match_operand:SI 1 "register_operand" "r"))
-		    (match_operand:QI 3 "const_0_to_63_operand")) 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  "INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
-  "shld{l}\t{%2, %1, %0|%0, %1, %2}"
+		    (match_operand:QI 3 "const_0_to_63_operand")) 0)))]
+  "INTVAL (operands[3]) == 32 - INTVAL (operands[2])
+  && <nf_condition>"
+  "<nf_prefix>shld{l}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "length_immediate" "1")
@@ -14791,7 +14838,7 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
-(define_insn "x86_shld_ndd_1"
+(define_insn "x86_shld_ndd_1<nf_name>"
   [(set (match_operand:SI 0 "register_operand" "=r")
         (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
 			   (match_operand:QI 3 "const_0_to_31_operand"))
@@ -14799,15 +14846,66 @@
 		  (lshiftrt:DI
 		    (zero_extend:DI
 		      (match_operand:SI 2 "register_operand" "r"))
-		    (match_operand:QI 4 "const_0_to_63_operand")) 0)))
-   (clobber (reg:CC FLAGS_REG))]
+		    (match_operand:QI 4 "const_0_to_63_operand")) 0)))]
   "TARGET_APX_NDD 
-   && INTVAL (operands[4]) == 32 - INTVAL (operands[3])"
-  "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+   && INTVAL (operands[4]) == 32 - INTVAL (operands[3])
+   && <nf_condition>"
+  "<nf_prefix>shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
    (set_attr "length_immediate" "1")
    (set_attr "mode" "SI")])
 
+(define_insn_and_split "*x86_shld_shrd_1_nozext_nf"  ; SImode (op4 << op2) | (op1 >> op3), no FLAGS_REG clobber
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand")
+			     (match_operand:QI 2 "const_0_to_31_operand"))  ; op2: left-shift count
+	       (lshiftrt:SI
+		   (match_operand:SI 1 "nonimmediate_operand")
+		   (match_operand:QI 3 "const_0_to_31_operand"))))]  ; op3: right-shift count
+  "TARGET_APX_NF &&
+  INTVAL (operands[3]) == 32 - INTVAL (operands[2])
+   && ix86_pre_reload_split ()"
+  "#"  ; no assembler text of its own
+  "&& 1"  ; split condition adds nothing to the insn condition
+  [(const_int 0)]  ; placeholder: the C code below emits the insns and ends with DONE
+{
+  if (rtx_equal_p (operands[4], operands[0]))  /* Output is the left-shifted input: shld in place.  */
+    {
+      operands[1] = force_reg (SImode, operands[1]);
+      emit_insn (gen_x86_shld_1_nf (operands[0], operands[1], operands[2], operands[3]));
+    }
+  else if (rtx_equal_p (operands[1], operands[0]))  /* Output is the right-shifted input: shrd in place.  */
+    {
+      operands[4] = force_reg (SImode, operands[4]);
+      emit_insn (gen_x86_shrd_1_nf (operands[0], operands[4], operands[3], operands[2]));  /* Counts swapped.  */
+    }
+  else if (TARGET_APX_NDD)  /* NDD forms write a fresh pseudo that is copied to operand 0.  */
+    {
+     rtx tmp = gen_reg_rtx (SImode);
+     if (MEM_P (operands[4]))  /* Operand 4 stays the first source; operand 1 is forced to a register.  */
+       {
+	 operands[1] = force_reg (SImode, operands[1]);
+	 emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[4], operands[1],
+					operands[2], operands[3]));
+       }
+     else if (MEM_P (operands[1]))  /* Operand 1 becomes the first source of the shrd form.  */
+       emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[1], operands[4],
+				      operands[3], operands[2]));
+     else
+       emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[4], operands[1],
+				      operands[2], operands[3]));
+     emit_move_insn (operands[0], tmp);
+    }
+ else  /* No NDD: copy operand 4 to a pseudo and shld that pseudo in place.  */
+   {
+     operands[1] = force_reg (SImode, operands[1]);
+     rtx tmp = gen_reg_rtx (SImode);
+     emit_move_insn (tmp, operands[4]);
+     emit_insn (gen_x86_shld_1_nf (tmp, operands[1], operands[2], operands[3]));
+     emit_move_insn (operands[0], tmp);
+   }
+   DONE;
+})
 
 (define_insn_and_split "*x86_shld_shrd_1_nozext"
   [(set (match_operand:SI 0 "nonimmediate_operand")
@@ -15846,6 +15944,26 @@
 })
 
 ;; Split truncations of double word right shifts into x86_shrd_1.
+(define_insn_and_split "<insn><dwi>3_doubleword_lowpart_nf"  ; TARGET_APX_NF form, no FLAGS_REG clobber
+  [(set (match_operand:DWIH 0 "register_operand" "=&r")
+	(subreg:DWIH
+	  (any_shiftrt:<DWI> (match_operand:<DWI> 1 "register_operand" "r")
+			     (match_operand:QI 2 "const_int_operand")) 0))]  ; op2: constant shift count
+  "TARGET_APX_NF && UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT"
+  "#"  ; no assembler text of its own
+  "&& reload_completed"  ; split only after reload
+  [(set (match_dup 0)
+	(ior:DWIH (lshiftrt:DWIH (match_dup 0) (match_dup 2))
+		  (subreg:DWIH
+		    (ashift:<DWI> (zero_extend:<DWI> (match_dup 3))
+				  (match_dup 4)) 0)))]  ; op3 and op4 are set by the C code below
+{  /* Split <DWI> operand 1 into operands 1 and 3; operand 4 = DWIH bit width - count.  */
+  split_double_mode (<DWI>mode, &operands[1], 1, &operands[1], &operands[3]);
+  operands[4] = GEN_INT ((<MODE_SIZE> * BITS_PER_UNIT) - INTVAL (operands[2]));
+  if (!rtx_equal_p (operands[0], operands[1]))  /* New pattern shifts operand 0 in place.  */
+    emit_move_insn (operands[0], operands[1]);
+})
+
 (define_insn_and_split "<insn><dwi>3_doubleword_lowpart"
   [(set (match_operand:DWIH 0 "register_operand" "=&r")
 	(subreg:DWIH
@@ -15869,7 +15987,7 @@
     emit_move_insn (operands[0], operands[1]);
 })
 
-(define_insn "x86_64_shrd"
+(define_insn "x86_64_shrd<nf_name>"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
         (ior:DI (lshiftrt:DI (match_dup 0)
 		  (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
@@ -15879,10 +15997,9 @@
 		    (zero_extend:TI
 		      (match_operand:DI 1 "register_operand" "r"))
 		    (minus:QI (const_int 64)
-			      (and:QI (match_dup 2) (const_int 63)))) 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "shrd{q}\t{%2, %1, %0|%0, %1, %2}"
+			      (and:QI (match_dup 2) (const_int 63)))) 0)))]
+  "TARGET_64BIT && <nf_condition>"
+  "<nf_prefix>shrd{q}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "DI")
@@ -15890,7 +16007,7 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
-(define_insn "x86_64_shrd_ndd"
+(define_insn "x86_64_shrd_ndd<nf_name>"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
 		  (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
@@ -15900,15 +16017,13 @@
 		    (zero_extend:TI
 		      (match_operand:DI 2 "register_operand" "r"))
 		    (minus:QI (const_int 64)
-			      (and:QI (match_dup 3) (const_int 63)))) 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_APX_NDD"
-  "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+			      (and:QI (match_dup 3) (const_int 63)))) 0)))]
+  "TARGET_APX_NDD && <nf_condition>"
+  "<nf_prefix>shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "DI")])
 
-
-(define_insn "x86_64_shrd_1"
+(define_insn "x86_64_shrd_1<nf_name>"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
         (ior:DI (lshiftrt:DI (match_dup 0)
 			     (match_operand:QI 2 "const_0_to_63_operand"))
@@ -15916,11 +16031,11 @@
 		  (ashift:TI
 		    (zero_extend:TI
 		      (match_operand:DI 1 "register_operand" "r"))
-		    (match_operand:QI 3 "const_0_to_255_operand")) 0)))
-   (clobber (reg:CC FLAGS_REG))]
+		    (match_operand:QI 3 "const_0_to_255_operand")) 0)))]
   "TARGET_64BIT
-   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
-  "shrd{q}\t{%2, %1, %0|%0, %1, %2}"
+   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+   && <nf_condition>"
+  "<nf_prefix>shrd{q}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "length_immediate" "1")
@@ -15929,7 +16044,7 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
-(define_insn "x86_64_shrd_ndd_1"
+(define_insn "x86_64_shrd_ndd_1<nf_name>"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
 			     (match_operand:QI 3 "const_0_to_63_operand"))
@@ -15937,15 +16052,66 @@
 		  (ashift:TI
 		    (zero_extend:TI
 		      (match_operand:DI 2 "register_operand" "r"))
-		    (match_operand:QI 4 "const_0_to_255_operand")) 0)))
-   (clobber (reg:CC FLAGS_REG))]
+		    (match_operand:QI 4 "const_0_to_255_operand")) 0)))]
   "TARGET_APX_NDD
-   && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
-  "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+   && INTVAL (operands[4]) == 64 - INTVAL (operands[3])
+   && <nf_condition>"
+  "<nf_prefix>shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
    (set_attr "length_immediate" "1")
    (set_attr "mode" "DI")])
 
+(define_insn_and_split "*x86_64_shrd_shld_1_nozext_nf"  ; DImode (op4 >> op2) | (op1 << op3), no FLAGS_REG clobber
+  [(set (match_operand:DI 0 "nonimmediate_operand")
+	(ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand")
+			     (match_operand:QI 2 "const_0_to_63_operand"))  ; op2: right-shift count
+		(ashift:DI
+		  (match_operand:DI 1 "nonimmediate_operand")
+		  (match_operand:QI 3 "const_0_to_63_operand"))))]  ; op3: left-shift count
+  "TARGET_64BIT && TARGET_APX_NF
+   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+   && ix86_pre_reload_split ()"
+  "#"  ; no assembler text of its own
+  "&& 1"  ; split condition adds nothing to the insn condition
+  [(const_int 0)]  ; placeholder: the C code below emits the insns and ends with DONE
+{
+  if (rtx_equal_p (operands[4], operands[0]))  /* Output is the right-shifted input: shrd in place.  */
+    {
+      operands[1] = force_reg (DImode, operands[1]);
+      emit_insn (gen_x86_64_shrd_1_nf (operands[0], operands[1], operands[2], operands[3]));
+    }
+  else if (rtx_equal_p (operands[1], operands[0]))  /* Output is the left-shifted input: shld on operand 0.  */
+    {
+      operands[4] = force_reg (DImode, operands[4]);
+      emit_insn (gen_x86_64_shld_1_nf (operands[0], operands[4], operands[3], operands[2]));  /* Counts swapped.  */
+    }
+  else if (TARGET_APX_NDD)  /* NDD forms write a fresh pseudo that is copied to operand 0.  */
+    {
+      rtx tmp = gen_reg_rtx (DImode);
+      if (MEM_P (operands[4]))  /* Operand 4 stays the first source; operand 1 is forced to a register.  */
+        {
+	  operands[1] = force_reg (DImode, operands[1]);
+	  emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+					    operands[2], operands[3]));
+        }
+       else if (MEM_P (operands[1]))  /* Operand 1 becomes the first source of the shld form.  */
+         emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[1], operands[4],
+					   operands[3], operands[2]));
+       else
+         emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+					   operands[2], operands[3]));
+       emit_move_insn (operands[0], tmp);
+    }
+  else  /* No NDD: copy operand 4 to a pseudo and shrd that pseudo in place.  */
+   {
+     operands[1] = force_reg (DImode, operands[1]);
+     rtx tmp = gen_reg_rtx (DImode);
+     emit_move_insn (tmp, operands[4]);
+     emit_insn (gen_x86_64_shrd_1_nf (tmp, operands[1], operands[2], operands[3]));
+     emit_move_insn (operands[0], tmp);
+   }
+   DONE;
+})
 
 (define_insn_and_split "*x86_64_shrd_shld_1_nozext"
   [(set (match_operand:DI 0 "nonimmediate_operand")
@@ -16048,7 +16214,7 @@
   emit_move_insn (operands[4], operands[0]);
 })
 
-(define_insn "x86_shrd"
+(define_insn "x86_shrd<nf_name>"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (lshiftrt:SI (match_dup 0)
 		  (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
@@ -16058,10 +16224,9 @@
 		    (zero_extend:DI
 		      (match_operand:SI 1 "register_operand" "r"))
 		    (minus:QI (const_int 32)
-			      (and:QI (match_dup 2) (const_int 31)))) 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  ""
-  "shrd{l}\t{%2, %1, %0|%0, %1, %2}"
+			      (and:QI (match_dup 2) (const_int 31)))) 0)))]
+  "<nf_condition>"
+  "<nf_prefix>shrd{l}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "SI")
@@ -16070,7 +16235,7 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
-(define_insn "x86_shrd_ndd"
+(define_insn "x86_shrd_ndd<nf_name>"
   [(set (match_operand:SI 0 "register_operand" "=r")
         (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
 		  (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
@@ -16080,14 +16245,13 @@
 		    (zero_extend:DI
 		      (match_operand:SI 2 "register_operand" "r"))
 		    (minus:QI (const_int 32)
-			      (and:QI (match_dup 3) (const_int 31)))) 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_APX_NDD"
-  "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+			      (and:QI (match_dup 3) (const_int 31)))) 0)))]
+  "TARGET_APX_NDD && <nf_condition>"
+  "<nf_prefix>shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "SI")])
 
-(define_insn "x86_shrd_1"
+(define_insn "x86_shrd_1<nf_name>"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (lshiftrt:SI (match_dup 0)
 			     (match_operand:QI 2 "const_0_to_31_operand"))
@@ -16095,10 +16259,10 @@
 		  (ashift:DI
 		    (zero_extend:DI
 		      (match_operand:SI 1 "register_operand" "r"))
-		    (match_operand:QI 3 "const_0_to_63_operand")) 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  "INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
-  "shrd{l}\t{%2, %1, %0|%0, %1, %2}"
+		    (match_operand:QI 3 "const_0_to_63_operand")) 0)))]
+  "INTVAL (operands[3]) == 32 - INTVAL (operands[2])
+   && <nf_condition>"
+  "<nf_prefix>shrd{l}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "length_immediate" "1")
@@ -16108,7 +16272,7 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
-(define_insn "x86_shrd_ndd_1"
+(define_insn "x86_shrd_ndd_1<nf_name>"
   [(set (match_operand:SI 0 "register_operand" "=r")
         (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
 			     (match_operand:QI 3 "const_0_to_31_operand"))
@@ -16116,15 +16280,66 @@
 		  (ashift:DI
 		    (zero_extend:DI
 		      (match_operand:SI 2 "register_operand" "r"))
-		    (match_operand:QI 4 "const_0_to_63_operand")) 0)))
-   (clobber (reg:CC FLAGS_REG))]
+		    (match_operand:QI 4 "const_0_to_63_operand")) 0)))]
   "TARGET_APX_NDD
-   && (INTVAL (operands[4]) == 32 - INTVAL (operands[3]))"
-  "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+   && (INTVAL (operands[4]) == 32 - INTVAL (operands[3]))
+   && <nf_condition>"
+  "<nf_prefix>shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
    (set_attr "length_immediate" "1")
    (set_attr "mode" "SI")])
 
+(define_insn_and_split "*x86_shrd_shld_1_nozext_nf"  ; SImode (op4 >> op2) | (op1 << op3), no FLAGS_REG clobber
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand")
+			     (match_operand:QI 2 "const_0_to_31_operand"))  ; op2: right-shift count
+	       (ashift:SI
+		   (match_operand:SI 1 "nonimmediate_operand")
+		   (match_operand:QI 3 "const_0_to_31_operand"))))]  ; op3: left-shift count
+  "TARGET_APX_NF &&
+  INTVAL (operands[3]) == 32 - INTVAL (operands[2])
+   && ix86_pre_reload_split ()"
+  "#"  ; no assembler text of its own
+  "&& 1"  ; split condition adds nothing to the insn condition
+  [(const_int 0)]  ; placeholder: the C code below emits the insns and ends with DONE
+{
+  if (rtx_equal_p (operands[4], operands[0]))  /* Output is the right-shifted input: shrd in place.  */
+    {
+      operands[1] = force_reg (SImode, operands[1]);
+      emit_insn (gen_x86_shrd_1_nf (operands[0], operands[1], operands[2], operands[3]));
+    }
+  else if (rtx_equal_p (operands[1], operands[0]))  /* Output is the left-shifted input: shld in place.  */
+    {
+      operands[4] = force_reg (SImode, operands[4]);
+      emit_insn (gen_x86_shld_1_nf (operands[0], operands[4], operands[3], operands[2]));  /* Counts swapped.  */
+    }
+  else if (TARGET_APX_NDD)  /* NDD forms write a fresh pseudo that is copied to operand 0.  */
+    {
+      rtx tmp = gen_reg_rtx (SImode);
+      if (MEM_P (operands[4]))  /* Operand 4 stays the first source; operand 1 is forced to a register.  */
+        {
+	  operands[1] = force_reg (SImode, operands[1]);
+	  emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+					 operands[2], operands[3]));
+        }
+      else if (MEM_P (operands[1]))  /* Operand 1 becomes the first source of the shld form.  */
+        emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[1], operands[4],
+				       operands[3], operands[2]));
+      else
+        emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+				       operands[2], operands[3]));
+      emit_move_insn (operands[0], tmp);
+     }
+   else  /* No NDD: copy operand 4 to a pseudo and shrd that pseudo in place.  */
+   {
+     operands[1] = force_reg (SImode, operands[1]);
+     rtx tmp = gen_reg_rtx (SImode);
+     emit_move_insn (tmp, operands[4]);
+     emit_insn (gen_x86_shrd_1_nf (tmp, operands[1], operands[2], operands[3]));
+     emit_move_insn (operands[0], tmp);
+   }
+   DONE;
+})
 
 (define_insn_and_split "*x86_shrd_shld_1_nozext"
   [(set (match_operand:SI 0 "nonimmediate_operand")
--
2.31.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 7/8] [APX NF] Support APX NF for mul/div
       [not found] ` <20240522073710.2039035-7-lingling.kong@intel.com>
@ 2024-05-22  8:43   ` Kong, Lingling
  0 siblings, 0 replies; 6+ messages in thread
From: Kong, Lingling @ 2024-05-22  8:43 UTC (permalink / raw)
  To: gcc-patches; +Cc: Kong, Lingling, Liu, Hongtao, Uros Bizjak

gcc/ChangeLog:

	* config/i386/i386.md (*mul<mode>3_1_nf): New define_insn.
	(*mulqi3_1_nf): Ditto.
	(*<u>divmod<mode>4_noext_nf): Ditto.
	(<u>divmodhiqi3_nf): Ditto.
---
 gcc/config/i386/i386.md | 47 ++++++++++++++++++++++++++---------------
 1 file changed, 30 insertions(+), 17 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4d684e8d919..087761e5b3a 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -9896,17 +9896,17 @@
 ;;
 ;; On BDVER1, all HI MULs use DoublePath
 
-(define_insn "*mul<mode>3_1"
+(define_insn "*mul<mode>3_1<nf_name>"
   [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r")
 	(mult:SWIM248
 	  (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0")
-	  (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r")))
-   (clobber (reg:CC FLAGS_REG))]
-  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+	  (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r")))]
+  "!(MEM_P (operands[1]) && MEM_P (operands[2]))
+   && <nf_condition>"
   "@
-   imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
-   imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
-   imul{<imodesuffix>}\t{%2, %0|%0, %2}"
+   <nf_prefix>imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   <nf_prefix>imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   <nf_prefix>imul{<imodesuffix>}\t{%2, %0|%0, %2}"
   [(set_attr "type" "imul")
    (set_attr "prefix_0f" "0,0,1")
    (set (attr "athlon_decode")
@@ -9967,14 +9967,14 @@
 ;; MUL reg8 	Direct
 ;; MUL mem8 	Direct
 
-(define_insn "*mulqi3_1"
+(define_insn "*mulqi3_1<nf_name>"
   [(set (match_operand:QI 0 "register_operand" "=a")
 	(mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
-		 (match_operand:QI 2 "nonimmediate_operand" "qm")))
-   (clobber (reg:CC FLAGS_REG))]
+		 (match_operand:QI 2 "nonimmediate_operand" "qm")))]
   "TARGET_QIMODE_MATH
-   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "mul{b}\t%2"
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+   && <nf_condition>"
+  "<nf_prefix>mul{b}\t%2"
   [(set_attr "type" "imul")
    (set_attr "length_immediate" "0")
    (set (attr "athlon_decode")
@@ -11117,6 +11117,19 @@
   [(set_attr "type" "multi")
    (set_attr "mode" "SI")])
 
+(define_insn "*<u>divmod<mode>4_noext_nf"  ; div/mod pair printed with the nf prefix; no FLAGS_REG clobber
+  [(set (match_operand:SWIM248 0 "register_operand" "=a")  ; op0: any_div result, constraint a
+	(any_div:SWIM248
+	  (match_operand:SWIM248 2 "register_operand" "0")  ; op2: first any_div input, tied to op0
+	  (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))  ; op3: second input, the only operand printed
+   (set (match_operand:SWIM248 1 "register_operand" "=d")  ; op1: <paired_mod> result, constraint d
+	(<paired_mod>:SWIM248 (match_dup 2) (match_dup 3)))
+   (use (match_operand:SWIM248 4 "register_operand" "1"))]  ; op4: tied to op1, appears only in this use
+  "TARGET_APX_NF"
+  "%{nf%} <sgnprefix>div{<imodesuffix>}\t%3"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<u>divmod<mode>4_noext"
   [(set (match_operand:SWIM248 0 "register_operand" "=a")
 	(any_div:SWIM248
@@ -11264,7 +11277,7 @@
 ;; Change div/mod to HImode and extend the second argument to HImode
 ;; so that mode of div/mod matches with mode of arguments.  Otherwise
 ;; combine may fail.
-(define_insn "<u>divmodhiqi3"
+(define_insn "<u>divmodhiqi3<nf_name>"
   [(set (match_operand:HI 0 "register_operand" "=a")
 	(ior:HI
 	  (ashift:HI
@@ -11276,10 +11289,10 @@
 	    (const_int 8))
 	  (zero_extend:HI
 	    (truncate:QI
-	      (div:HI (match_dup 1) (any_extend:HI (match_dup 2)))))))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_QIMODE_MATH"
-  "<sgnprefix>div{b}\t%2"
+	      (div:HI (match_dup 1) (any_extend:HI (match_dup 2)))))))]
+  "TARGET_QIMODE_MATH
+   && <nf_condition>"
+  "<nf_prefix><sgnprefix>div{b}\t%2"
   [(set_attr "type" "idiv")
    (set_attr "mode" "QI")])
 
--
2.31.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 8/8] [APX NF] Support APX NF for lzcnt/tzcnt/popcnt
       [not found] ` <20240522073710.2039035-8-lingling.kong@intel.com>
@ 2024-05-22  8:44   ` Kong, Lingling
  0 siblings, 0 replies; 6+ messages in thread
From: Kong, Lingling @ 2024-05-22  8:44 UTC (permalink / raw)
  To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak

gcc/ChangeLog:

	* config/i386/i386.md (clz<mode>2_lzcnt_nf): New define_insn.
	(*clz<mode>2_lzcnt_falsedep_nf): Ditto.
	(<lt_zcnt>_<mode>_nf): Ditto.
	(*<lt_zcnt>_<mode>_falsedep_nf): Ditto.
	(<lt_zcnt>_hi_nf): Ditto.
	(popcount<mode>2_nf): Ditto.
	(*popcount<mode>2_falsedep_nf): Ditto.
	(popcounthi2_nf): Ditto.
---
 gcc/config/i386/i386.md | 124 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 113 insertions(+), 11 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 087761e5b3a..c9a3a99ca70 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -20250,6 +20250,24 @@
   operands[3] = gen_reg_rtx (<MODE>mode);
 })
 
+(define_insn_and_split "clz<mode>2_lzcnt_nf"  ; lzcnt printed with the nf prefix
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(clz:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]  ; no FLAGS_REG clobber in this pattern
+  "TARGET_APX_NF && TARGET_LZCNT"
+  "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
+  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && !reg_mentioned_p (operands[0], operands[1])"
+  [(parallel  ; split result: the same set plus an UNSPEC_INSN_FALSE_DEP reference to op0
+    [(set (match_dup 0)
+	  (clz:SWI48 (match_dup 1)))
+     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+  "ix86_expand_clear (operands[0]);"  ; preparation statement of the split
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn_and_split "clz<mode>2_lzcnt"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(clz:SWI48
@@ -20273,6 +20291,18 @@
 ; False dependency happens when destination is only updated by tzcnt,
 ; lzcnt or popcnt.  There is no false dependency when destination is
 ; also used in source.
+(define_insn "*clz<mode>2_lzcnt_falsedep_nf"  ; lzcnt with nf prefix plus a FALSE_DEP marker on the output
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(clz:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+   (unspec [(match_operand:SWI48 2 "register_operand" "0")]  ; op2: tied to op0, not printed
+	   UNSPEC_INSN_FALSE_DEP)]
+  "TARGET_APX_NF && TARGET_LZCNT"
+  "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*clz<mode>2_lzcnt_falsedep"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(clz:SWI48
@@ -20379,6 +20409,25 @@
 ;; Version of lzcnt/tzcnt that is expanded from intrinsics.  This version
 ;; provides operand size as output when source operand is zero.
 
+(define_insn_and_split "<lt_zcnt>_<mode>_nf"  ; LT_ZCNT unspec printed with the nf prefix
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(unspec:SWI48
+	  [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))]  ; no FLAGS_REG clobber in this pattern
+  "TARGET_APX_NF"
+  "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
+  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && !reg_mentioned_p (operands[0], operands[1])"
+  [(parallel  ; split result: the same set plus an UNSPEC_INSN_FALSE_DEP reference to op0
+    [(set (match_dup 0)
+	  (unspec:SWI48 [(match_dup 1)] LT_ZCNT))
+     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+  "ix86_expand_clear (operands[0]);"  ; preparation statement of the split
+  [(set_attr "type" "<lt_zcnt_type>")
+   (set_attr "prefix_0f" "1")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn_and_split "<lt_zcnt>_<mode>"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(unspec:SWI48
@@ -20403,6 +20452,19 @@
 ; False dependency happens when destination is only updated by tzcnt,
 ; lzcnt or popcnt.  There is no false dependency when destination is
 ; also used in source.
+(define_insn "*<lt_zcnt>_<mode>_falsedep_nf"  ; LT_ZCNT with nf prefix plus a FALSE_DEP marker on the output
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(unspec:SWI48
+	  [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
+   (unspec [(match_operand:SWI48 2 "register_operand" "0")]  ; op2: tied to op0, not printed
+	   UNSPEC_INSN_FALSE_DEP)]
+  "TARGET_APX_NF"
+  "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "<lt_zcnt_type>")
+   (set_attr "prefix_0f" "1")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<lt_zcnt>_<mode>_falsedep"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(unspec:SWI48
@@ -20417,13 +20479,12 @@
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<lt_zcnt>_hi"
+(define_insn "<lt_zcnt>_hi<nf_name>"
   [(set (match_operand:HI 0 "register_operand" "=r")
 	(unspec:HI
-	  [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))
-   (clobber (reg:CC FLAGS_REG))]
-  ""
-  "<lt_zcnt>{w}\t{%1, %0|%0, %1}"
+	  [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))]
+  "<nf_condition>"
+  "<nf_prefix><lt_zcnt>{w}\t{%1, %0|%0, %1}"
   [(set_attr "type" "<lt_zcnt_type>")
    (set_attr "prefix_0f" "1")
    (set_attr "prefix_rep" "1")
@@ -20841,6 +20902,30 @@
   [(set_attr "type" "bitmanip")
    (set_attr "mode" "<MODE>")])
 
+(define_insn_and_split "popcount<mode>2_nf"  ; popcnt printed with the nf prefix
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(popcount:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]  ; no FLAGS_REG clobber in this pattern
+  "TARGET_APX_NF && TARGET_POPCNT"
+{
+#if TARGET_MACHO
+  return "%{nf%} popcnt\t{%1, %0|%0, %1}";  /* Mnemonic without a size suffix.  */
+#else
+  return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
+  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && !reg_mentioned_p (operands[0], operands[1])"
+  [(parallel  ; split result: the same set plus an UNSPEC_INSN_FALSE_DEP reference to op0
+    [(set (match_dup 0)
+	  (popcount:SWI48 (match_dup 1)))
+     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+  "ix86_expand_clear (operands[0]);"  ; preparation statement of the split
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn_and_split "popcount<mode>2"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(popcount:SWI48
@@ -20870,6 +20955,24 @@
 ; False dependency happens when destination is only updated by tzcnt,
 ; lzcnt or popcnt.  There is no false dependency when destination is
 ; also used in source.
+(define_insn "*popcount<mode>2_falsedep_nf"  ; popcnt with nf prefix plus a FALSE_DEP marker on the output
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(popcount:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+   (unspec [(match_operand:SWI48 2 "register_operand" "0")]  ; op2: tied to op0, not printed
+	   UNSPEC_INSN_FALSE_DEP)]
+  "TARGET_APX_NF && TARGET_POPCNT"
+{
+#if TARGET_MACHO
+  return "%{nf%} popcnt\t{%1, %0|%0, %1}";  /* Mnemonic without a size suffix.  */
+#else
+  return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*popcount<mode>2_falsedep"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(popcount:SWI48
@@ -21027,17 +21130,16 @@
   DONE;
 })
 
-(define_insn "popcounthi2"
+(define_insn "popcounthi2<nf_name>"
   [(set (match_operand:HI 0 "register_operand" "=r")
 	(popcount:HI
-	  (match_operand:HI 1 "nonimmediate_operand" "rm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_POPCNT"
+	  (match_operand:HI 1 "nonimmediate_operand" "rm")))]
+  "TARGET_POPCNT && <nf_condition>"
 {
 #if TARGET_MACHO
-  return "popcnt\t{%1, %0|%0, %1}";
+  return "<nf_prefix>popcnt\t{%1, %0|%0, %1}";
 #else
-  return "popcnt{w}\t{%1, %0|%0, %1}";
+  return "<nf_prefix>popcnt{w}\t{%1, %0|%0, %1}";
 #endif
 }
   [(set_attr "prefix_rep" "1")
--
2.31.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2024-05-22  8:44 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20240522073710.2039035-1-lingling.kong@intel.com>
     [not found] ` <20240522073710.2039035-3-lingling.kong@intel.com>
2024-05-22  8:41   ` [PATCH v2 3/8] [APX NF] Support APX NF for left shift insns Kong, Lingling
     [not found] ` <20240522073710.2039035-4-lingling.kong@intel.com>
2024-05-22  8:41   ` [PATCH v2 4/8] [APX NF] Support APX NF for right " Kong, Lingling
     [not found] ` <20240522073710.2039035-5-lingling.kong@intel.com>
2024-05-22  8:42   ` [PATCH v2 5/8] [APX NF] Support APX NF for rotate insns Kong, Lingling
     [not found] ` <20240522073710.2039035-6-lingling.kong@intel.com>
2024-05-22  8:43   ` [PATCH v2 6/8] [APX NF] Support APX NF for shld/shrd Kong, Lingling
     [not found] ` <20240522073710.2039035-7-lingling.kong@intel.com>
2024-05-22  8:43   ` [PATCH v2 7/8] [APX NF] Support APX NF for mul/div Kong, Lingling
     [not found] ` <20240522073710.2039035-8-lingling.kong@intel.com>
2024-05-22  8:44   ` [PATCH v2 8/8] [APX NF] Support APX NF for lzcnt/tzcnt/popcnt Kong, Lingling

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).