public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 1/8] [APX NF]: Support APX NF add
       [not found] <20240515070226.3760873-1-lingling.kong@intel.com>
@ 2024-05-15  7:43 ` Kong, Lingling
  2024-05-15  8:14   ` Uros Bizjak
  2024-05-15  8:46   ` Uros Bizjak
       [not found] ` <20240515070226.3760873-2-lingling.kong@intel.com>
                   ` (6 subsequent siblings)
  7 siblings, 2 replies; 13+ messages in thread
From: Kong, Lingling @ 2024-05-15  7:43 UTC (permalink / raw)
  To: gcc-patches; +Cc: Liu, Hongtao, Uros Bizjak, Kong, Lingling, Wang, Hongyu

From: Hongyu Wang <hongyu.wang@intel.com>

The APX NF (no flags) feature suppresses the update of status flags for arithmetic operations.

For NF add, it is not clear whether NF add can be faster than lea. If so, the pattern needs to be adjusted to prefer LEA generation.

gcc/ChangeLog:

        * config/i386/i386-opts.h (enum apx_features): Add nf
        enumeration.
        * config/i386/i386.h (TARGET_APX_NF): New.
        * config/i386/i386.md (*add<mode>_1_nf): New define_insn.
        * config/i386/i386.opt: Add apx_nf enumeration.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/apx-ndd.c: Fix test.
        * gcc.target/i386/apx-nf.c: New test.

Co-authored-by: Lingling Kong <lingling.kong@intel.com>

Bootstrapped and regtested on x86_64-linux-gnu. SPEC 2017 also runs normally on the Intel Software Development Emulator.
Ok for trunk?

---
 gcc/config/i386/i386-opts.h             |  3 +-
 gcc/config/i386/i386.h                  |  1 +
 gcc/config/i386/i386.md                 | 42 +++++++++++++++++++++++++
 gcc/config/i386/i386.opt                |  3 ++
 gcc/testsuite/gcc.target/i386/apx-ndd.c |  2 +-
 gcc/testsuite/gcc.target/i386/apx-nf.c  |  6 ++++
 6 files changed, 55 insertions(+), 2 deletions(-)  create mode 100644 gcc/testsuite/gcc.target/i386/apx-nf.c

diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h index ef2825803b3..60176ce609f 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -140,7 +140,8 @@ enum apx_features {
   apx_push2pop2 = 1 << 1,
   apx_ndd = 1 << 2,
   apx_ppx = 1 << 3,
-  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx,
+  apx_nf = 1<< 4,
+  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx | apx_nf,
 };
 
 #endif
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 529edff93a4..f20ae4726da 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -55,6 +55,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see  #define TARGET_APX_PUSH2POP2 (ix86_apx_features & apx_push2pop2)  #define TARGET_APX_NDD (ix86_apx_features & apx_ndd)  #define TARGET_APX_PPX (ix86_apx_features & apx_ppx)
+#define TARGET_APX_NF (ix86_apx_features & apx_nf)
 
 #include "config/vxworks-dummy.h"
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 764bfe20ff2..4a9e35c4990 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6233,6 +6233,48 @@
     }
 })
 

+;; NF instructions.
+
+(define_insn "*add<mode>_1_nf"
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=rm,rje,r,r,r,r,r,r")
+	(plus:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand" "%0,0,0,r,r,rje,jM,r")
+	  (match_operand:SWI 2 "x86_64_general_operand" 
+"r,e,BM,0,le,r,e,BM")))]
+  "TARGET_APX_NF &&
+   ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
+			    TARGET_APX_NDD)"
+{
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
+  if (which_alternative == 3)
+      std::swap (operands[1], operands[2]);
+
+  if (operands[2] == const1_rtx)
+    return use_ndd
+	  ? "%{nf%} inc{<imodesuffix>}\t{%1, %0|%0, %1}"
+	  : "%{nf%} inc{<imodesuffix>}\t{%0|%0}";
+
+  if (operands[2] == constm1_rtx)
+    return use_ndd
+	  ? "%{nf%} dec{<imodesuffix>}\t{%1, %0|%0, %1}"
+	  : "%{nf%} dec{<imodesuffix>}\t{%0|%0}";
+
+  return use_ndd
+	 ? "%{nf%} add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+	 : "%{nf%} add{<imodesuffix>}\t{%2, %0|%0, %2}"; }
+  [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd")
+   (set (attr "type")
+     (cond [(eq_attr "alternative" "4")
+              (const_string "lea")
+	   ]
+	   (const_string "alu")))
+   (set (attr "length_immediate")
+      (if_then_else
+	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
+	(const_string "1")
+	(const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
 ;; Load effective address instructions
 
 (define_insn "*lea<mode>"
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index d5f793a9e8b..66021d59d4e 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1356,6 +1356,9 @@ Enum(apx_features) String(ndd) Value(apx_ndd) Set(4)  EnumValue
 Enum(apx_features) String(ppx) Value(apx_ppx) Set(5)
 
+EnumValue
+Enum(apx_features) String(nf) Value(apx_nf) Set(6)
+
 EnumValue
 Enum(apx_features) String(all) Value(apx_all) Set(1)
 
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index 0eb751ad225..0ff4df0780c 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-mapxf -march=x86-64 -O2" } */
+/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx -march=x86-64 
+-O2" } */
 /* { dg-final { scan-assembler-not "movl"} } */
 
 #include <stdint.h>
diff --git a/gcc/testsuite/gcc.target/i386/apx-nf.c b/gcc/testsuite/gcc.target/i386/apx-nf.c
new file mode 100644
index 00000000000..3adc7a27902
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-nf.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx,nf -march=x86-64 
+-O2" } */
+/* { dg-final { scan-assembler-times "\{nf\} add" 4 } } */
+
+#include "apx-ndd.c"
+
--
2.31.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 2/8] [APX NF] Support APX NF for {sub/and/or/xor/neg}
       [not found] ` <20240515070226.3760873-2-lingling.kong@intel.com>
@ 2024-05-15  7:44   ` Kong, Lingling
  0 siblings, 0 replies; 13+ messages in thread
From: Kong, Lingling @ 2024-05-15  7:44 UTC (permalink / raw)
  To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak

gcc/ChangeLog:

	* config/i386/i386.md (*sub<mode>_1_nf): New define_insn.
	(*anddi_1_nf): Ditto.
	(*and<mode>_1_nf): Ditto.
	(*<code>qi_1_nf): Ditto.
	(*<code><mode>_1_nf): Ditto.
	(*neg<mode>_1_nf): Ditto.
	* config/i386/sse.md: New define_split.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/apx-nf.c: Add test.
---
 gcc/config/i386/i386.md                | 129 +++++++++++++++++++++++++
 gcc/config/i386/sse.md                 |  11 +++
 gcc/testsuite/gcc.target/i386/apx-nf.c |   9 ++
 3 files changed, 149 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 4a9e35c4990..66dc5e1035f 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -7888,6 +7888,24 @@
   "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);"
 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
 
+(define_insn "*sub<mode>_1_nf"
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,rjM,<r>,r,r,r")
+	(minus:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand" "0,0,0,rm,rjM,r")
+	  (match_operand:SWI 2 "<general_operand>" "<r>,<i>,<m>,r,<i>,<m>")))]
+  "TARGET_APX_NF &&
+   ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+  %{nf%} sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  %{nf%} sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  %{nf%} sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  %{nf%} sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  %{nf%} sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  %{nf%} sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*sub<mode>_1"
   [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r,r")
 	(minus:SWI
@@ -11790,6 +11808,27 @@
 }
 [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd_64,apx_ndd")])
 
+(define_insn "*anddi_1_nf"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,rjM,r,r,r,r,?k")
+	(and:DI
+	 (match_operand:DI 1 "nonimmediate_operand" "%0,r,0,0,0,rm,rjM,r,k")
+	 (match_operand:DI 2 "x86_64_szext_general_operand" 
+"Z,Z,r,e,m,r,e,m,k")))]
+  "TARGET_APX_NF
+   && ix86_binary_operator_ok (AND, DImode, operands, TARGET_APX_NDD)"
+  "@
+   %{nf%} and{l}\t{%k2, %k0|%k0, %k2}
+   %{nf%} and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
+   %{nf%} and{q}\t{%2, %0|%0, %2}
+   %{nf%} and{q}\t{%2, %0|%0, %2}
+   %{nf%} and{q}\t{%2, %0|%0, %2}
+   %{nf%} and{q}\t{%2, %1, %0|%0, %1, %2}
+   %{nf%} and{q}\t{%2, %1, %0|%0, %1, %2}
+   %{nf%} and{q}\t{%2, %1, %0|%0, %1, %2}
+   #"
+  [(set_attr "isa" "*,apx_ndd,*,*,*,apx_ndd,apx_ndd,apx_ndd,avx512bw")
+   (set_attr "type" "alu,alu,alu,alu,alu,alu,alu,alu,msklog")
+   (set_attr "mode" "SI,SI,DI,DI,DI,DI,DI,DI,DI")])
+
 (define_insn "*anddi_1"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r,r,r,r,?k")
 	(and:DI
@@ -11889,6 +11928,33 @@
    (set_attr "isa" "*,apx_ndd,apx_ndd,apx_ndd")
    (set_attr "mode" "SI")])
 
+(define_insn "*and<mode>_1_nf"
+  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,rjM,r,r,r,r,?k")
+	(and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,0,rm,rjM,r,k")
+		   (match_operand:SWI24 2 "<general_operand>" 
+"r,<i>,<m>,r,<i>,<m>,k")))]
+  "TARGET_APX_NF &&
+   ix86_binary_operator_ok (AND, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+   %{nf%} and{<imodesuffix>}\t{%2, %0|%0, %2}
+   %{nf%} and{<imodesuffix>}\t{%2, %0|%0, %2}
+   %{nf%} and{<imodesuffix>}\t{%2, %0|%0, %2}
+   %{nf%} and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   %{nf%} and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   %{nf%} and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   #"
+  [(set (attr "isa")
+	(cond [(eq_attr "alternative" "3,4,5")
+		 (const_string "apx_ndd")
+	       (eq_attr "alternative" "6")
+		 (if_then_else (eq_attr "mode" "SI")
+		   (const_string "avx512bw")
+		   (const_string "avx512f"))
+	      ]
+	      (const_string "*")))
+   (set_attr "type" "alu,alu,alu,alu,alu,alu,msklog")
+   (set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*and<mode>_1"
   [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,r,r,r,Ya,?k")
 	(and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,rm,rjM,r,qm,k") @@ -11923,6 +11989,37 @@
        (const_string "*")))
    (set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,<MODE>,SI,<MODE>")])
 
+;; NF for and,or,xor
+
+(define_insn "*<code>qi_1_nf"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
+	(any_logic:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
+		   (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))]
+  "TARGET_APX_NF &&
+   ix86_binary_operator_ok (<CODE>, QImode, operands, TARGET_APX_NDD)"
+  "@
+   %{nf%} <logic>{b}\t{%2, %0|%0, %2}
+   %{nf%} <logic>{b}\t{%2, %0|%0, %2}
+   %{nf%} <logic>{l}\t{%k2, %k0|%k0, %k2}
+   %{nf%} <logic>{b}\t{%2, %1, %0|%0, %1, %2}
+   %{nf%} <logic>{b}\t{%2, %1, %0|%0, %1, %2}
+   #"
+  [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f")
+   (set_attr "type" "alu,alu,alu,alu,alu,msklog")
+   (set (attr "mode")
+	(cond [(eq_attr "alternative" "2")
+		 (const_string "SI")
+		(and (eq_attr "alternative" "5")
+		     (match_test "!TARGET_AVX512DQ"))
+		 (const_string "HI")
+	       ]
+	       (const_string "QI")))
+   ;; Potential partial reg stall on alternative 2.
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "2")
+	      (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
+	   (symbol_ref "true")))])
+
 (define_insn "*andqi_1"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
 	(and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k") @@ -12797,6 +12894,26 @@  }  [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd_64,apx_ndd")])
 
+;; or xor
+(define_insn "*<code><mode>_1_nf"
+  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,rjM,r,r,r,r,?k")
+	(any_or:SWI248
+	  (match_operand:SWI248 1 "nonimmediate_operand" "0,0,0,rm,rjM,r,k")
+	  (match_operand:SWI248 2 "<general_operand>" 
+"r,<i>,<m>,r,<i>,<m>,k")))]
+  "TARGET_APX_NF &&
+   ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+  %{nf%} <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+  %{nf%} <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+  %{nf%} <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+  %{nf%} <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  %{nf%} <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  %{nf%} <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  #"
+  [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd,<kmov_isa>")
+   (set_attr "type" "alu,alu, alu, alu, alu, alu, msklog")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<code><mode>_1"
   [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,r,r,r,?k")
 	(any_or:SWI248
@@ -13529,6 +13646,18 @@
 			      (const_int 0)))
      (clobber (reg:CC FLAGS_REG))])])
 
+(define_insn "*neg<mode>_1_nf"
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
+	(neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")))]
+  "TARGET_APX_NF &&
+   ix86_unary_operator_ok (NEG, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+  %{nf%} neg{<imodesuffix>}\t%0
+  %{nf%} neg{<imodesuffix>}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "negnot")
+   (set_attr "isa" "*,apx_ndd")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*neg<mode>_1"
   [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
 	(neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index f57f36ae380..72d4556f47d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2005,6 +2005,17 @@
 	   ]
 	   (const_string "<MODE>")))])
 
+(define_split
+  [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
+	(any_logic:SWI1248_AVX512BW
+	  (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
+	  (match_operand:SWI1248_AVX512BW 2 "mask_reg_operand")))]
+  "TARGET_AVX512F && reload_completed"
+  [(parallel
+     [(set (match_dup 0)
+	   (any_logic:SWI1248_AVX512BW (match_dup 1) (match_dup 2)))
+      (unspec [(const_int 0)] UNSPEC_MASKOP)])])
+
 (define_split
   [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
 	(any_logic:SWI1248_AVX512BW
diff --git a/gcc/testsuite/gcc.target/i386/apx-nf.c b/gcc/testsuite/gcc.target/i386/apx-nf.c
index 3adc7a27902..608dbf8f5f7 100644
--- a/gcc/testsuite/gcc.target/i386/apx-nf.c
+++ b/gcc/testsuite/gcc.target/i386/apx-nf.c
@@ -1,6 +1,15 @@
 /* { dg-do compile { target { ! ia32 } } } */
 /* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx,nf -march=x86-64 -O2" } */
 /* { dg-final { scan-assembler-times "\{nf\} add" 4 } } */
+/* { dg-final { scan-assembler-times "\{nf\} and" 1 } } */
+/* { dg-final { scan-assembler-times "\{nf\} or" 1 } } */
 
 #include "apx-ndd.c"
 
+struct B { unsigned bit0 : 1; unsigned bit1 : 1; };
+
+void
+foo (struct B *b)
+{
+    b->bit0 = b->bit0 | b->bit1;
+}
--
2.31.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 3/8] [APX NF] Support APX NF for left shift insns
       [not found] ` <20240515070226.3760873-3-lingling.kong@intel.com>
@ 2024-05-15  7:44   ` Kong, Lingling
  0 siblings, 0 replies; 13+ messages in thread
From: Kong, Lingling @ 2024-05-15  7:44 UTC (permalink / raw)
  To: gcc-patches; +Cc: Liu, Hongtao, Uros Bizjak, Kong, Lingling

gcc/ChangeLog:

	* config/i386/i386.md (*ashl<mode>3_1_nf): New.
	(*ashlhi3_1_nf): Ditto.
	(*ashlqi3_1_nf): Ditto.
	* config/i386/sse.md: New define_split.
---
 gcc/config/i386/i386.md | 175 ++++++++++++++++++++++++++++++++++++++++
 gcc/config/i386/sse.md  |  13 +++
 2 files changed, 188 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 66dc5e1035f..9ffdb3fe71a 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -15126,6 +15126,54 @@
   [(set_attr "type" "ishiftx")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*ashl<mode>3_1_nf"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k,rm")
+		      (match_operand:QI 2 "nonmemory_operand" 
+"c<S>,M,r,<KS>,c<S>")))]
+  "TARGET_APX_NF &&
+  ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands, TARGET_APX_NDD)"
+{
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+    case TYPE_ISHIFTX:
+    case TYPE_MSKLOG:
+      return "#";
+
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      return "%{nf%} add{<imodesuffix>}\t%0, %0";
+
+    default:
+      return use_ndd ? "%{nf%} sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+		     : "%{nf%} sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set_attr "isa" "*,*,bmi2,avx512bw,apx_ndd")
+   (set (attr "type")
+     (cond [(eq_attr "alternative" "1")
+	      (const_string "lea")
+	    (eq_attr "alternative" "2")
+	      (const_string "ishiftx")
+	    (eq_attr "alternative" "4")
+	      (const_string "ishift")
+            (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+		      (match_operand 0 "register_operand"))
+		 (match_operand 2 "const1_operand"))
+	      (const_string "alu")
+	    (eq_attr "alternative" "3")
+	      (const_string "msklog")
+	   ]
+	   (const_string "ishift")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (eq_attr "type" "alu")
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*ashl<mode>3_1"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r")
 	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k,rm") @@ -15187,6 +15235,17 @@
    (set_attr "mode" "<MODE>")])
 
 ;; Convert shift to the shiftx pattern to avoid flags dependency.
+;; For NF/NDD doesn't support shift count as r, it just support c<S>, 
+;; but it has no flag.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
+		      (match_operand:QI 2 "register_operand")))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(ashift:SWI48 (match_dup 1) (match_dup 2)))]
+  "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
+
 (define_split
   [(set (match_operand:SWI48 0 "register_operand")
 	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand") @@ -15273,6 +15332,50 @@
 	(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
   "operands[2] = gen_lowpart (SImode, operands[2]);")
 
+(define_insn "*ashlhi3_1_nf"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r")
+	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm")
+		   (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI")))]
+  "TARGET_APX_NF &&
+   ix86_binary_operator_ok (ASHIFT, HImode, operands, TARGET_APX_NDD)"
+{
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+    case TYPE_MSKLOG:
+      return "#";
+
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "%{nf%} add{w}\t%0, %0";
+
+    default:
+      return use_ndd ? "%{nf%} sal{w}\t{%2, %1, %0|%0, %1, %2}"
+		     : "%{nf%} sal{w}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set_attr "isa" "*,*,avx512f,apx_ndd")
+   (set (attr "type")
+     (cond [(eq_attr "alternative" "1")
+	      (const_string "lea")
+	    (eq_attr "alternative" "2")
+	      (const_string "msklog")
+	    (eq_attr "alternative" "3")
+	      (const_string "ishift")
+            (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+		      (match_operand 0 "register_operand"))
+		 (match_operand 2 "const1_operand"))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (eq_attr "type" "alu")
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "HI,SI,HI,HI")])
+
 (define_insn "*ashlhi3_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r")
 	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm") @@ -15326,6 +15429,61 @@
        (const_string "*")))
    (set_attr "mode" "HI,SI,HI,HI")])
 
+(define_insn "*ashlqi3_1_nf"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r")
+	(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm")
+		   (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI")))]
+  "TARGET_APX_NF &&
+   ix86_binary_operator_ok (ASHIFT, QImode, operands, TARGET_APX_NDD)"
+{
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+    case TYPE_MSKLOG:
+      return "#";
+
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      if (REG_P (operands[1]) && !ANY_QI_REGNO_P (REGNO (operands[1])))
+        return "%{nf%} add{l}\t%k0, %k0";
+      else
+        return "%{nf%} add{b}\t%0, %0";
+
+    default:
+      if (get_attr_mode (insn) == MODE_SI)
+	return "%{nf%} sal{l}\t{%2, %k0|%k0, %2}";
+      else
+	return use_ndd ? "%{nf%} sal{b}\t{%2, %1, %0|%0, %1, %2}"
+		       : "%{nf%} sal{b}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set_attr "isa" "*,*,*,avx512dq,apx_ndd")
+   (set (attr "type")
+     (cond [(eq_attr "alternative" "2")
+	      (const_string "lea")
+	    (eq_attr "alternative" "3")
+	      (const_string "msklog")
+	    (eq_attr "alternative" "4")
+	      (const_string "ishift")
+            (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+		      (match_operand 0 "register_operand"))
+		 (match_operand 2 "const1_operand"))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (eq_attr "type" "alu")
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "QI,SI,SI,QI,QI")
+   ;; Potential partial reg stall on alternative 1.
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "1")
+	      (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
+	   (symbol_ref "true")))])
+
 (define_insn "*ashlqi3_1"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r")
 	(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm") @@ -15448,6 +15606,23 @@
    (set_attr "mode" "<MODE>")])
 
 ;; Convert ashift to the lea pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:SWI 0 "general_reg_operand")
+	(ashift:SWI (match_operand:SWI 1 "index_reg_operand")
+		    (match_operand 2 "const_0_to_3_operand")))]
+  "reload_completed
+   && REGNO (operands[0]) != REGNO (operands[1])"
+  [(set (match_dup 0)
+	(mult:<LEAMODE> (match_dup 1) (match_dup 2)))] {
+  if (<MODE>mode != <LEAMODE>mode)
+    {
+      operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
+      operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
+    }
+  operands[2] = GEN_INT (1 << INTVAL (operands[2]));
+})
+
 (define_split
   [(set (match_operand:SWI 0 "general_reg_operand")
 	(ashift:SWI (match_operand:SWI 1 "index_reg_operand") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 72d4556f47d..498ca5e4d1b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2169,6 +2169,19 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "<MODE>")])
 
+(define_split
+  [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
+	(any_lshift:SWI1248_AVX512BW
+	  (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
+	  (match_operand 2 "const_int_operand")))]
+  "TARGET_AVX512F && reload_completed"
+  [(parallel
+     [(set (match_dup 0)
+	   (any_lshift:SWI1248_AVX512BW
+	     (match_dup 1)
+	     (match_dup 2)))
+      (unspec [(const_int 0)] UNSPEC_MASKOP)])])
+
 (define_split
   [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
 	(any_lshift:SWI1248_AVX512BW
--
2.31.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 4/8] [APX NF] Support APX NF for right shift insns
       [not found] ` <20240515070226.3760873-4-lingling.kong@intel.com>
@ 2024-05-15  7:45   ` Kong, Lingling
  0 siblings, 0 replies; 13+ messages in thread
From: Kong, Lingling @ 2024-05-15  7:45 UTC (permalink / raw)
  To: gcc-patches; +Cc: Liu, Hongtao, Uros Bizjak, Kong, Lingling

gcc/ChangeLog:

	* config/i386/i386.md (*ashr<mode>3_1_nf): New.
	(*lshr<mode>3_1_nf): Ditto.
	(*lshrqi3_1_nf): Ditto.
	(*lshrhi3_1_nf): Ditto.
---
 gcc/config/i386/i386.md | 85 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 9ffdb3fe71a..adcb09fcdd0 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -16569,6 +16569,21 @@
   [(set_attr "type" "ishiftx")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*ashr<mode>3_1_nf"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
+	(ashiftrt:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r,c<S>")))]
+  "TARGET_APX_NF &&
+   ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+   %{nf%} sar{<imodesuffix>}\t{%2, %0|%0, %2}
+   #
+   %{nf%} sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,bmi2,apx_ndd")
+   (set_attr "type" "ishift,ishiftx,ishift")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*ashr<mode>3_1"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
 	(ashiftrt:SWI48
@@ -16630,6 +16645,21 @@
 }
 [(set_attr "isa" "*,*,*,apx_ndd")])
 
+(define_insn "*lshr<mode>3_1_nf"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k,r")
+	(lshiftrt:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>,c<S>")))]
+  "TARGET_APX_NF &&
+  ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+   %{nf%} shr{<imodesuffix>}\t{%2, %0|%0, %2}
+   #
+   #
+   %{nf%} shr{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,bmi2,avx512bw,apx_ndd")
+   (set_attr "type" "ishift,ishiftx,msklog,ishift")
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*lshr<mode>3_1"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k,r") @@ -16669,6 +16699,17 @@
    (set_attr "mode" "<MODE>")])
 
 ;; Convert shift to the shiftx pattern to avoid flags dependency.
+;; For NF/NDD doesn't support shift count as r, it just support c<S>, 
+;; but it has no flag.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand")
+	(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
+			   (match_operand:QI 2 "register_operand")))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))]
+  "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
+
 (define_split
   [(set (match_operand:SWI48 0 "register_operand")
 	(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand") @@ -16737,6 +16778,20 @@
 	(zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
   "operands[2] = gen_lowpart (SImode, operands[2]);")
 
+(define_insn "*ashr<mode>3_1_nf"
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, r")
+	(ashiftrt:SWI12
+	  (match_operand:SWI12 1 "nonimmediate_operand" "0, rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>, c<S>")))]
+  "TARGET_APX_NF &&
+   ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+   %{nf%} sar{<imodesuffix>}\t{%2, %0|%0, %2}
+   %{nf%} sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*, apx_ndd")
+   (set_attr "type" "ishift")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*ashr<mode>3_1"
   [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, r")
 	(ashiftrt:SWI12
@@ -16765,6 +16820,21 @@
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*lshrqi3_1_nf"
+  [(set (match_operand:QI 0 "nonimmediate_operand"  "=qm,?k,r")
+	(lshiftrt:QI
+	  (match_operand:QI 1 "nonimmediate_operand" "0,k,rm")
+	  (match_operand:QI 2 "nonmemory_operand"    "cI,Wb,cI")))]
+  "TARGET_APX_NF &&
+   ix86_binary_operator_ok (LSHIFTRT, QImode, operands, TARGET_APX_NDD)"
+  "@
+   %{nf%} shr{b}\t{%2, %0|%0, %2}
+   #
+   %{nf%} shr{b}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,avx512dq,apx_ndd")
+   (set_attr "type" "ishift,msklog,ishift")
+   (set_attr "mode" "QI")])
+
 (define_insn "*lshrqi3_1"
   [(set (match_operand:QI 0 "nonimmediate_operand"  "=qm,?k,r")
 	(lshiftrt:QI
@@ -16802,6 +16872,21 @@
        (const_string "*")))
    (set_attr "mode" "QI")])
 
+(define_insn "*lshrhi3_1_nf"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,?k,r")
+	(lshiftrt:HI
+	  (match_operand:HI 1 "nonimmediate_operand" "0,k,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "cI,Ww,cI")))]
+  "TARGET_APX_NF &&
+   ix86_binary_operator_ok (LSHIFTRT, HImode, operands, TARGET_APX_NDD)"
+  "@
+   %{nf%} shr{w}\t{%2, %0|%0, %2}
+   #
+   %{nf%} shr{w}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*, avx512f, apx_ndd")
+   (set_attr "type" "ishift,msklog,ishift")
+   (set_attr "mode" "HI")])
+
 (define_insn "*lshrhi3_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k, r")
 	(lshiftrt:HI
--
2.31.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 5/8] [APX NF] Support APX NF for rotate insns
       [not found] ` <20240515070226.3760873-5-lingling.kong@intel.com>
@ 2024-05-15  7:45   ` Kong, Lingling
  0 siblings, 0 replies; 13+ messages in thread
From: Kong, Lingling @ 2024-05-15  7:45 UTC (permalink / raw)
  To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak

gcc/ChangeLog:

	* config/i386/i386.md (ashr<mode>3_cvt_nf): New define_insn.
	(*<insn><mode>3_1_nf): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/apx-nf.c: Add NF test for rotate insns.
---
 gcc/config/i386/i386.md                | 80 ++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/apx-nf.c |  5 ++
 2 files changed, 85 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index adcb09fcdd0..ff44154b26b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -16491,6 +16491,25 @@
 (define_mode_attr cvt_mnemonic
   [(SI "{cltd|cdq}") (DI "{cqto|cqo}")])
 
+(define_insn "ashr<mode>3_cvt_nf"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+	(ashiftrt:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
+	  (match_operand:QI 2 "const_int_operand")))]
+  "TARGET_APX_NF &&
+   INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)-1
+   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+   %{nf%} sar{<imodesuffix>}\t{%2, %0|%0, %2}
+   %{nf%} sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "ishift")
+   (set_attr "prefix_0f" "*")
+   (set_attr "length_immediate" "*")
+   (set_attr "modrm" "1")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "ashr<mode>3_cvt"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm,r")
 	(ashiftrt:SWI48
@@ -17430,6 +17449,39 @@
   [(set_attr "type" "rotatex")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*<insn><mode>3_1_nf"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+	(any_rotate:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,c<S>")))]
+  "TARGET_APX_NF &&
+  ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
+{
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+      && !use_ndd)
+    return "%{nf%} <rotate>{<imodesuffix>}\t%0";
+  else
+    return use_ndd ? "%{nf%} <rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+		   : "%{nf%} <rotate>{<imodesuffix>}\t{%2, %0|%0, %2}"; }
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "rotate")
+   (set (attr "preferred_for_size")
+     (cond [(eq_attr "alternative" "0")
+	      (symbol_ref "true")]
+	   (symbol_ref "false")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (eq_attr "type" "rotate")
+	    (and (match_operand 2 "const1_operand")
+		 (ior (match_test "TARGET_SHIFT1")
+		      (match_test "optimize_function_for_size_p (cfun)"))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<insn><mode>3_1"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
 	(any_rotate:SWI48
@@ -17572,6 +17624,34 @@
   [(set (match_dup 0)
 	(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])
 
+;; NF ({nf}-prefixed) rotate for the SWI12 modes, enabled by TARGET_APX_NF.
+;; Alternative 0 rotates operand 0 in place; alternative 1 (isa apx_ndd)
+;; is the three-operand form with a separate source.
+(define_insn "*<insn><mode>3_1_nf"
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m,r")
+	(any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0,rm")
+			  (match_operand:QI 2 "nonmemory_operand" "c<S>,c<S>")))]
+  "TARGET_APX_NF &&
+  ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
+{
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
+  /* The count-less rotate-by-1 form is only used for the non-NDD
+     alternative; the NDD form always prints the count operand.  */
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+      && !use_ndd)
+    return "%{nf%} <rotate>{<imodesuffix>}\t%0";
+  else
+    return use_ndd
+	   ? "%{nf%} <rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+	   : "%{nf%} <rotate>{<imodesuffix>}\t{%2, %0|%0, %2}"; }
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "rotate")
+   ;; Only alternative 0 can drop the immediate (see the output code
+   ;; above), so the zero immediate length must not apply to the NDD
+   ;; alternative.
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (eq_attr "alternative" "0")
+	    (and (match_operand 2 "const1_operand")
+		 (ior (match_test "TARGET_SHIFT1")
+		      (match_test "optimize_function_for_size_p (cfun)"))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
+
 (define_insn "*<insn><mode>3_1"
   [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m,r")
 	(any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0,rm")
diff --git a/gcc/testsuite/gcc.target/i386/apx-nf.c b/gcc/testsuite/gcc.target/i386/apx-nf.c
index 608dbf8f5f7..6e59803be64 100644
--- a/gcc/testsuite/gcc.target/i386/apx-nf.c
+++ b/gcc/testsuite/gcc.target/i386/apx-nf.c
@@ -3,6 +3,7 @@
 /* { dg-final { scan-assembler-times "\{nf\} add" 4 } } */
 /* { dg-final { scan-assembler-times "\{nf\} and" 1 } } */
 /* { dg-final { scan-assembler-times "\{nf\} or" 1 } } */
+/* { dg-final { scan-assembler-times "\{nf\} rol" 4 } } */
 
 #include "apx-ndd.c"
 
@@ -13,3 +14,7 @@ foo (struct B *b)
 {
     b->bit0 = b->bit0 | b->bit1;
 }
+long int f1 (int x) { return ~(1ULL << (x & 0x3f)); }
+long int f2 (int x) { return ~(1ULL << x); }
+long int f3 (unsigned char *x) { return ~(1ULL << (x[0] & 0x3f)); }
+long int f4 (unsigned char *x) { return ~(1ULL << x[0]); }
--
2.31.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 6/8] [APX NF] Support APX NF for shld/shrd
       [not found] ` <20240515070226.3760873-6-lingling.kong@intel.com>
@ 2024-05-15  7:46   ` Kong, Lingling
  0 siblings, 0 replies; 13+ messages in thread
From: Kong, Lingling @ 2024-05-15  7:46 UTC (permalink / raw)
  To: gcc-patches; +Cc: Kong, Lingling, Liu, Hongtao, Uros Bizjak

gcc/ChangeLog:

	* config/i386/i386.md (x86_64_shld_nf): New define_insn.
	(x86_64_shld_ndd_nf): Ditto.
	(x86_64_shld_1_nf): Ditto.
	(x86_64_shld_ndd_1_nf): Ditto.
	(*x86_64_shld_shrd_1_nozext_nf): Ditto.
	(x86_shld_nf): Ditto.
	(x86_shld_ndd_nf): Ditto.
	(x86_shld_1_nf): Ditto.
	(x86_shld_ndd_1_nf): Ditto.
	(*x86_shld_shrd_1_nozext_nf): Ditto.
	(<insn><dwi>3_doubleword_lowpart_nf): Ditto.
	(x86_64_shrd_nf): Ditto.
	(x86_64_shrd_ndd_nf): Ditto.
	(x86_64_shrd_1_nf): Ditto.
	(x86_64_shrd_ndd_1_nf): Ditto.
	(*x86_64_shrd_shld_1_nozext_nf): Ditto.
	(x86_shrd_nf): Ditto.
	(x86_shrd_ndd_nf): Ditto.
	(x86_shrd_1_nf): Ditto.
	(x86_shrd_ndd_1_nf): Ditto.
	(*x86_shrd_shld_1_nozext_nf): Ditto.
---
 gcc/config/i386/i386.md | 518 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 518 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ff44154b26b..f9a62fba0c4 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14666,6 +14666,26 @@
   DONE;
 })
 
+;; NF ({nf}-prefixed) shld{q} with a variable or immediate count
+;; (operand 2), which the RTL masks with 63.  Operand 0 is both input and
+;; output.  TARGET_64BIT is required explicitly, matching
+;; x86_64_shld_1_nf, since the pattern is written in DImode/TImode.
+(define_insn "x86_64_shld_nf"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+        (ior:DI (ashift:DI (match_dup 0)
+		  (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
+			  (const_int 63)))
+		(subreg:DI
+		  (lshiftrt:TI
+		    (zero_extend:TI
+		      (match_operand:DI 1 "register_operand" "r"))
+		    (minus:QI (const_int 64)
+			      (and:QI (match_dup 2) (const_int 63)))) 0)))]
+  "TARGET_64BIT && TARGET_APX_NF"
+  "%{nf%} shld{q}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+
 (define_insn "x86_64_shld"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
         (ior:DI (ashift:DI (match_dup 0) @@ -14687,6 +14707,22 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+;; NF ({nf}-prefixed) NDD form of shld{q}: register destination
+;; (operand 0), shifted source in operand 1 (register or memory), fill
+;; bits taken from operand 2, count in operand 3 (masked with 63 in the
+;; RTL).  Requires both TARGET_APX_NDD and TARGET_APX_NF.
+(define_insn "x86_64_shld_ndd_nf"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
+		  (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
+			  (const_int 63)))
+		(subreg:DI
+		  (lshiftrt:TI
+		    (zero_extend:TI
+		      (match_operand:DI 2 "register_operand" "r"))
+		    (minus:QI (const_int 64)
+			      (and:QI (match_dup 3) (const_int 63)))) 0)))]
+  "TARGET_APX_NDD && TARGET_APX_NF"
+  "%{nf%} shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "DI")])
+
+
 (define_insn "x86_64_shld_ndd"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm") @@ -14704,6 +14740,43 @@
   [(set_attr "type" "ishift")
    (set_attr "mode" "DI")])
 
+;; NF ({nf}-prefixed) shld{q} with constant counts.  Operand 2 is the
+;; left-shift count and operand 3 the complementary right-shift count;
+;; the insn condition requires operand 3 == 64 - operand 2.
+(define_insn "x86_64_shld_1_nf"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+        (ior:DI (ashift:DI (match_dup 0)
+			   (match_operand:QI 2 "const_0_to_63_operand"))
+		(subreg:DI
+		  (lshiftrt:TI
+		    (zero_extend:TI
+		      (match_operand:DI 1 "register_operand" "r"))
+		    (match_operand:QI 3 "const_0_to_255_operand")) 0)))]
+  "TARGET_64BIT
+   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+   && TARGET_APX_NF"
+  "%{nf%} shld{q}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")
+   (set_attr "length_immediate" "1")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+
+;; NF ({nf}-prefixed) NDD form of shld{q} with constant counts.
+;; Operand 3 is the left-shift count and operand 4 the complementary
+;; right-shift count; the condition requires operand 4 == 64 - operand 3.
+(define_insn "x86_64_shld_ndd_1_nf"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
+			   (match_operand:QI 3 "const_0_to_63_operand"))
+		(subreg:DI
+		  (lshiftrt:TI
+		    (zero_extend:TI
+		      (match_operand:DI 2 "register_operand" "r"))
+		    (match_operand:QI 4 "const_0_to_255_operand")) 0)))]
+  "TARGET_APX_NDD && TARGET_APX_NF
+   && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
+  "%{nf%} shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "DI")
+   (set_attr "length_immediate" "1")])
+
+
 (define_insn "x86_64_shld_1"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
         (ior:DI (ashift:DI (match_dup 0) @@ -14742,6 +14815,58 @@
    (set_attr "mode" "DI")
    (set_attr "length_immediate" "1")])
 
+;; Pre-reload splitter (NF variant) for
+;;   (op4 << op2) | (op1 >> op3)   with op3 == 64 - op2, DImode,
+;; written without the zero_extend/subreg wrapping of the shld/shrd insns.
+;; The split code picks the replacement as follows:
+;;  - op4 is the destination: shld_1_nf in place (op1 forced to a reg);
+;;  - op1 is the destination: shrd_1_nf in place with the counts swapped
+;;    (op4 forced to a reg);
+;;  - otherwise, with TARGET_APX_NDD: an NDD shld/shrd into a fresh
+;;    pseudo, using whichever form lets a memory input be the "rm"
+;;    operand, then a move to the destination;
+;;  - otherwise: copy op4 to a fresh pseudo, shld_1_nf on it, then move.
+(define_insn_and_split "*x86_64_shld_shrd_1_nozext_nf"
+  [(set (match_operand:DI 0 "nonimmediate_operand")
+	(ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand")
+			     (match_operand:QI 2 "const_0_to_63_operand"))
+		(lshiftrt:DI
+		  (match_operand:DI 1 "nonimmediate_operand")
+		  (match_operand:QI 3 "const_0_to_63_operand"))))]
+  "TARGET_64BIT && TARGET_APX_NF
+   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (rtx_equal_p (operands[4], operands[0]))
+    {
+      operands[1] = force_reg (DImode, operands[1]);
+      emit_insn (gen_x86_64_shld_1_nf (operands[0], operands[1], operands[2], operands[3]));
+    }
+  else if (rtx_equal_p (operands[1], operands[0]))
+    {
+      operands[4] = force_reg (DImode, operands[4]);
+      emit_insn (gen_x86_64_shrd_1_nf (operands[0], operands[4], operands[3], operands[2]));
+    }
+  else if (TARGET_APX_NDD)
+    {
+     rtx tmp = gen_reg_rtx (DImode);
+     if (MEM_P (operands[4]))
+       {
+	 operands[1] = force_reg (DImode, operands[1]);
+	 emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[4], operands[1],
+					   operands[2], operands[3]));
+       }
+     else if (MEM_P (operands[1]))
+       emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[1], operands[4],
+					 operands[3], operands[2]));
+     else
+       emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[4], operands[1],
+					 operands[2], operands[3]));
+     emit_move_insn (operands[0], tmp);
+    }
+  else
+   {
+     operands[1] = force_reg (DImode, operands[1]);
+     rtx tmp = gen_reg_rtx (DImode);
+     emit_move_insn (tmp, operands[4]);
+     emit_insn (gen_x86_64_shld_1_nf (tmp, operands[1], operands[2], operands[3]));
+     emit_move_insn (operands[0], tmp);
+   }
+   DONE;
+})
+
 
 (define_insn_and_split "*x86_64_shld_shrd_1_nozext"
   [(set (match_operand:DI 0 "nonimmediate_operand") @@ -14844,6 +14969,81 @@
   emit_move_insn (operands[4], operands[0]);
 })
 
+;; NF ({nf}-prefixed) shld{l} with a variable or immediate count
+;; (operand 2), which the RTL masks with 31.  Operand 0 is both input and
+;; output; the fill bits come from register operand 1.
+(define_insn "x86_shld_nf"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+        (ior:SI (ashift:SI (match_dup 0)
+		  (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
+			  (const_int 31)))
+		(subreg:SI
+		  (lshiftrt:DI
+		    (zero_extend:DI
+		      (match_operand:SI 1 "register_operand" "r"))
+		    (minus:QI (const_int 32)
+			      (and:QI (match_dup 2) (const_int 31)))) 0)))]
+  "TARGET_APX_NF"
+  "%{nf%} shld{l}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+
+;; NF ({nf}-prefixed) NDD form of shld{l}: register destination
+;; (operand 0), shifted source in operand 1 (register or memory), fill
+;; bits taken from operand 2, count in operand 3 (masked with 31 in the
+;; RTL).  The destination predicate is register_operand, in agreement
+;; with its register-only "=r" constraint and with the other *_ndd_nf
+;; patterns.
+(define_insn "x86_shld_ndd_nf"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+		  (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
+			  (const_int 31)))
+		(subreg:SI
+		  (lshiftrt:DI
+		    (zero_extend:DI
+		      (match_operand:SI 2 "register_operand" "r"))
+		    (minus:QI (const_int 32)
+			      (and:QI (match_dup 3) (const_int 31)))) 0)))]
+  "TARGET_APX_NDD && TARGET_APX_NF"
+  "%{nf%} shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
+
+;; NF ({nf}-prefixed) shld{l} with constant counts.  Operand 2 is the
+;; left-shift count and operand 3 the complementary right-shift count;
+;; the insn condition requires operand 3 == 32 - operand 2.
+(define_insn "x86_shld_1_nf"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+        (ior:SI (ashift:SI (match_dup 0)
+			   (match_operand:QI 2 "const_0_to_31_operand"))
+		(subreg:SI
+		  (lshiftrt:DI
+		    (zero_extend:DI
+		      (match_operand:SI 1 "register_operand" "r"))
+		    (match_operand:QI 3 "const_0_to_63_operand")) 0)))]
+  "TARGET_APX_NF
+   && INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
+  "%{nf%} shld{l}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+
+;; NF ({nf}-prefixed) NDD form of shld{l} with constant counts.
+;; Operand 3 is the left-shift count and operand 4 the complementary
+;; right-shift count; the condition requires operand 4 == 32 - operand 3.
+(define_insn "x86_shld_ndd_1_nf"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+			   (match_operand:QI 3 "const_0_to_31_operand"))
+		(subreg:SI
+		  (lshiftrt:DI
+		    (zero_extend:DI
+		      (match_operand:SI 2 "register_operand" "r"))
+		    (match_operand:QI 4 "const_0_to_63_operand")) 0)))]
+  "TARGET_APX_NDD && TARGET_APX_NF
+   && INTVAL (operands[4]) == 32 - INTVAL (operands[3])"
+  "%{nf%} shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "SI")])
+
+
 (define_insn "x86_shld"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (ashift:SI (match_dup 0) @@ -14922,6 +15122,57 @@
    (set_attr "length_immediate" "1")
    (set_attr "mode" "SI")])
 
+;; Pre-reload splitter (NF variant) for
+;;   (op4 << op2) | (op1 >> op3)   with op3 == 32 - op2, SImode,
+;; written without the zero_extend/subreg wrapping of the shld/shrd insns.
+;; The split code picks the replacement as follows:
+;;  - op4 is the destination: shld_1_nf in place (op1 forced to a reg);
+;;  - op1 is the destination: shrd_1_nf in place with the counts swapped
+;;    (op4 forced to a reg);
+;;  - otherwise, with TARGET_APX_NDD: an NDD shld/shrd into a fresh
+;;    pseudo, using whichever form lets a memory input be the "rm"
+;;    operand, then a move to the destination;
+;;  - otherwise: copy op4 to a fresh pseudo, shld_1_nf on it, then move.
+(define_insn_and_split "*x86_shld_shrd_1_nozext_nf"
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand")
+			     (match_operand:QI 2 "const_0_to_31_operand"))
+	       (lshiftrt:SI
+		   (match_operand:SI 1 "nonimmediate_operand")
+		   (match_operand:QI 3 "const_0_to_31_operand"))))]
+  "TARGET_APX_NF &&
+  INTVAL (operands[3]) == 32 - INTVAL (operands[2])
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (rtx_equal_p (operands[4], operands[0]))
+    {
+      operands[1] = force_reg (SImode, operands[1]);
+      emit_insn (gen_x86_shld_1_nf (operands[0], operands[1], operands[2], operands[3]));
+    }
+  else if (rtx_equal_p (operands[1], operands[0]))
+    {
+      operands[4] = force_reg (SImode, operands[4]);
+      emit_insn (gen_x86_shrd_1_nf (operands[0], operands[4], operands[3], operands[2]));
+    }
+  else if (TARGET_APX_NDD)
+    {
+     rtx tmp = gen_reg_rtx (SImode);
+     if (MEM_P (operands[4]))
+       {
+	 operands[1] = force_reg (SImode, operands[1]);
+	 emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[4], operands[1],
+					operands[2], operands[3]));
+       }
+     else if (MEM_P (operands[1]))
+       emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[1], operands[4],
+				      operands[3], operands[2]));
+     else
+       emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[4], operands[1],
+				      operands[2], operands[3]));
+     emit_move_insn (operands[0], tmp);
+    }
+ else
+   {
+     operands[1] = force_reg (SImode, operands[1]);
+     rtx tmp = gen_reg_rtx (SImode);
+     emit_move_insn (tmp, operands[4]);
+     emit_insn (gen_x86_shld_1_nf (tmp, operands[1], operands[2], operands[3]));
+     emit_move_insn (operands[0], tmp);
+   }
+   DONE;
+})
 
 (define_insn_and_split "*x86_shld_shrd_1_nozext"
   [(set (match_operand:SI 0 "nonimmediate_operand") @@ -16107,6 +16358,26 @@
 })
 
 ;; Split truncations of double word right shifts into x86_shrd_1.
+;; NF variant: low part of a double-word right shift by a constant smaller
+;; than the word size.  After reload it is split into a word-mode
+;; (lshiftrt | ashift-of-high-part) form; the preparation code splits
+;; operand 1 into low (operands[1]) and high (operands[3]) words, computes
+;; the complementary count in operands[4], and copies the low word into
+;; the destination when they differ.
+(define_insn_and_split "<insn><dwi>3_doubleword_lowpart_nf"
+  [(set (match_operand:DWIH 0 "register_operand" "=&r")
+	(subreg:DWIH
+	  (any_shiftrt:<DWI> (match_operand:<DWI> 1 "register_operand" "r")
+			     (match_operand:QI 2 "const_int_operand")) 0))]
+  "TARGET_APX_NF && UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(ior:DWIH (lshiftrt:DWIH (match_dup 0) (match_dup 2))
+		  (subreg:DWIH
+		    (ashift:<DWI> (zero_extend:<DWI> (match_dup 3))
+				  (match_dup 4)) 0)))]
+{
+  split_double_mode (<DWI>mode, &operands[1], 1, &operands[1], &operands[3]);
+  operands[4] = GEN_INT ((<MODE_SIZE> * BITS_PER_UNIT) - INTVAL (operands[2]));
+  if (!rtx_equal_p (operands[0], operands[1]))
+    emit_move_insn (operands[0], operands[1]);
+})
+
 (define_insn_and_split "<insn><dwi>3_doubleword_lowpart"
   [(set (match_operand:DWIH 0 "register_operand" "=&r")
 	(subreg:DWIH
@@ -16130,6 +16401,26 @@
     emit_move_insn (operands[0], operands[1]);
 })
 
+;; NF ({nf}-prefixed) shrd{q} with a variable or immediate count
+;; (operand 2), which the RTL masks with 63.  Operand 0 is both input and
+;; output.  TARGET_64BIT is required explicitly, matching
+;; x86_64_shld_1_nf, since the pattern is written in DImode/TImode.
+(define_insn "x86_64_shrd_nf"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+        (ior:DI (lshiftrt:DI (match_dup 0)
+		  (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
+			  (const_int 63)))
+		(subreg:DI
+		  (ashift:TI
+		    (zero_extend:TI
+		      (match_operand:DI 1 "register_operand" "r"))
+		    (minus:QI (const_int 64)
+			      (and:QI (match_dup 2) (const_int 63)))) 0)))]
+  "TARGET_64BIT && TARGET_APX_NF"
+  "%{nf%} shrd{q}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+
 (define_insn "x86_64_shrd"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
         (ior:DI (lshiftrt:DI (match_dup 0) @@ -16151,6 +16442,22 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+;; NF ({nf}-prefixed) NDD form of shrd{q}: register destination
+;; (operand 0), shifted source in operand 1 (register or memory), fill
+;; bits taken from operand 2, count in operand 3 (masked with 63 in the
+;; RTL).  Requires both TARGET_APX_NDD and TARGET_APX_NF.
+(define_insn "x86_64_shrd_ndd_nf"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
+		  (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
+			  (const_int 63)))
+		(subreg:DI
+		  (ashift:TI
+		    (zero_extend:TI
+		      (match_operand:DI 2 "register_operand" "r"))
+		    (minus:QI (const_int 64)
+			      (and:QI (match_dup 3) (const_int 63)))) 0)))]
+  "TARGET_APX_NDD && TARGET_APX_NF"
+  "%{nf%} shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "DI")])
+
+
 (define_insn "x86_64_shrd_ndd"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm") @@ -16168,6 +16475,25 @@
   [(set_attr "type" "ishift")
    (set_attr "mode" "DI")])
 
+;; NF ({nf}-prefixed) shrd{q} with constant counts.  Operand 2 is the
+;; right-shift count and operand 3 the complementary left-shift count;
+;; the insn condition requires operand 3 == 64 - operand 2.
+;; TARGET_64BIT is required explicitly, matching x86_64_shld_1_nf.
+(define_insn "x86_64_shrd_1_nf"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+        (ior:DI (lshiftrt:DI (match_dup 0)
+			     (match_operand:QI 2 "const_0_to_63_operand"))
+		(subreg:DI
+		  (ashift:TI
+		    (zero_extend:TI
+		      (match_operand:DI 1 "register_operand" "r"))
+		    (match_operand:QI 3 "const_0_to_255_operand")) 0)))]
+  "TARGET_64BIT && TARGET_APX_NF
+   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
+  "%{nf%} shrd{q}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "DI")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
 
 (define_insn "x86_64_shrd_1"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") @@ -16190,6 +16516,22 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+;; NF ({nf}-prefixed) NDD form of shrd{q} with constant counts.
+;; Operand 3 is the right-shift count and operand 4 the complementary
+;; left-shift count; the condition requires operand 4 == 64 - operand 3.
+(define_insn "x86_64_shrd_ndd_1_nf"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
+			     (match_operand:QI 3 "const_0_to_63_operand"))
+		(subreg:DI
+		  (ashift:TI
+		    (zero_extend:TI
+		      (match_operand:DI 2 "register_operand" "r"))
+		    (match_operand:QI 4 "const_0_to_255_operand")) 0)))]
+  "TARGET_APX_NDD && TARGET_APX_NF
+   && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
+  "%{nf%} shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "DI")])
+
+
 (define_insn "x86_64_shrd_ndd_1"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm") @@ -16207,6 +16549,57 @@
    (set_attr "length_immediate" "1")
    (set_attr "mode" "DI")])
 
+;; Pre-reload splitter (NF variant) for
+;;   (op4 >> op2) | (op1 << op3)   with op3 == 64 - op2, DImode,
+;; written without the zero_extend/subreg wrapping of the shrd/shld insns.
+;; The split code picks the replacement as follows:
+;;  - op4 is the destination: shrd_1_nf in place (op1 forced to a reg);
+;;  - op1 is the destination: shld_1_nf in place with the counts swapped
+;;    (op4 forced to a reg);
+;;  - otherwise, with TARGET_APX_NDD: an NDD shrd/shld into a fresh
+;;    pseudo, using whichever form lets a memory input be the "rm"
+;;    operand, then a move to the destination;
+;;  - otherwise: copy op4 to a fresh pseudo, shrd_1_nf on it, then move.
+(define_insn_and_split "*x86_64_shrd_shld_1_nozext_nf"
+  [(set (match_operand:DI 0 "nonimmediate_operand")
+	(ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand")
+			     (match_operand:QI 2 "const_0_to_63_operand"))
+		(ashift:DI
+		  (match_operand:DI 1 "nonimmediate_operand")
+		  (match_operand:QI 3 "const_0_to_63_operand"))))]
+  "TARGET_64BIT && TARGET_APX_NF
+   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (rtx_equal_p (operands[4], operands[0]))
+    {
+      operands[1] = force_reg (DImode, operands[1]);
+      emit_insn (gen_x86_64_shrd_1_nf (operands[0], operands[1], operands[2], operands[3]));
+    }
+  else if (rtx_equal_p (operands[1], operands[0]))
+    {
+      operands[4] = force_reg (DImode, operands[4]);
+      emit_insn (gen_x86_64_shld_1_nf (operands[0], operands[4], operands[3], operands[2]));
+    }
+  else if (TARGET_APX_NDD)
+    {
+      rtx tmp = gen_reg_rtx (DImode);
+      if (MEM_P (operands[4]))
+        {
+	  operands[1] = force_reg (DImode, operands[1]);
+	  emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+					    operands[2], operands[3]));
+        }
+       else if (MEM_P (operands[1]))
+         emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[1], operands[4],
+					   operands[3], operands[2]));
+       else
+         emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+					   operands[2], operands[3]));
+       emit_move_insn (operands[0], tmp);
+    }
+  else
+   {
+     operands[1] = force_reg (DImode, operands[1]);
+     rtx tmp = gen_reg_rtx (DImode);
+     emit_move_insn (tmp, operands[4]);
+     emit_insn (gen_x86_64_shrd_1_nf (tmp, operands[1], operands[2], operands[3]));
+     emit_move_insn (operands[0], tmp);
+   }
+   DONE;
+})
 
 (define_insn_and_split "*x86_64_shrd_shld_1_nozext"
   [(set (match_operand:DI 0 "nonimmediate_operand") @@ -16309,6 +16702,27 @@
   emit_move_insn (operands[4], operands[0]);
 })
 
+;; NF ({nf}-prefixed) shrd{l} with a variable or immediate count
+;; (operand 2), which the RTL masks with 31.  Operand 0 is both input and
+;; output; the fill bits come from register operand 1.
+(define_insn "x86_shrd_nf"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+        (ior:SI (lshiftrt:SI (match_dup 0)
+		  (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
+			  (const_int 31)))
+		(subreg:SI
+		  (ashift:DI
+		    (zero_extend:DI
+		      (match_operand:SI 1 "register_operand" "r"))
+		    (minus:QI (const_int 32)
+			      (and:QI (match_dup 2) (const_int 31)))) 0)))]
+  "TARGET_APX_NF"
+  "%{nf%} shrd{l}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+
 (define_insn "x86_shrd"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (lshiftrt:SI (match_dup 0) @@ -16331,6 +16745,22 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+;; NF ({nf}-prefixed) NDD form of shrd{l}: register destination
+;; (operand 0), shifted source in operand 1 (register or memory), fill
+;; bits taken from operand 2, count in operand 3 (masked with 31 in the
+;; RTL).  Requires both TARGET_APX_NDD and TARGET_APX_NF.
+(define_insn "x86_shrd_ndd_nf"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+		  (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
+			  (const_int 31)))
+		(subreg:SI
+		  (ashift:DI
+		    (zero_extend:DI
+		      (match_operand:SI 2 "register_operand" "r"))
+		    (minus:QI (const_int 32)
+			      (and:QI (match_dup 3) (const_int 31)))) 0)))]
+  "TARGET_APX_NDD && TARGET_APX_NF"
+  "%{nf%} shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
+
 (define_insn "x86_shrd_ndd"
   [(set (match_operand:SI 0 "register_operand" "=r")
         (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm") @@ -16348,6 +16778,27 @@
   [(set_attr "type" "ishift")
    (set_attr "mode" "SI")])
 
+;; NF ({nf}-prefixed) shrd{l} with constant counts.  Operand 2 is the
+;; right-shift count and operand 3 the complementary left-shift count;
+;; the insn condition requires operand 3 == 32 - operand 2.
+(define_insn "x86_shrd_1_nf"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+        (ior:SI (lshiftrt:SI (match_dup 0)
+			     (match_operand:QI 2 "const_0_to_31_operand"))
+		(subreg:SI
+		  (ashift:DI
+		    (zero_extend:DI
+		      (match_operand:SI 1 "register_operand" "r"))
+		    (match_operand:QI 3 "const_0_to_63_operand")) 0)))]
+  "TARGET_APX_NF
+  && INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
+  "%{nf%} shrd{l}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+
 (define_insn "x86_shrd_1"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (lshiftrt:SI (match_dup 0) @@ -16369,6 +16820,22 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+;; NF ({nf}-prefixed) NDD form of shrd{l} with constant counts.
+;; Operand 3 is the right-shift count and operand 4 the complementary
+;; left-shift count; the condition requires operand 4 == 32 - operand 3.
+(define_insn "x86_shrd_ndd_1_nf"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+			     (match_operand:QI 3 "const_0_to_31_operand"))
+		(subreg:SI
+		  (ashift:DI
+		    (zero_extend:DI
+		      (match_operand:SI 2 "register_operand" "r"))
+		    (match_operand:QI 4 "const_0_to_63_operand")) 0)))]
+  "TARGET_APX_NDD && TARGET_APX_NF
+   && (INTVAL (operands[4]) == 32 - INTVAL (operands[3]))"
+  "%{nf%} shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "SI")])
+
+
 (define_insn "x86_shrd_ndd_1"
   [(set (match_operand:SI 0 "register_operand" "=r")
         (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm") @@ -16386,6 +16853,57 @@
    (set_attr "length_immediate" "1")
    (set_attr "mode" "SI")])
 
+;; Pre-reload splitter (NF variant) for
+;;   (op4 >> op2) | (op1 << op3)   with op3 == 32 - op2, SImode,
+;; written without the zero_extend/subreg wrapping of the shrd/shld insns.
+;; The split code picks the replacement as follows:
+;;  - op4 is the destination: shrd_1_nf in place (op1 forced to a reg);
+;;  - op1 is the destination: shld_1_nf in place with the counts swapped
+;;    (op4 forced to a reg);
+;;  - otherwise, with TARGET_APX_NDD: an NDD shrd/shld into a fresh
+;;    pseudo, using whichever form lets a memory input be the "rm"
+;;    operand, then a move to the destination;
+;;  - otherwise: copy op4 to a fresh pseudo, shrd_1_nf on it, then move.
+(define_insn_and_split "*x86_shrd_shld_1_nozext_nf"
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand")
+			     (match_operand:QI 2 "const_0_to_31_operand"))
+	       (ashift:SI
+		   (match_operand:SI 1 "nonimmediate_operand")
+		   (match_operand:QI 3 "const_0_to_31_operand"))))]
+  "TARGET_APX_NF &&
+  INTVAL (operands[3]) == 32 - INTVAL (operands[2])
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (rtx_equal_p (operands[4], operands[0]))
+    {
+      operands[1] = force_reg (SImode, operands[1]);
+      emit_insn (gen_x86_shrd_1_nf (operands[0], operands[1], operands[2], operands[3]));
+    }
+  else if (rtx_equal_p (operands[1], operands[0]))
+    {
+      operands[4] = force_reg (SImode, operands[4]);
+      emit_insn (gen_x86_shld_1_nf (operands[0], operands[4], operands[3], operands[2]));
+    }
+  else if (TARGET_APX_NDD)
+    {
+      rtx tmp = gen_reg_rtx (SImode);
+      if (MEM_P (operands[4]))
+        {
+	  operands[1] = force_reg (SImode, operands[1]);
+	  emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+					 operands[2], operands[3]));
+        }
+      else if (MEM_P (operands[1]))
+        emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[1], operands[4],
+				       operands[3], operands[2]));
+      else
+        emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+				       operands[2], operands[3]));
+      emit_move_insn (operands[0], tmp);
+     }
+   else
+   {
+     operands[1] = force_reg (SImode, operands[1]);
+     rtx tmp = gen_reg_rtx (SImode);
+     emit_move_insn (tmp, operands[4]);
+     emit_insn (gen_x86_shrd_1_nf (tmp, operands[1], operands[2], operands[3]));
+     emit_move_insn (operands[0], tmp);
+   }
+   DONE;
+})
 
 (define_insn_and_split "*x86_shrd_shld_1_nozext"
   [(set (match_operand:SI 0 "nonimmediate_operand")
--
2.31.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 7/8] [APX NF] Support APX NF for mul/div
       [not found] ` <20240515070226.3760873-7-lingling.kong@intel.com>
@ 2024-05-15  7:46   ` Kong, Lingling
  0 siblings, 0 replies; 13+ messages in thread
From: Kong, Lingling @ 2024-05-15  7:46 UTC (permalink / raw)
  To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak

gcc/ChangeLog:

	* config/i386/i386.md (*mul<mode>3_1_nf): New define_insn.
	(*mulqi3_1_nf): Ditto.
	(*<u>divmod<mode>4_noext_nf): Ditto.
	(<u>divmodhiqi3_nf): Ditto.
---
 gcc/config/i386/i386.md | 86 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index f9a62fba0c4..55f65a31b16 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -9907,6 +9907,42 @@
 ;;
 ;; On BDVER1, all HI MULs use DoublePath
 
+;; NF ({nf}-prefixed) imul for the SWIM248 modes, enabled by TARGET_APX_NF
+;; when the two sources are not both in memory.  Alternatives 0 and 1 are
+;; the three-operand forms taking operand 2 from the "K" / "<i>"
+;; constraints; alternative 2 is the two-operand form with operand 1 tied
+;; to the destination.
+(define_insn "*mul<mode>3_1_nf"
+  [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r")
+	(mult:SWIM248
+	  (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0")
+	  (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r")))]
+  "TARGET_APX_NF &&
+  !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   %{nf%} imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   %{nf%} imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   %{nf%} imul{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "imul")
+   (set_attr "prefix_0f" "0,0,1")
+   (set (attr "athlon_decode")
+	(cond [(eq_attr "cpu" "athlon")
+		  (const_string "vector")
+	       (eq_attr "alternative" "1")
+		  (const_string "vector")
+	       (and (eq_attr "alternative" "2")
+	       	    (ior (match_test "<MODE>mode == HImode")
+		         (match_operand 1 "memory_operand")))
+		  (const_string "vector")]
+	      (const_string "direct")))
+   (set (attr "amdfam10_decode")
+	(cond [(and (eq_attr "alternative" "0,1")
+	      	    (ior (match_test "<MODE>mode == HImode")
+		         (match_operand 1 "memory_operand")))
+		  (const_string "vector")]
+	      (const_string "direct")))
+   (set (attr "bdver1_decode")
+   	(if_then_else
+	  (match_test "<MODE>mode == HImode")
+	    (const_string "double")
+	    (const_string "direct")))
+   (set_attr "mode" "<MODE>")])
+
+
 (define_insn "*mul<mode>3_1"
   [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r")
 	(mult:SWIM248
@@ -9978,6 +10014,24 @@
 ;; MUL reg8 	Direct
 ;; MUL mem8 	Direct
 
+;; NF ({nf}-prefixed) QImode multiply, emitted as the one-operand mul{b}.
+;; Operand 0 uses the "a" constraint and operand 1 is tied to it.
+;; Requires TARGET_APX_NF and TARGET_QIMODE_MATH, and not both sources in
+;; memory.
+(define_insn "*mulqi3_1_nf"
+  [(set (match_operand:QI 0 "register_operand" "=a")
+	(mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
+		 (match_operand:QI 2 "nonimmediate_operand" "qm")))]
+  "TARGET_APX_NF &&
+  TARGET_QIMODE_MATH
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "%{nf%} mul{b}\t%2"
+  [(set_attr "type" "imul")
+   (set_attr "length_immediate" "0")
+   (set (attr "athlon_decode")
+     (if_then_else (eq_attr "cpu" "athlon")
+        (const_string "vector")
+        (const_string "direct")))
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "bdver1_decode" "direct")
+   (set_attr "mode" "QI")])
+
+
 (define_insn "*mulqi3_1"
   [(set (match_operand:QI 0 "register_operand" "=a")
 	(mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0") @@ -11128,6 +11182,19 @@
   [(set_attr "type" "multi")
    (set_attr "mode" "SI")])
 
+;; NF ({nf}-prefixed) combined divide/modulo for the SWIM248 modes.
+;; The quotient goes to operand 0 ("a"), with dividend operand 2 tied to
+;; it; the remainder goes to operand 1 ("d").  Operand 4 is a (use) tied
+;; to operand 1; nothing in this pattern sets it, so it has to be
+;; prepared by an earlier insn.
+(define_insn "*<u>divmod<mode>4_noext_nf"
+  [(set (match_operand:SWIM248 0 "register_operand" "=a")
+	(any_div:SWIM248
+	  (match_operand:SWIM248 2 "register_operand" "0")
+	  (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:SWIM248 1 "register_operand" "=d")
+	(<paired_mod>:SWIM248 (match_dup 2) (match_dup 3)))
+   (use (match_operand:SWIM248 4 "register_operand" "1"))]
+  "TARGET_APX_NF"
+  "%{nf%} <sgnprefix>div{<imodesuffix>}\t%3"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "<MODE>")])
+
+
 (define_insn "*<u>divmod<mode>4_noext"
   [(set (match_operand:SWIM248 0 "register_operand" "=a")
 	(any_div:SWIM248
@@ -11275,6 +11342,25 @@
 ;; Change div/mod to HImode and extend the second argument to HImode
 ;; so that mode of div/mod matches with mode of arguments.  Otherwise
 ;; combine may fail.
+;; NF ({nf}-prefixed) 8-bit divide: HImode dividend (operand 1, tied to
+;; the "a" destination) divided by the extended QImode operand 2.  The
+;; HImode result packs the truncated remainder in bits 15:8 and the
+;; truncated quotient in bits 7:0, as the RTL spells out.
+(define_insn "<u>divmodhiqi3_nf"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(ior:HI
+	  (ashift:HI
+	    (zero_extend:HI
+	      (truncate:QI
+		(mod:HI (match_operand:HI 1 "register_operand" "0")
+			(any_extend:HI
+			  (match_operand:QI 2 "nonimmediate_operand" "qm")))))
+	    (const_int 8))
+	  (zero_extend:HI
+	    (truncate:QI
+	      (div:HI (match_dup 1) (any_extend:HI (match_dup 2)))))))]
+  "TARGET_APX_NF
+  && TARGET_QIMODE_MATH"
+  "%{nf%} <sgnprefix>div{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
+
 (define_insn "<u>divmodhiqi3"
   [(set (match_operand:HI 0 "register_operand" "=a")
 	(ior:HI
--
2.31.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 8/8] [APX NF] Support APX NF for lzcnt/tzcnt/popcnt
       [not found] ` <20240515070226.3760873-8-lingling.kong@intel.com>
@ 2024-05-15  7:47   ` Kong, Lingling
  0 siblings, 0 replies; 13+ messages in thread
From: Kong, Lingling @ 2024-05-15  7:47 UTC (permalink / raw)
  To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak

gcc/ChangeLog:

	* config/i386/i386.md (clz<mode>2_lzcnt_nf): New define_insn.
	(*clz<mode>2_lzcnt_falsedep_nf): Ditto.
	(<lt_zcnt>_<mode>_nf): Ditto.
	(*<lt_zcnt>_<mode>_falsedep_nf): Ditto.
	(<lt_zcnt>_hi_nf): Ditto.
	(popcount<mode>2_nf): Ditto.
	(*popcount<mode>2_falsedep_nf): Ditto.
	(popcounthi2_nf): Ditto.
---
 gcc/config/i386/i386.md | 132 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 55f65a31b16..ddde83e57f5 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -21029,6 +21029,24 @@
   operands[3] = gen_reg_rtx (<MODE>mode);
 })
 
+(define_insn_and_split "clz<mode>2_lzcnt_nf"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(clz:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
+  "TARGET_APX_NF && TARGET_LZCNT"
+  "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
+  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && !reg_mentioned_p (operands[0], operands[1])"
+  [(parallel
+    [(set (match_dup 0)
+	  (clz:SWI48 (match_dup 1)))
+     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+  "ix86_expand_clear (operands[0]);"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn_and_split "clz<mode>2_lzcnt"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(clz:SWI48
@@ -21052,6 +21070,18 @@
 ; False dependency happens when destination is only updated by tzcnt,
 ; lzcnt or popcnt.  There is no false dependency when destination is
 ; also used in source.
+(define_insn "*clz<mode>2_lzcnt_falsedep_nf"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(clz:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
+	   UNSPEC_INSN_FALSE_DEP)]
+  "TARGET_APX_NF && TARGET_LZCNT"
+  "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*clz<mode>2_lzcnt_falsedep"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(clz:SWI48
@@ -21158,6 +21188,25 @@
 ;; Version of lzcnt/tzcnt that is expanded from intrinsics.  This version
 ;; provides operand size as output when source operand is zero.
 
+(define_insn_and_split "<lt_zcnt>_<mode>_nf"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(unspec:SWI48
+	  [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))]
+  "TARGET_APX_NF"
+  "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
+  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && !reg_mentioned_p (operands[0], operands[1])"
+  [(parallel
+    [(set (match_dup 0)
+	  (unspec:SWI48 [(match_dup 1)] LT_ZCNT))
+     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+  "ix86_expand_clear (operands[0]);"
+  [(set_attr "type" "<lt_zcnt_type>")
+   (set_attr "prefix_0f" "1")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn_and_split "<lt_zcnt>_<mode>"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(unspec:SWI48
@@ -21182,6 +21231,20 @@
 ; False dependency happens when destination is only updated by tzcnt,
 ; lzcnt or popcnt.  There is no false dependency when destination is
 ; also used in source.
+; also used in source.
+(define_insn "*<lt_zcnt>_<mode>_falsedep_nf"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(unspec:SWI48
+	  [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
+   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
+	   UNSPEC_INSN_FALSE_DEP)]
+  "TARGET_APX_NF"
+  "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "<lt_zcnt_type>")
+   (set_attr "prefix_0f" "1")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<lt_zcnt>_<mode>_falsedep"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(unspec:SWI48
@@ -21196,6 +21259,17 @@
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "<lt_zcnt>_hi_nf"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(unspec:HI
+	  [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))]
+  "TARGET_APX_NF"
+  "%{nf%} <lt_zcnt>{w}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "<lt_zcnt_type>")
+   (set_attr "prefix_0f" "1")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "HI")])
+
 (define_insn "<lt_zcnt>_hi"
   [(set (match_operand:HI 0 "register_operand" "=r")
 	(unspec:HI
@@ -21620,6 +21694,30 @@
   [(set_attr "type" "bitmanip")
    (set_attr "mode" "<MODE>")])
 
+(define_insn_and_split "popcount<mode>2_nf"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(popcount:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
+  "TARGET_APX_NF && TARGET_POPCNT"
+{
+#if TARGET_MACHO
+  return "%{nf%} popcnt\t{%1, %0|%0, %1}";
+#else
+  return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
+  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && !reg_mentioned_p (operands[0], operands[1])"
+  [(parallel
+    [(set (match_dup 0)
+	  (popcount:SWI48 (match_dup 1)))
+     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+  "ix86_expand_clear (operands[0]);"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn_and_split "popcount<mode>2"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(popcount:SWI48
@@ -21649,6 +21747,24 @@
 ; False dependency happens when destination is only updated by tzcnt,
 ; lzcnt or popcnt.  There is no false dependency when destination is
 ; also used in source.
+(define_insn "*popcount<mode>2_falsedep_nf"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(popcount:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
+	   UNSPEC_INSN_FALSE_DEP)]
+  "TARGET_APX_NF && TARGET_POPCNT"
+{
+#if TARGET_MACHO
+  return "%{nf%} popcnt\t{%1, %0|%0, %1}";
+#else
+  return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*popcount<mode>2_falsedep"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(popcount:SWI48
@@ -21806,6 +21922,22 @@
   DONE;
 })
 
+(define_insn "popcounthi2_nf"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(popcount:HI
+	  (match_operand:HI 1 "nonimmediate_operand" "rm")))]
+  "TARGET_APX_NF && TARGET_POPCNT"
+{
+#if TARGET_MACHO
+  return "%{nf%} popcnt\t{%1, %0|%0, %1}";
+#else
+  return "%{nf%} popcnt{w}\t{%1, %0|%0, %1}";
+#endif
+}
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "HI")])
+
 (define_insn "popcounthi2"
   [(set (match_operand:HI 0 "register_operand" "=r")
 	(popcount:HI
--
2.31.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/8] [APX NF]: Support APX NF add
  2024-05-15  7:43 ` [PATCH 1/8] [APX NF]: Support APX NF add Kong, Lingling
@ 2024-05-15  8:14   ` Uros Bizjak
  2024-05-15  8:36     ` Kong, Lingling
  2024-05-15  8:46   ` Uros Bizjak
  1 sibling, 1 reply; 13+ messages in thread
From: Uros Bizjak @ 2024-05-15  8:14 UTC (permalink / raw)
  To: Kong, Lingling; +Cc: gcc-patches, Liu, Hongtao, Wang, Hongyu

On Wed, May 15, 2024 at 9:43 AM Kong, Lingling <lingling.kong@intel.com> wrote:
>
> From: Hongyu Wang <hongyu.wang@intel.com>
>
> APX NF(no flags) feature implements suppresses the update of status flags for arithmetic operations.
>
> For NF add, it is not clear whether NF add can be faster than lea. If so, the pattern needs to be adjusted to prefer LEA generation.

> diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
> index 0eb751ad225..0ff4df0780c 100644
> --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
> +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { ! ia32 } } } */
> -/* { dg-options "-mapxf -march=x86-64 -O2" } */
> +/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx -march=x86-64
> +-O2" } */

Please do not split options to a separate line; here and in other places.

Uros.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH 1/8] [APX NF]: Support APX NF add
  2024-05-15  8:14   ` Uros Bizjak
@ 2024-05-15  8:36     ` Kong, Lingling
  0 siblings, 0 replies; 13+ messages in thread
From: Kong, Lingling @ 2024-05-15  8:36 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches, Liu, Hongtao, Wang, Hongyu

[-- Attachment #1: Type: text/plain, Size: 1405 bytes --]

> -----Original Message-----
> From: Uros Bizjak <ubizjak@gmail.com>
> Sent: Wednesday, May 15, 2024 4:15 PM
> To: Kong, Lingling <lingling.kong@intel.com>
> Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao.liu@intel.com>; Wang,
> Hongyu <hongyu.wang@intel.com>
> Subject: Re: [PATCH 1/8] [APX NF]: Support APX NF add
> 
> On Wed, May 15, 2024 at 9:43 AM Kong, Lingling <lingling.kong@intel.com>
> wrote:
> >
> > From: Hongyu Wang <hongyu.wang@intel.com>
> >
> > APX NF(no flags) feature implements suppresses the update of status flags for
> arithmetic operations.
> >
> > For NF add, it is not clear whether NF add can be faster than lea. If so, the
> pattern needs to be adjusted to prefer LEA generation.
> 
> > diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c
> > b/gcc/testsuite/gcc.target/i386/apx-ndd.c
> > index 0eb751ad225..0ff4df0780c 100644
> > --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
> > +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
> > @@ -1,5 +1,5 @@
> >  /* { dg-do compile { target { ! ia32 } } } */
> > -/* { dg-options "-mapxf -march=x86-64 -O2" } */
> > +/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx -march=x86-64
> > +-O2" } */
> 
> Please do not split options to a separate line; here and in other places.
> 
> Uros.

Sorry, my send-email adjusted some formatting incorrectly; I have added the patches as attachments.

Thanks, 
Lingling


[-- Attachment #2: 0004-APX-NF-Support-APX-NF-for-right-shift-insns.patch --]
[-- Type: application/octet-stream, Size: 5177 bytes --]

From 95ffd42bbb48eb8379709f1976d05fe673bbf396 Mon Sep 17 00:00:00 2001
From: konglin1 <lingling.kong@intel.com>
Date: Tue, 27 Feb 2024 09:51:15 +0800
Subject: [PATCH 4/8] [APX NF] Support APX NF for right shift insns

gcc/ChangeLog:

	* config/i386/i386.md (*ashr<mode>3_1_nf): New.
	(*lshr<mode>3_1_nf): Ditto.
	(*lshrqi3_1_nf): Ditto.
	(*lshrhi3_1_nf): Ditto.
---
 gcc/config/i386/i386.md | 85 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 9ffdb3fe71a..adcb09fcdd0 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -16569,6 +16569,21 @@
   [(set_attr "type" "ishiftx")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*ashr<mode>3_1_nf"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
+	(ashiftrt:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r,c<S>")))]
+  "TARGET_APX_NF &&
+   ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+   %{nf%} sar{<imodesuffix>}\t{%2, %0|%0, %2}
+   #
+   %{nf%} sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,bmi2,apx_ndd")
+   (set_attr "type" "ishift,ishiftx,ishift")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*ashr<mode>3_1"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
 	(ashiftrt:SWI48
@@ -16630,6 +16645,21 @@
 }
 [(set_attr "isa" "*,*,*,apx_ndd")])
 
+(define_insn "*lshr<mode>3_1_nf"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k,r")
+	(lshiftrt:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>,c<S>")))]
+  "TARGET_APX_NF &&
+  ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+   %{nf%} shr{<imodesuffix>}\t{%2, %0|%0, %2}
+   #
+   #
+   %{nf%} shr{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,bmi2,avx512bw,apx_ndd")
+   (set_attr "type" "ishift,ishiftx,msklog,ishift")
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*lshr<mode>3_1"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k,r")
@@ -16669,6 +16699,17 @@
    (set_attr "mode" "<MODE>")])
 
 ;; Convert shift to the shiftx pattern to avoid flags dependency.
+;; NF/NDD shifts do not support a shift count in a general register, only
+;; c<S>, but the NF form does not update the flags.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand")
+	(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
+			   (match_operand:QI 2 "register_operand")))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))]
+  "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
+
 (define_split
   [(set (match_operand:SWI48 0 "register_operand")
 	(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
@@ -16737,6 +16778,20 @@
 	(zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
   "operands[2] = gen_lowpart (SImode, operands[2]);")
 
+(define_insn "*ashr<mode>3_1_nf"
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, r")
+	(ashiftrt:SWI12
+	  (match_operand:SWI12 1 "nonimmediate_operand" "0, rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>, c<S>")))]
+  "TARGET_APX_NF &&
+   ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+   %{nf%} sar{<imodesuffix>}\t{%2, %0|%0, %2}
+   %{nf%} sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*, apx_ndd")
+   (set_attr "type" "ishift")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*ashr<mode>3_1"
   [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, r")
 	(ashiftrt:SWI12
@@ -16765,6 +16820,21 @@
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*lshrqi3_1_nf"
+  [(set (match_operand:QI 0 "nonimmediate_operand"  "=qm,?k,r")
+	(lshiftrt:QI
+	  (match_operand:QI 1 "nonimmediate_operand" "0,k,rm")
+	  (match_operand:QI 2 "nonmemory_operand"    "cI,Wb,cI")))]
+  "TARGET_APX_NF &&
+   ix86_binary_operator_ok (LSHIFTRT, QImode, operands, TARGET_APX_NDD)"
+  "@
+   %{nf%} shr{b}\t{%2, %0|%0, %2}
+   #
+   %{nf%} shr{b}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,avx512dq,apx_ndd")
+   (set_attr "type" "ishift,msklog,ishift")
+   (set_attr "mode" "QI")])
+
 (define_insn "*lshrqi3_1"
   [(set (match_operand:QI 0 "nonimmediate_operand"  "=qm,?k,r")
 	(lshiftrt:QI
@@ -16802,6 +16872,21 @@
        (const_string "*")))
    (set_attr "mode" "QI")])
 
+(define_insn "*lshrhi3_1_nf"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,?k,r")
+	(lshiftrt:HI
+	  (match_operand:HI 1 "nonimmediate_operand" "0,k,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "cI,Ww,cI")))]
+  "TARGET_APX_NF &&
+   ix86_binary_operator_ok (LSHIFTRT, HImode, operands, TARGET_APX_NDD)"
+  "@
+   %{nf%} shr{w}\t{%2, %0|%0, %2}
+   #
+   %{nf%} shr{w}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*, avx512f, apx_ndd")
+   (set_attr "type" "ishift,msklog,ishift")
+   (set_attr "mode" "HI")])
+
 (define_insn "*lshrhi3_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k, r")
 	(lshiftrt:HI
-- 
2.31.1


[-- Attachment #3: 0005-APX-NF-Support-APX-NF-for-rotate-insns.patch --]
[-- Type: application/octet-stream, Size: 5326 bytes --]

From 59dfb0038196720075348d2e86a88add16c6333e Mon Sep 17 00:00:00 2001
From: konglin1 <lingling.kong@intel.com>
Date: Tue, 27 Feb 2024 16:43:32 +0800
Subject: [PATCH 5/8] [APX NF] Support APX NF for rotate insns

gcc/ChangeLog:

	* config/i386/i386.md (ashr<mode>3_cvt_nf): New define_insn.
	(*<insn><mode>3_1_nf): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/apx-nf.c: Add NF test for rotate insns.
---
 gcc/config/i386/i386.md                | 80 ++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/apx-nf.c |  5 ++
 2 files changed, 85 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index adcb09fcdd0..ff44154b26b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -16491,6 +16491,25 @@
 (define_mode_attr cvt_mnemonic
   [(SI "{cltd|cdq}") (DI "{cqto|cqo}")])
 
+(define_insn "ashr<mode>3_cvt_nf"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+	(ashiftrt:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
+	  (match_operand:QI 2 "const_int_operand")))]
+  "TARGET_APX_NF &&
+   INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)-1
+   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+   %{nf%} sar{<imodesuffix>}\t{%2, %0|%0, %2}
+   %{nf%} sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "ishift")
+   (set_attr "prefix_0f" "*")
+   (set_attr "length_immediate" "*")
+   (set_attr "modrm" "1")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "ashr<mode>3_cvt"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm,r")
 	(ashiftrt:SWI48
@@ -17430,6 +17449,39 @@
   [(set_attr "type" "rotatex")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*<insn><mode>3_1_nf"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+	(any_rotate:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,c<S>")))]
+  "TARGET_APX_NF &&
+  ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
+{
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+      && !use_ndd)
+    return "%{nf%} <rotate>{<imodesuffix>}\t%0";
+  else
+    return use_ndd ? "%{nf%} <rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+		   : "%{nf%} <rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+}
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "rotate")
+   (set (attr "preferred_for_size")
+     (cond [(eq_attr "alternative" "0")
+	      (symbol_ref "true")]
+	   (symbol_ref "false")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (eq_attr "type" "rotate")
+	    (and (match_operand 2 "const1_operand")
+		 (ior (match_test "TARGET_SHIFT1")
+		      (match_test "optimize_function_for_size_p (cfun)"))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<insn><mode>3_1"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
 	(any_rotate:SWI48
@@ -17572,6 +17624,34 @@
   [(set (match_dup 0)
 	(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])
 
+(define_insn "*<insn><mode>3_1_nf"
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m,r")
+	(any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0,rm")
+			  (match_operand:QI 2 "nonmemory_operand" "c<S>,c<S>")))]
+  "TARGET_APX_NF &&
+  ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
+{
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+      && !use_ndd)
+    return "%{nf%} <rotate>{<imodesuffix>}\t%0";
+  else
+    return use_ndd
+	   ? "%{nf%} <rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+	   : "%{nf%} <rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+}
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "rotate")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand")
+	    (ior (match_test "TARGET_SHIFT1")
+		 (match_test "optimize_function_for_size_p (cfun)")))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<insn><mode>3_1"
   [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m,r")
 	(any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0,rm")
diff --git a/gcc/testsuite/gcc.target/i386/apx-nf.c b/gcc/testsuite/gcc.target/i386/apx-nf.c
index 608dbf8f5f7..6e59803be64 100644
--- a/gcc/testsuite/gcc.target/i386/apx-nf.c
+++ b/gcc/testsuite/gcc.target/i386/apx-nf.c
@@ -3,6 +3,7 @@
 /* { dg-final { scan-assembler-times "\{nf\} add" 4 } } */
 /* { dg-final { scan-assembler-times "\{nf\} and" 1 } } */
 /* { dg-final { scan-assembler-times "\{nf\} or" 1 } } */
+/* { dg-final { scan-assembler-times "\{nf\} rol" 4 } } */
 
 #include "apx-ndd.c"
 
@@ -13,3 +14,7 @@ foo (struct B *b)
 {
     b->bit0 = b->bit0 | b->bit1;
 }
+long int f1 (int x) { return ~(1ULL << (x & 0x3f)); }
+long int f2 (int x) { return ~(1ULL << x); }
+long int f3 (unsigned char *x) { return ~(1ULL << (x[0] & 0x3f)); }
+long int f4 (unsigned char *x) { return ~(1ULL << x[0]); }
-- 
2.31.1


[-- Attachment #4: 0006-APX-NF-Support-APX-NF-for-shld-shrd.patch --]
[-- Type: application/octet-stream, Size: 23855 bytes --]

From 124afb19fdd0ffce2053462b55fd043b1bd0cdd1 Mon Sep 17 00:00:00 2001
From: konglin1 <lingling.kong@intel.com>
Date: Wed, 28 Feb 2024 14:43:34 +0800
Subject: [PATCH 6/8] [APX NF] Support APX NF for shld/shrd

gcc/ChangeLog:

	* config/i386/i386.md (x86_64_shld_nf): New define_insn.
	(x86_64_shld_ndd_nf): Ditto.
	(x86_64_shld_1_nf): Ditto.
	(x86_64_shld_ndd_1_nf): Ditto.
	(*x86_64_shld_shrd_1_nozext_nf): Ditto.
	(x86_shld_nf): Ditto.
	(x86_shld_ndd_nf): Ditto.
	(x86_shld_1_nf): Ditto.
	(x86_shld_ndd_1_nf): Ditto.
	(*x86_shld_shrd_1_nozext_nf): Ditto.
	(<insn><dwi>3_doubleword_lowpart_nf): Ditto.
	(x86_64_shrd_nf): Ditto.
	(x86_64_shrd_ndd_nf): Ditto.
	(x86_64_shrd_1_nf): Ditto.
	(x86_64_shrd_ndd_1_nf): Ditto.
	(*x86_64_shrd_shld_1_nozext_nf): Ditto.
	(x86_shrd_nf): Ditto.
	(x86_shrd_ndd_nf): Ditto.
	(x86_shrd_1_nf): Ditto.
	(x86_shrd_ndd_1_nf): Ditto.
	(*x86_shrd_shld_1_nozext_nf): Ditto.
---
 gcc/config/i386/i386.md | 518 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 518 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ff44154b26b..f9a62fba0c4 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14666,6 +14666,26 @@
   DONE;
 })
 
+(define_insn "x86_64_shld_nf"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+        (ior:DI (ashift:DI (match_dup 0)
+		  (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
+			  (const_int 63)))
+		(subreg:DI
+		  (lshiftrt:TI
+		    (zero_extend:TI
+		      (match_operand:DI 1 "register_operand" "r"))
+		    (minus:QI (const_int 64)
+			      (and:QI (match_dup 2) (const_int 63)))) 0)))]
+  "TARGET_APX_NF"
+  "%{nf%} shld{q}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
 (define_insn "x86_64_shld"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
         (ior:DI (ashift:DI (match_dup 0)
@@ -14687,6 +14707,22 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+(define_insn "x86_64_shld_ndd_nf"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
+		  (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
+			  (const_int 63)))
+		(subreg:DI
+		  (lshiftrt:TI
+		    (zero_extend:TI
+		      (match_operand:DI 2 "register_operand" "r"))
+		    (minus:QI (const_int 64)
+			      (and:QI (match_dup 3) (const_int 63)))) 0)))]
+  "TARGET_APX_NDD && TARGET_APX_NF"
+  "%{nf%} shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "DI")])
+
 (define_insn "x86_64_shld_ndd"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
@@ -14704,6 +14740,43 @@
   [(set_attr "type" "ishift")
    (set_attr "mode" "DI")])
 
+(define_insn "x86_64_shld_1_nf"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+        (ior:DI (ashift:DI (match_dup 0)
+			   (match_operand:QI 2 "const_0_to_63_operand"))
+		(subreg:DI
+		  (lshiftrt:TI
+		    (zero_extend:TI
+		      (match_operand:DI 1 "register_operand" "r"))
+		    (match_operand:QI 3 "const_0_to_255_operand")) 0)))]
+  "TARGET_64BIT
+   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+   && TARGET_APX_NF"
+  "%{nf%} shld{q}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")
+   (set_attr "length_immediate" "1")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+(define_insn "x86_64_shld_ndd_1_nf"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
+			   (match_operand:QI 3 "const_0_to_63_operand"))
+		(subreg:DI
+		  (lshiftrt:TI
+		    (zero_extend:TI
+		      (match_operand:DI 2 "register_operand" "r"))
+		    (match_operand:QI 4 "const_0_to_255_operand")) 0)))]
+  "TARGET_APX_NDD && TARGET_APX_NF
+   && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
+  "%{nf%} shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "DI")
+   (set_attr "length_immediate" "1")])
+
 (define_insn "x86_64_shld_1"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
         (ior:DI (ashift:DI (match_dup 0)
@@ -14742,6 +14815,58 @@
    (set_attr "mode" "DI")
    (set_attr "length_immediate" "1")])
 
+(define_insn_and_split "*x86_64_shld_shrd_1_nozext_nf"
+  [(set (match_operand:DI 0 "nonimmediate_operand")
+	(ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand")
+			     (match_operand:QI 2 "const_0_to_63_operand"))
+		(lshiftrt:DI
+		  (match_operand:DI 1 "nonimmediate_operand")
+		  (match_operand:QI 3 "const_0_to_63_operand"))))]
+  "TARGET_64BIT && TARGET_APX_NF
+   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (rtx_equal_p (operands[4], operands[0]))
+    {
+      operands[1] = force_reg (DImode, operands[1]);
+      emit_insn (gen_x86_64_shld_1_nf (operands[0], operands[1], operands[2], operands[3]));
+    }
+  else if (rtx_equal_p (operands[1], operands[0]))
+    {
+      operands[4] = force_reg (DImode, operands[4]);
+      emit_insn (gen_x86_64_shrd_1_nf (operands[0], operands[4], operands[3], operands[2]));
+    }
+  else if (TARGET_APX_NDD)
+    {
+     rtx tmp = gen_reg_rtx (DImode);
+     if (MEM_P (operands[4]))
+       {
+	 operands[1] = force_reg (DImode, operands[1]);
+	 emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[4], operands[1],
+					   operands[2], operands[3]));
+       }
+     else if (MEM_P (operands[1]))
+       emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[1], operands[4],
+					 operands[3], operands[2]));
+     else
+       emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[4], operands[1],
+					 operands[2], operands[3]));
+     emit_move_insn (operands[0], tmp);
+    }
+  else
+   {
+     operands[1] = force_reg (DImode, operands[1]);
+     rtx tmp = gen_reg_rtx (DImode);
+     emit_move_insn (tmp, operands[4]);
+     emit_insn (gen_x86_64_shld_1_nf (tmp, operands[1], operands[2], operands[3]));
+     emit_move_insn (operands[0], tmp);
+   }
+   DONE;
+})
+
 
 (define_insn_and_split "*x86_64_shld_shrd_1_nozext"
   [(set (match_operand:DI 0 "nonimmediate_operand")
@@ -14844,6 +14969,81 @@
   emit_move_insn (operands[4], operands[0]);
 })
 
+(define_insn "x86_shld_nf"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+        (ior:SI (ashift:SI (match_dup 0)
+		  (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
+			  (const_int 31)))
+		(subreg:SI
+		  (lshiftrt:DI
+		    (zero_extend:DI
+		      (match_operand:SI 1 "register_operand" "r"))
+		    (minus:QI (const_int 32)
+			      (and:QI (match_dup 2) (const_int 31)))) 0)))]
+  "TARGET_APX_NF"
+  "%{nf%} shld{l}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+(define_insn "x86_shld_ndd_nf"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+        (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+		  (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
+			  (const_int 31)))
+		(subreg:SI
+		  (lshiftrt:DI
+		    (zero_extend:DI
+		      (match_operand:SI 2 "register_operand" "r"))
+		    (minus:QI (const_int 32)
+			      (and:QI (match_dup 3) (const_int 31)))) 0)))]
+  "TARGET_APX_NDD && TARGET_APX_NF"
+  "%{nf%} shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
+
+(define_insn "x86_shld_1_nf"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+        (ior:SI (ashift:SI (match_dup 0)
+			   (match_operand:QI 2 "const_0_to_31_operand"))
+		(subreg:SI
+		  (lshiftrt:DI
+		    (zero_extend:DI
+		      (match_operand:SI 1 "register_operand" "r"))
+		    (match_operand:QI 3 "const_0_to_63_operand")) 0)))]
+  "TARGET_APX_NF
+   && INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
+  "%{nf%} shld{l}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+(define_insn "x86_shld_ndd_1_nf"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+			   (match_operand:QI 3 "const_0_to_31_operand"))
+		(subreg:SI
+		  (lshiftrt:DI
+		    (zero_extend:DI
+		      (match_operand:SI 2 "register_operand" "r"))
+		    (match_operand:QI 4 "const_0_to_63_operand")) 0)))]
+  "TARGET_APX_NDD && TARGET_APX_NF
+   && INTVAL (operands[4]) == 32 - INTVAL (operands[3])"
+  "%{nf%} shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "SI")])
+
 (define_insn "x86_shld"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (ashift:SI (match_dup 0)
@@ -14922,6 +15122,57 @@
    (set_attr "length_immediate" "1")
    (set_attr "mode" "SI")])
 
+(define_insn_and_split "*x86_shld_shrd_1_nozext_nf"
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand")
+			     (match_operand:QI 2 "const_0_to_31_operand"))
+	       (lshiftrt:SI
+		   (match_operand:SI 1 "nonimmediate_operand")
+		   (match_operand:QI 3 "const_0_to_31_operand"))))]
+  "TARGET_APX_NF &&
+  INTVAL (operands[3]) == 32 - INTVAL (operands[2])
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (rtx_equal_p (operands[4], operands[0]))
+    {
+      operands[1] = force_reg (SImode, operands[1]);
+      emit_insn (gen_x86_shld_1_nf (operands[0], operands[1], operands[2], operands[3]));
+    }
+  else if (rtx_equal_p (operands[1], operands[0]))
+    {
+      operands[4] = force_reg (SImode, operands[4]);
+      emit_insn (gen_x86_shrd_1_nf (operands[0], operands[4], operands[3], operands[2]));
+    }
+  else if (TARGET_APX_NDD)
+    {
+     rtx tmp = gen_reg_rtx (SImode);
+     if (MEM_P (operands[4]))
+       {
+	 operands[1] = force_reg (SImode, operands[1]);
+	 emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[4], operands[1],
+					operands[2], operands[3]));
+       }
+     else if (MEM_P (operands[1]))
+       emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[1], operands[4],
+				      operands[3], operands[2]));
+     else
+       emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[4], operands[1],
+				      operands[2], operands[3]));
+     emit_move_insn (operands[0], tmp);
+    }
+ else
+   {
+     operands[1] = force_reg (SImode, operands[1]);
+     rtx tmp = gen_reg_rtx (SImode);
+     emit_move_insn (tmp, operands[4]);
+     emit_insn (gen_x86_shld_1_nf (tmp, operands[1], operands[2], operands[3]));
+     emit_move_insn (operands[0], tmp);
+   }
+   DONE;
+})
 
 (define_insn_and_split "*x86_shld_shrd_1_nozext"
   [(set (match_operand:SI 0 "nonimmediate_operand")
@@ -16107,6 +16358,26 @@
 })
 
 ;; Split truncations of double word right shifts into x86_shrd_1.
+(define_insn_and_split "<insn><dwi>3_doubleword_lowpart_nf"
+  [(set (match_operand:DWIH 0 "register_operand" "=&r")
+	(subreg:DWIH
+	  (any_shiftrt:<DWI> (match_operand:<DWI> 1 "register_operand" "r")
+			     (match_operand:QI 2 "const_int_operand")) 0))]
+  "TARGET_APX_NF && UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(ior:DWIH (lshiftrt:DWIH (match_dup 0) (match_dup 2))
+		  (subreg:DWIH
+		    (ashift:<DWI> (zero_extend:<DWI> (match_dup 3))
+				  (match_dup 4)) 0)))]
+{
+  split_double_mode (<DWI>mode, &operands[1], 1, &operands[1], &operands[3]);
+  operands[4] = GEN_INT ((<MODE_SIZE> * BITS_PER_UNIT) - INTVAL (operands[2]));
+  if (!rtx_equal_p (operands[0], operands[1]))
+    emit_move_insn (operands[0], operands[1]);
+})
+
 (define_insn_and_split "<insn><dwi>3_doubleword_lowpart"
   [(set (match_operand:DWIH 0 "register_operand" "=&r")
 	(subreg:DWIH
@@ -16130,6 +16401,26 @@
     emit_move_insn (operands[0], operands[1]);
 })
 
+(define_insn "x86_64_shrd_nf"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+        (ior:DI (lshiftrt:DI (match_dup 0)
+		  (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
+			  (const_int 63)))
+		(subreg:DI
+		  (ashift:TI
+		    (zero_extend:TI
+		      (match_operand:DI 1 "register_operand" "r"))
+		    (minus:QI (const_int 64)
+			      (and:QI (match_dup 2) (const_int 63)))) 0)))]
+  "TARGET_APX_NF"
+  "%{nf%} shrd{q}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
 (define_insn "x86_64_shrd"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
         (ior:DI (lshiftrt:DI (match_dup 0)
@@ -16151,6 +16442,22 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+(define_insn "x86_64_shrd_ndd_nf"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
+		  (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
+			  (const_int 63)))
+		(subreg:DI
+		  (ashift:TI
+		    (zero_extend:TI
+		      (match_operand:DI 2 "register_operand" "r"))
+		    (minus:QI (const_int 64)
+			      (and:QI (match_dup 3) (const_int 63)))) 0)))]
+  "TARGET_APX_NDD && TARGET_APX_NF"
+  "%{nf%} shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "DI")])
+
 (define_insn "x86_64_shrd_ndd"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
@@ -16168,6 +16475,25 @@
   [(set_attr "type" "ishift")
    (set_attr "mode" "DI")])
 
+(define_insn "x86_64_shrd_1_nf"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+        (ior:DI (lshiftrt:DI (match_dup 0)
+			     (match_operand:QI 2 "const_0_to_63_operand"))
+		(subreg:DI
+		  (ashift:TI
+		    (zero_extend:TI
+		      (match_operand:DI 1 "register_operand" "r"))
+		    (match_operand:QI 3 "const_0_to_255_operand")) 0)))]
+  "TARGET_APX_NF
+   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
+  "%{nf%} shrd{q}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "DI")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
 
 (define_insn "x86_64_shrd_1"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
@@ -16190,6 +16516,22 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+(define_insn "x86_64_shrd_ndd_1_nf"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
+			     (match_operand:QI 3 "const_0_to_63_operand"))
+		(subreg:DI
+		  (ashift:TI
+		    (zero_extend:TI
+		      (match_operand:DI 2 "register_operand" "r"))
+		    (match_operand:QI 4 "const_0_to_255_operand")) 0)))]
+  "TARGET_APX_NDD && TARGET_APX_NF
+   && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
+  "%{nf%} shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "DI")])
+
 (define_insn "x86_64_shrd_ndd_1"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
@@ -16207,6 +16549,57 @@
    (set_attr "length_immediate" "1")
    (set_attr "mode" "DI")])
 
+(define_insn_and_split "*x86_64_shrd_shld_1_nozext_nf"
+  [(set (match_operand:DI 0 "nonimmediate_operand")
+	(ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand")
+			     (match_operand:QI 2 "const_0_to_63_operand"))
+		(ashift:DI
+		  (match_operand:DI 1 "nonimmediate_operand")
+		  (match_operand:QI 3 "const_0_to_63_operand"))))]
+  "TARGET_64BIT && TARGET_APX_NF
+   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (rtx_equal_p (operands[4], operands[0]))
+    {
+      operands[1] = force_reg (DImode, operands[1]);
+      emit_insn (gen_x86_64_shrd_1_nf (operands[0], operands[1], operands[2], operands[3]));
+    }
+  else if (rtx_equal_p (operands[1], operands[0]))
+    {
+      operands[4] = force_reg (DImode, operands[4]);
+      emit_insn (gen_x86_64_shld_1_nf (operands[0], operands[4], operands[3], operands[2]));
+    }
+  else if (TARGET_APX_NDD)
+    {
+      rtx tmp = gen_reg_rtx (DImode);
+      if (MEM_P (operands[4]))
+        {
+	  operands[1] = force_reg (DImode, operands[1]);
+	  emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+					    operands[2], operands[3]));
+        }
+       else if (MEM_P (operands[1]))
+         emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[1], operands[4],
+					   operands[3], operands[2]));
+       else
+         emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+					   operands[2], operands[3]));
+       emit_move_insn (operands[0], tmp);
+    }
+  else
+   {
+     operands[1] = force_reg (DImode, operands[1]);
+     rtx tmp = gen_reg_rtx (DImode);
+     emit_move_insn (tmp, operands[4]);
+     emit_insn (gen_x86_64_shrd_1_nf (tmp, operands[1], operands[2], operands[3]));
+     emit_move_insn (operands[0], tmp);
+   }
+   DONE;
+})
 
 (define_insn_and_split "*x86_64_shrd_shld_1_nozext"
   [(set (match_operand:DI 0 "nonimmediate_operand")
@@ -16309,6 +16702,27 @@
   emit_move_insn (operands[4], operands[0]);
 })
 
+(define_insn "x86_shrd_nf"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+        (ior:SI (lshiftrt:SI (match_dup 0)
+		  (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
+			  (const_int 31)))
+		(subreg:SI
+		  (ashift:DI
+		    (zero_extend:DI
+		      (match_operand:SI 1 "register_operand" "r"))
+		    (minus:QI (const_int 32)
+			      (and:QI (match_dup 2) (const_int 31)))) 0)))]
+  "TARGET_APX_NF"
+  "%{nf%} shrd{l}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
 (define_insn "x86_shrd"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (lshiftrt:SI (match_dup 0)
@@ -16331,6 +16745,22 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+(define_insn "x86_shrd_ndd_nf"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+		  (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
+			  (const_int 31)))
+		(subreg:SI
+		  (ashift:DI
+		    (zero_extend:DI
+		      (match_operand:SI 2 "register_operand" "r"))
+		    (minus:QI (const_int 32)
+			      (and:QI (match_dup 3) (const_int 31)))) 0)))]
+  "TARGET_APX_NDD && TARGET_APX_NF"
+  "%{nf%} shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
 (define_insn "x86_shrd_ndd"
   [(set (match_operand:SI 0 "register_operand" "=r")
         (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
@@ -16348,6 +16778,27 @@
   [(set_attr "type" "ishift")
    (set_attr "mode" "SI")])
 
+(define_insn "x86_shrd_1_nf"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+        (ior:SI (lshiftrt:SI (match_dup 0)
+			     (match_operand:QI 2 "const_0_to_31_operand"))
+		(subreg:SI
+		  (ashift:DI
+		    (zero_extend:DI
+		      (match_operand:SI 1 "register_operand" "r"))
+		    (match_operand:QI 3 "const_0_to_63_operand")) 0)))]
+  "TARGET_APX_NF
+  && INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
+  "%{nf%} shrd{l}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
 (define_insn "x86_shrd_1"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (lshiftrt:SI (match_dup 0)
@@ -16369,6 +16820,22 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+(define_insn "x86_shrd_ndd_1_nf"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+			     (match_operand:QI 3 "const_0_to_31_operand"))
+		(subreg:SI
+		  (ashift:DI
+		    (zero_extend:DI
+		      (match_operand:SI 2 "register_operand" "r"))
+		    (match_operand:QI 4 "const_0_to_63_operand")) 0)))]
+  "TARGET_APX_NDD && TARGET_APX_NF
+   && (INTVAL (operands[4]) == 32 - INTVAL (operands[3]))"
+  "%{nf%} shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "SI")])
+
 (define_insn "x86_shrd_ndd_1"
   [(set (match_operand:SI 0 "register_operand" "=r")
         (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
@@ -16386,6 +16853,57 @@
    (set_attr "length_immediate" "1")
    (set_attr "mode" "SI")])
 
+(define_insn_and_split "*x86_shrd_shld_1_nozext_nf"
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand")
+			     (match_operand:QI 2 "const_0_to_31_operand"))
+	       (ashift:SI
+		   (match_operand:SI 1 "nonimmediate_operand")
+		   (match_operand:QI 3 "const_0_to_31_operand"))))]
+  "TARGET_APX_NF
+   && INTVAL (operands[3]) == 32 - INTVAL (operands[2])
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (rtx_equal_p (operands[4], operands[0]))
+    {
+      operands[1] = force_reg (SImode, operands[1]);
+      emit_insn (gen_x86_shrd_1_nf (operands[0], operands[1], operands[2], operands[3]));
+    }
+  else if (rtx_equal_p (operands[1], operands[0]))
+    {
+      operands[4] = force_reg (SImode, operands[4]);
+      emit_insn (gen_x86_shld_1_nf (operands[0], operands[4], operands[3], operands[2]));
+    }
+  else if (TARGET_APX_NDD)
+    {
+      rtx tmp = gen_reg_rtx (SImode);
+      if (MEM_P (operands[4]))
+        {
+	  operands[1] = force_reg (SImode, operands[1]);
+	  emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+					 operands[2], operands[3]));
+        }
+      else if (MEM_P (operands[1]))
+        emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[1], operands[4],
+				       operands[3], operands[2]));
+      else
+        emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+				       operands[2], operands[3]));
+      emit_move_insn (operands[0], tmp);
+     }
+   else
+   {
+     operands[1] = force_reg (SImode, operands[1]);
+     rtx tmp = gen_reg_rtx (SImode);
+     emit_move_insn (tmp, operands[4]);
+     emit_insn (gen_x86_shrd_1_nf (tmp, operands[1], operands[2], operands[3]));
+     emit_move_insn (operands[0], tmp);
+   }
+   DONE;
+})
 
 (define_insn_and_split "*x86_shrd_shld_1_nozext"
   [(set (match_operand:SI 0 "nonimmediate_operand")
-- 
2.31.1


[-- Attachment #5: 0007-APX-NF-Support-APX-NF-for-mul-div.patch --]
[-- Type: application/octet-stream, Size: 4614 bytes --]

From d69278225731bd3d8dbe2a2a0bb4a922b391395f Mon Sep 17 00:00:00 2001
From: konglin1 <lingling.kong@intel.com>
Date: Mon, 4 Mar 2024 11:16:25 +0800
Subject: [PATCH 7/8] [APX NF] Support APX NF for mul/div

gcc/ChangeLog:

	* config/i386/i386.md (*mul<mode>3_1_nf): New define_insn.
	(*mulqi3_1_nf): Ditto.
	(*<u>divmod<mode>4_noext_nf): Ditto.
	(<u>divmodhiqi3_nf): Ditto.
---
 gcc/config/i386/i386.md | 86 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index f9a62fba0c4..55f65a31b16 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -9907,6 +9907,42 @@
 ;;
 ;; On BDVER1, all HI MULs use DoublePath
 
+(define_insn "*mul<mode>3_1_nf"
+  [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r")
+	(mult:SWIM248
+	  (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0")
+	  (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r")))]
+  "TARGET_APX_NF
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   %{nf%} imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   %{nf%} imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   %{nf%} imul{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "imul")
+   (set_attr "prefix_0f" "0,0,1")
+   (set (attr "athlon_decode")
+	(cond [(eq_attr "cpu" "athlon")
+		  (const_string "vector")
+	       (eq_attr "alternative" "1")
+		  (const_string "vector")
+	       (and (eq_attr "alternative" "2")
+	       	    (ior (match_test "<MODE>mode == HImode")
+		         (match_operand 1 "memory_operand")))
+		  (const_string "vector")]
+	      (const_string "direct")))
+   (set (attr "amdfam10_decode")
+	(cond [(and (eq_attr "alternative" "0,1")
+	      	    (ior (match_test "<MODE>mode == HImode")
+		         (match_operand 1 "memory_operand")))
+		  (const_string "vector")]
+	      (const_string "direct")))
+   (set (attr "bdver1_decode")
+   	(if_then_else
+	  (match_test "<MODE>mode == HImode")
+	    (const_string "double")
+	    (const_string "direct")))
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*mul<mode>3_1"
   [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r")
 	(mult:SWIM248
@@ -9978,6 +10014,24 @@
 ;; MUL reg8 	Direct
 ;; MUL mem8 	Direct
 
+(define_insn "*mulqi3_1_nf"
+  [(set (match_operand:QI 0 "register_operand" "=a")
+	(mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
+		 (match_operand:QI 2 "nonimmediate_operand" "qm")))]
+  "TARGET_APX_NF
+   && TARGET_QIMODE_MATH
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "%{nf%} mul{b}\t%2"
+  [(set_attr "type" "imul")
+   (set_attr "length_immediate" "0")
+   (set (attr "athlon_decode")
+     (if_then_else (eq_attr "cpu" "athlon")
+        (const_string "vector")
+        (const_string "direct")))
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "bdver1_decode" "direct")
+   (set_attr "mode" "QI")])
+
 (define_insn "*mulqi3_1"
   [(set (match_operand:QI 0 "register_operand" "=a")
 	(mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
@@ -11128,6 +11182,19 @@
   [(set_attr "type" "multi")
    (set_attr "mode" "SI")])
 
+(define_insn "*<u>divmod<mode>4_noext_nf"
+  [(set (match_operand:SWIM248 0 "register_operand" "=a")
+	(any_div:SWIM248
+	  (match_operand:SWIM248 2 "register_operand" "0")
+	  (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:SWIM248 1 "register_operand" "=d")
+	(<paired_mod>:SWIM248 (match_dup 2) (match_dup 3)))
+   (use (match_operand:SWIM248 4 "register_operand" "1"))]
+  "TARGET_APX_NF"
+  "%{nf%} <sgnprefix>div{<imodesuffix>}\t%3"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<u>divmod<mode>4_noext"
   [(set (match_operand:SWIM248 0 "register_operand" "=a")
 	(any_div:SWIM248
@@ -11275,6 +11342,25 @@
 ;; Change div/mod to HImode and extend the second argument to HImode
 ;; so that mode of div/mod matches with mode of arguments.  Otherwise
 ;; combine may fail.
+(define_insn "<u>divmodhiqi3_nf"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(ior:HI
+	  (ashift:HI
+	    (zero_extend:HI
+	      (truncate:QI
+		(mod:HI (match_operand:HI 1 "register_operand" "0")
+			(any_extend:HI
+			  (match_operand:QI 2 "nonimmediate_operand" "qm")))))
+	    (const_int 8))
+	  (zero_extend:HI
+	    (truncate:QI
+	      (div:HI (match_dup 1) (any_extend:HI (match_dup 2)))))))]
+  "TARGET_APX_NF
+  && TARGET_QIMODE_MATH"
+  "%{nf%} <sgnprefix>div{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
 (define_insn "<u>divmodhiqi3"
   [(set (match_operand:HI 0 "register_operand" "=a")
 	(ior:HI
-- 
2.31.1


[-- Attachment #6: 0008-APX-NF-Support-APX-NF-for-lzcnt-tzcnt-popcnt.patch --]
[-- Type: application/octet-stream, Size: 7216 bytes --]

From 94b3e2011ba1021d8de387bdc176c7f848e457c1 Mon Sep 17 00:00:00 2001
From: konglin1 <lingling.kong@intel.com>
Date: Tue, 5 Mar 2024 10:04:51 +0800
Subject: [PATCH 8/8] [APX NF] Support APX NF for lzcnt/tzcnt/popcnt

gcc/ChangeLog:

	* config/i386/i386.md (clz<mode>2_lzcnt_nf): New define_insn.
	(*clz<mode>2_lzcnt_falsedep_nf): Ditto.
	(<lt_zcnt>_<mode>_nf): Ditto.
	(*<lt_zcnt>_<mode>_falsedep_nf): Ditto.
	(<lt_zcnt>_hi_nf): Ditto.
	(popcount<mode>2_nf): Ditto.
	(*popcount<mode>2_falsedep_nf): Ditto.
	(popcounthi2_nf): Ditto.
---
 gcc/config/i386/i386.md | 132 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 55f65a31b16..ddde83e57f5 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -21029,6 +21029,24 @@
   operands[3] = gen_reg_rtx (<MODE>mode);
 })
 
+(define_insn_and_split "clz<mode>2_lzcnt_nf"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(clz:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
+  "TARGET_APX_NF && TARGET_LZCNT"
+  "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
+  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && !reg_mentioned_p (operands[0], operands[1])"
+  [(parallel
+    [(set (match_dup 0)
+	  (clz:SWI48 (match_dup 1)))
+     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+  "ix86_expand_clear (operands[0]);"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn_and_split "clz<mode>2_lzcnt"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(clz:SWI48
@@ -21052,6 +21070,18 @@
 ; False dependency happens when destination is only updated by tzcnt,
 ; lzcnt or popcnt.  There is no false dependency when destination is
 ; also used in source.
+(define_insn "*clz<mode>2_lzcnt_falsedep_nf"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(clz:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
+	   UNSPEC_INSN_FALSE_DEP)]
+  "TARGET_APX_NF && TARGET_LZCNT"
+  "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*clz<mode>2_lzcnt_falsedep"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(clz:SWI48
@@ -21158,6 +21188,25 @@
 ;; Version of lzcnt/tzcnt that is expanded from intrinsics.  This version
 ;; provides operand size as output when source operand is zero. 
 
+(define_insn_and_split "<lt_zcnt>_<mode>_nf"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(unspec:SWI48
+	  [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))]
+  "TARGET_APX_NF"
+  "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
+  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && !reg_mentioned_p (operands[0], operands[1])"
+  [(parallel
+    [(set (match_dup 0)
+	  (unspec:SWI48 [(match_dup 1)] LT_ZCNT))
+     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+  "ix86_expand_clear (operands[0]);"
+  [(set_attr "type" "<lt_zcnt_type>")
+   (set_attr "prefix_0f" "1")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn_and_split "<lt_zcnt>_<mode>"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(unspec:SWI48
@@ -21182,6 +21231,20 @@
 ; False dependency happens when destination is only updated by tzcnt,
 ; lzcnt or popcnt.  There is no false dependency when destination is
 ; also used in source.
+; This is the APX NF form of the pattern; it emits the {nf} prefix.
+(define_insn "*<lt_zcnt>_<mode>_falsedep_nf"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(unspec:SWI48
+	  [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
+   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
+	   UNSPEC_INSN_FALSE_DEP)]
+  "TARGET_APX_NF"
+  "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "<lt_zcnt_type>")
+   (set_attr "prefix_0f" "1")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<lt_zcnt>_<mode>_falsedep"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(unspec:SWI48
@@ -21196,6 +21259,17 @@
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "<lt_zcnt>_hi_nf"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(unspec:HI
+	  [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))]
+  "TARGET_APX_NF"
+  "%{nf%} <lt_zcnt>{w}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "<lt_zcnt_type>")
+   (set_attr "prefix_0f" "1")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "HI")])
+
 (define_insn "<lt_zcnt>_hi"
   [(set (match_operand:HI 0 "register_operand" "=r")
 	(unspec:HI
@@ -21620,6 +21694,30 @@
   [(set_attr "type" "bitmanip")
    (set_attr "mode" "<MODE>")])
 
+(define_insn_and_split "popcount<mode>2_nf"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(popcount:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
+  "TARGET_APX_NF && TARGET_POPCNT"
+{
+#if TARGET_MACHO
+  return "%{nf%} popcnt\t{%1, %0|%0, %1}";
+#else
+  return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
+  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && !reg_mentioned_p (operands[0], operands[1])"
+  [(parallel
+    [(set (match_dup 0)
+	  (popcount:SWI48 (match_dup 1)))
+     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+  "ix86_expand_clear (operands[0]);"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn_and_split "popcount<mode>2"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(popcount:SWI48
@@ -21649,6 +21747,24 @@
 ; False dependency happens when destination is only updated by tzcnt,
 ; lzcnt or popcnt.  There is no false dependency when destination is
 ; also used in source.
+(define_insn "*popcount<mode>2_falsedep_nf"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(popcount:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
+	   UNSPEC_INSN_FALSE_DEP)]
+  "TARGET_APX_NF && TARGET_POPCNT"
+{
+#if TARGET_MACHO
+  return "%{nf%} popcnt\t{%1, %0|%0, %1}";
+#else
+  return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*popcount<mode>2_falsedep"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(popcount:SWI48
@@ -21806,6 +21922,22 @@
   DONE;
 })
 
+(define_insn "popcounthi2_nf"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(popcount:HI
+	  (match_operand:HI 1 "nonimmediate_operand" "rm")))]
+  "TARGET_APX_NF && TARGET_POPCNT"
+{
+#if TARGET_MACHO
+  return "%{nf%} popcnt\t{%1, %0|%0, %1}";
+#else
+  return "%{nf%} popcnt{w}\t{%1, %0|%0, %1}";
+#endif
+}
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "HI")])
+
 (define_insn "popcounthi2"
   [(set (match_operand:HI 0 "register_operand" "=r")
 	(popcount:HI
-- 
2.31.1


[-- Attachment #7: 0001-APX-NF-Support-APX-NF-add.patch --]
[-- Type: application/octet-stream, Size: 5276 bytes --]

From 3c5d5b38d3fb36feb14b08bdf7dfad1b95c4c92d Mon Sep 17 00:00:00 2001
From: Hongyu Wang <hongyu.wang@intel.com>
Date: Tue, 9 Jan 2024 13:25:59 +0800
Subject: [PATCH 1/8] [APX NF]: Support APX NF add

The APX NF (no flags) feature suppresses the update of status flags
for arithmetic operations.

For NF add, it is not clear whether NF add can be faster than lea. If so,
the pattern needs to be adjusted to prefer lea generation.

gcc/ChangeLog:

        * config/i386/i386-opts.h (enum apx_features): Add nf
        enumeration.
        * config/i386/i386.h (TARGET_APX_NF): New.
        * config/i386/i386.md (*add<mode>_1_nf): New define_insn.
        * config/i386/i386.opt: Add apx_nf enumeration.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/apx-ndd.c: Fixed test.
        * gcc.target/i386/apx-nf.c: New test.

Co-authored-by: konglin1 <lingling.kong@intel.com>
---
 gcc/config/i386/i386-opts.h             |  3 +-
 gcc/config/i386/i386.h                  |  1 +
 gcc/config/i386/i386.md                 | 42 +++++++++++++++++++++++++
 gcc/config/i386/i386.opt                |  3 ++
 gcc/testsuite/gcc.target/i386/apx-ndd.c |  2 +-
 gcc/testsuite/gcc.target/i386/apx-nf.c  |  6 ++++
 6 files changed, 55 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-nf.c

diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
index ef2825803b3..60176ce609f 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -140,7 +140,8 @@ enum apx_features {
   apx_push2pop2 = 1 << 1,
   apx_ndd = 1 << 2,
   apx_ppx = 1 << 3,
-  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx,
+  apx_nf = 1 << 4,
+  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx | apx_nf,
 };
 
 #endif
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 529edff93a4..f20ae4726da 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -55,6 +55,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_APX_PUSH2POP2 (ix86_apx_features & apx_push2pop2)
 #define TARGET_APX_NDD (ix86_apx_features & apx_ndd)
 #define TARGET_APX_PPX (ix86_apx_features & apx_ppx)
+#define TARGET_APX_NF (ix86_apx_features & apx_nf)
 
 #include "config/vxworks-dummy.h"
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 764bfe20ff2..4a9e35c4990 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6233,6 +6233,48 @@
     }
 })
 \f
+;; NF instructions.
+
+(define_insn "*add<mode>_1_nf"
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=rm,rje,r,r,r,r,r,r")
+	(plus:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand" "%0,0,0,r,r,rje,jM,r")
+	  (match_operand:SWI 2 "x86_64_general_operand" "r,e,BM,0,le,r,e,BM")))]
+  "TARGET_APX_NF
+   && ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
+			       TARGET_APX_NDD)"
+{
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
+  if (which_alternative == 3)
+    std::swap (operands[1], operands[2]);
+
+  if (operands[2] == const1_rtx)
+    return use_ndd
+	  ? "%{nf%} inc{<imodesuffix>}\t{%1, %0|%0, %1}"
+	  : "%{nf%} inc{<imodesuffix>}\t{%0|%0}";
+
+  if (operands[2] == constm1_rtx)
+    return use_ndd
+	  ? "%{nf%} dec{<imodesuffix>}\t{%1, %0|%0, %1}"
+	  : "%{nf%} dec{<imodesuffix>}\t{%0|%0}";
+
+  return use_ndd
+	 ? "%{nf%} add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+	 : "%{nf%} add{<imodesuffix>}\t{%2, %0|%0, %2}";
+}
+  [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd")
+   (set (attr "type")
+     (cond [(eq_attr "alternative" "4")
+              (const_string "lea")
+	   ]
+	   (const_string "alu")))
+   (set (attr "length_immediate")
+      (if_then_else
+	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
+	(const_string "1")
+	(const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
 ;; Load effective address instructions
 
 (define_insn "*lea<mode>"
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index d5f793a9e8b..66021d59d4e 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1356,6 +1356,9 @@ Enum(apx_features) String(ndd) Value(apx_ndd) Set(4)
 EnumValue
 Enum(apx_features) String(ppx) Value(apx_ppx) Set(5)
 
+EnumValue
+Enum(apx_features) String(nf) Value(apx_nf) Set(6)
+
 EnumValue
 Enum(apx_features) String(all) Value(apx_all) Set(1)
 
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index 0eb751ad225..0ff4df0780c 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-mapxf -march=x86-64 -O2" } */
+/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx -march=x86-64 -O2" } */
 /* { dg-final { scan-assembler-not "movl"} } */
 
 #include <stdint.h>
diff --git a/gcc/testsuite/gcc.target/i386/apx-nf.c b/gcc/testsuite/gcc.target/i386/apx-nf.c
new file mode 100644
index 00000000000..3adc7a27902
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-nf.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx,nf -march=x86-64 -O2" } */
+/* { dg-final { scan-assembler-times "\{nf\} add" 4 } } */
+
+#include "apx-ndd.c"
+
-- 
2.31.1


[-- Attachment #8: 0002-APX-NF-Support-APX-NF-for-sub-and-or-xor-neg.patch --]
[-- Type: application/octet-stream, Size: 9074 bytes --]

From cc27b68039628c03124abe40e2cc4b055e60baa8 Mon Sep 17 00:00:00 2001
From: konglin1 <lingling.kong@intel.com>
Date: Wed, 24 Jan 2024 15:26:38 +0800
Subject: [PATCH 2/8] [APX NF] Support APX NF for {sub/and/or/xor/neg}

gcc/ChangeLog:

	* config/i386/i386.md (*sub<mode>_1_nf): New define_insn.
	(*anddi_1_nf): Ditto.
	(*and<mode>_1_nf): Ditto.
	(*<code>qi_1_nf): Ditto.
	(*<code><mode>_1_nf): Ditto.
	(*neg<mode>_1_nf): Ditto.
	* config/i386/sse.md: New define_split.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/apx-nf.c: Add test.
---
 gcc/config/i386/i386.md                | 129 +++++++++++++++++++++++++
 gcc/config/i386/sse.md                 |  11 +++
 gcc/testsuite/gcc.target/i386/apx-nf.c |   9 ++
 3 files changed, 149 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4a9e35c4990..66dc5e1035f 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -7888,6 +7888,24 @@
   "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);"
 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
 
+(define_insn "*sub<mode>_1_nf"
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,rjM,<r>,r,r,r")
+	(minus:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand" "0,0,0,rm,rjM,r")
+	  (match_operand:SWI 2 "<general_operand>" "<r>,<i>,<m>,r,<i>,<m>")))]
+  "TARGET_APX_NF
+   && ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+  %{nf%} sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  %{nf%} sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  %{nf%} sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  %{nf%} sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  %{nf%} sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  %{nf%} sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*sub<mode>_1"
   [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r,r")
 	(minus:SWI
@@ -11790,6 +11808,27 @@
 }
 [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd_64,apx_ndd")])
 
+(define_insn "*anddi_1_nf"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,rjM,r,r,r,r,?k")
+	(and:DI
+	 (match_operand:DI 1 "nonimmediate_operand" "%0,r,0,0,0,rm,rjM,r,k")
+	 (match_operand:DI 2 "x86_64_szext_general_operand" "Z,Z,r,e,m,r,e,m,k")))]
+  "TARGET_APX_NF
+   && ix86_binary_operator_ok (AND, DImode, operands, TARGET_APX_NDD)"
+  "@
+   %{nf%} and{l}\t{%k2, %k0|%k0, %k2}
+   %{nf%} and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
+   %{nf%} and{q}\t{%2, %0|%0, %2}
+   %{nf%} and{q}\t{%2, %0|%0, %2}
+   %{nf%} and{q}\t{%2, %0|%0, %2}
+   %{nf%} and{q}\t{%2, %1, %0|%0, %1, %2}
+   %{nf%} and{q}\t{%2, %1, %0|%0, %1, %2}
+   %{nf%} and{q}\t{%2, %1, %0|%0, %1, %2}
+   #"
+  [(set_attr "isa" "*,apx_ndd,*,*,*,apx_ndd,apx_ndd,apx_ndd,avx512bw")
+   (set_attr "type" "alu,alu,alu,alu,alu,alu,alu,alu,msklog")
+   (set_attr "mode" "SI,SI,DI,DI,DI,DI,DI,DI,DI")])
+
 (define_insn "*anddi_1"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r,r,r,r,?k")
 	(and:DI
@@ -11889,6 +11928,33 @@
    (set_attr "isa" "*,apx_ndd,apx_ndd,apx_ndd")
    (set_attr "mode" "SI")])
 
+(define_insn "*and<mode>_1_nf"
+  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,rjM,r,r,r,r,?k")
+	(and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,0,rm,rjM,r,k")
+		   (match_operand:SWI24 2 "<general_operand>" "r,<i>,<m>,r,<i>,<m>,k")))]
+  "TARGET_APX_NF &&
+   ix86_binary_operator_ok (AND, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+   %{nf%} and{<imodesuffix>}\t{%2, %0|%0, %2}
+   %{nf%} and{<imodesuffix>}\t{%2, %0|%0, %2}
+   %{nf%} and{<imodesuffix>}\t{%2, %0|%0, %2}
+   %{nf%} and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   %{nf%} and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   %{nf%} and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   #"
+  [(set (attr "isa")
+	(cond [(eq_attr "alternative" "3,4,5")
+		 (const_string "apx_ndd")
+	       (eq_attr "alternative" "6")
+		 (if_then_else (eq_attr "mode" "SI")
+		   (const_string "avx512bw")
+		   (const_string "avx512f"))
+	      ]
+	      (const_string "*")))
+   (set_attr "type" "alu,alu,alu,alu,alu,alu,msklog")
+   (set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*and<mode>_1"
   [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,r,r,r,Ya,?k")
 	(and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,rm,rjM,r,qm,k")
@@ -11923,6 +11989,37 @@
        (const_string "*")))
    (set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,<MODE>,SI,<MODE>")])
 
+;; NF for and,or,xor
+
+(define_insn "*<code>qi_1_nf"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
+	(any_logic:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
+		   (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))]
+  "TARGET_APX_NF &&
+   ix86_binary_operator_ok (<CODE>, QImode, operands, TARGET_APX_NDD)"
+  "@
+   %{nf%} <logic>{b}\t{%2, %0|%0, %2}
+   %{nf%} <logic>{b}\t{%2, %0|%0, %2}
+   %{nf%} <logic>{l}\t{%k2, %k0|%k0, %k2}
+   %{nf%} <logic>{b}\t{%2, %1, %0|%0, %1, %2}
+   %{nf%} <logic>{b}\t{%2, %1, %0|%0, %1, %2}
+   #"
+  [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f")
+   (set_attr "type" "alu,alu,alu,alu,alu,msklog")
+   (set (attr "mode")
+	(cond [(eq_attr "alternative" "2")
+		 (const_string "SI")
+		(and (eq_attr "alternative" "5")
+		     (match_test "!TARGET_AVX512DQ"))
+		 (const_string "HI")
+	       ]
+	       (const_string "QI")))
+   ;; Potential partial reg stall on alternative 2.
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "2")
+	      (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
+	   (symbol_ref "true")))])
+
 (define_insn "*andqi_1"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
 	(and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
@@ -12797,6 +12894,26 @@
 }
 [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd_64,apx_ndd")])
 
+;; or xor
+(define_insn "*<code><mode>_1_nf"
+  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,rjM,r,r,r,r,?k")
+	(any_or:SWI248
+	  (match_operand:SWI248 1 "nonimmediate_operand" "0,0,0,rm,rjM,r,k")
+	  (match_operand:SWI248 2 "<general_operand>" "r,<i>,<m>,r,<i>,<m>,k")))]
+  "TARGET_APX_NF &&
+   ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+  %{nf%} <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+  %{nf%} <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+  %{nf%} <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+  %{nf%} <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  %{nf%} <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  %{nf%} <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  #"
+  [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd,<kmov_isa>")
+   (set_attr "type" "alu,alu, alu, alu, alu, alu, msklog")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<code><mode>_1"
   [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,r,r,r,?k")
 	(any_or:SWI248
@@ -13529,6 +13646,18 @@
 			      (const_int 0)))
      (clobber (reg:CC FLAGS_REG))])])
 
+(define_insn "*neg<mode>_1_nf"
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
+	(neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")))]
+  "TARGET_APX_NF &&
+   ix86_unary_operator_ok (NEG, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+  %{nf%} neg{<imodesuffix>}\t%0
+  %{nf%} neg{<imodesuffix>}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "negnot")
+   (set_attr "isa" "*,apx_ndd")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*neg<mode>_1"
   [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
 	(neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")))
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index f57f36ae380..72d4556f47d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2005,6 +2005,17 @@
 	   ]
 	   (const_string "<MODE>")))])
 
+(define_split
+  [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
+	(any_logic:SWI1248_AVX512BW
+	  (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
+	  (match_operand:SWI1248_AVX512BW 2 "mask_reg_operand")))]
+  "TARGET_AVX512F && reload_completed"
+  [(parallel
+     [(set (match_dup 0)
+	   (any_logic:SWI1248_AVX512BW (match_dup 1) (match_dup 2)))
+      (unspec [(const_int 0)] UNSPEC_MASKOP)])])
+
 (define_split
   [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
 	(any_logic:SWI1248_AVX512BW
diff --git a/gcc/testsuite/gcc.target/i386/apx-nf.c b/gcc/testsuite/gcc.target/i386/apx-nf.c
index 3adc7a27902..608dbf8f5f7 100644
--- a/gcc/testsuite/gcc.target/i386/apx-nf.c
+++ b/gcc/testsuite/gcc.target/i386/apx-nf.c
@@ -1,6 +1,15 @@
 /* { dg-do compile { target { ! ia32 } } } */
 /* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx,nf -march=x86-64 -O2" } */
 /* { dg-final { scan-assembler-times "\{nf\} add" 4 } } */
+/* { dg-final { scan-assembler-times "\{nf\} and" 1 } } */
+/* { dg-final { scan-assembler-times "\{nf\} or" 1 } } */
 
 #include "apx-ndd.c"
 
+struct B { unsigned bit0 : 1; unsigned bit1 : 1; };
+
+void
+foo (struct B *b)
+{
+    b->bit0 = b->bit0 | b->bit1;
+}
-- 
2.31.1


[-- Attachment #9: 0003-APX-NF-Support-APX-NF-for-left-shift-insns.patch --]
[-- Type: application/octet-stream, Size: 8648 bytes --]

From 9384a400945c4134acc01833a0a02aec412adcef Mon Sep 17 00:00:00 2001
From: konglin1 <lingling.kong@intel.com>
Date: Wed, 24 Jan 2024 16:49:01 +0800
Subject: [PATCH 3/8] [APX NF] Support APX NF for left shift insns

gcc/ChangeLog:

	* config/i386/i386.md (*ashl<mode>3_1_nf): New.
	(*ashlhi3_1_nf): Ditto.
	(*ashlqi3_1_nf): Ditto.
	* config/i386/sse.md: New define_split.
---
 gcc/config/i386/i386.md | 175 ++++++++++++++++++++++++++++++++++++++++
 gcc/config/i386/sse.md  |  13 +++
 2 files changed, 188 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 66dc5e1035f..9ffdb3fe71a 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -15126,6 +15126,54 @@
   [(set_attr "type" "ishiftx")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*ashl<mode>3_1_nf"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k,rm")
+		      (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>,c<S>")))]
+  "TARGET_APX_NF &&
+  ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands, TARGET_APX_NDD)"
+{
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+    case TYPE_ISHIFTX:
+    case TYPE_MSKLOG:
+      return "#";
+
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      return "%{nf%} add{<imodesuffix>}\t%0, %0";
+
+    default:
+      return use_ndd ? "%{nf%} sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+		     : "%{nf%} sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set_attr "isa" "*,*,bmi2,avx512bw,apx_ndd")
+   (set (attr "type")
+     (cond [(eq_attr "alternative" "1")
+	      (const_string "lea")
+	    (eq_attr "alternative" "2")
+	      (const_string "ishiftx")
+	    (eq_attr "alternative" "4")
+	      (const_string "ishift")
+            (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+		      (match_operand 0 "register_operand"))
+		 (match_operand 2 "const1_operand"))
+	      (const_string "alu")
+	    (eq_attr "alternative" "3")
+	      (const_string "msklog")
+	   ]
+	   (const_string "ishift")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (eq_attr "type" "alu")
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*ashl<mode>3_1"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r")
 	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k,rm")
@@ -15187,6 +15235,17 @@
    (set_attr "mode" "<MODE>")])
 
 ;; Convert shift to the shiftx pattern to avoid flags dependency.
+;; The NF/NDD shift forms do not accept the shift count in a general
+;; register (r), only c<S>, but they do not clobber the flags.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
+		      (match_operand:QI 2 "register_operand")))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(ashift:SWI48 (match_dup 1) (match_dup 2)))]
+  "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
+
 (define_split
   [(set (match_operand:SWI48 0 "register_operand")
 	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
@@ -15273,6 +15332,50 @@
 	(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
   "operands[2] = gen_lowpart (SImode, operands[2]);")
 
+(define_insn "*ashlhi3_1_nf"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r")
+	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm")
+		   (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI")))]
+  "TARGET_APX_NF &&
+   ix86_binary_operator_ok (ASHIFT, HImode, operands, TARGET_APX_NDD)"
+{
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+    case TYPE_MSKLOG:
+      return "#";
+
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "%{nf%} add{w}\t%0, %0";
+
+    default:
+      return use_ndd ? "%{nf%} sal{w}\t{%2, %1, %0|%0, %1, %2}"
+		     : "%{nf%} sal{w}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set_attr "isa" "*,*,avx512f,apx_ndd")
+   (set (attr "type")
+     (cond [(eq_attr "alternative" "1")
+	      (const_string "lea")
+	    (eq_attr "alternative" "2")
+	      (const_string "msklog")
+	    (eq_attr "alternative" "3")
+	      (const_string "ishift")
+            (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+		      (match_operand 0 "register_operand"))
+		 (match_operand 2 "const1_operand"))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (eq_attr "type" "alu")
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "HI,SI,HI,HI")])
+
 (define_insn "*ashlhi3_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r")
 	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm")
@@ -15326,6 +15429,61 @@
        (const_string "*")))
    (set_attr "mode" "HI,SI,HI,HI")])
 
+(define_insn "*ashlqi3_1_nf"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r")
+	(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm")
+		   (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI")))]
+  "TARGET_APX_NF &&
+   ix86_binary_operator_ok (ASHIFT, QImode, operands, TARGET_APX_NDD)"
+{
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+    case TYPE_MSKLOG:
+      return "#";
+
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      if (REG_P (operands[1]) && !ANY_QI_REGNO_P (REGNO (operands[1])))
+        return "%{nf%} add{l}\t%k0, %k0";
+      else
+        return "%{nf%} add{b}\t%0, %0";
+
+    default:
+      if (get_attr_mode (insn) == MODE_SI)
+	return "%{nf%} sal{l}\t{%2, %k0|%k0, %2}";
+      else
+	return use_ndd ? "%{nf%} sal{b}\t{%2, %1, %0|%0, %1, %2}"
+		       : "%{nf%} sal{b}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set_attr "isa" "*,*,*,avx512dq,apx_ndd")
+   (set (attr "type")
+     (cond [(eq_attr "alternative" "2")
+	      (const_string "lea")
+	    (eq_attr "alternative" "3")
+	      (const_string "msklog")
+	    (eq_attr "alternative" "4")
+	      (const_string "ishift")
+            (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+		      (match_operand 0 "register_operand"))
+		 (match_operand 2 "const1_operand"))
+	      (const_string "alu")
+	   ]
+	   (const_string "ishift")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (eq_attr "type" "alu")
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "QI,SI,SI,QI,QI")
+   ;; Potential partial reg stall on alternative 1.
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "1")
+	      (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
+	   (symbol_ref "true")))])
+
 (define_insn "*ashlqi3_1"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r")
 	(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm")
@@ -15448,6 +15606,23 @@
    (set_attr "mode" "<MODE>")])
 
 ;; Convert ashift to the lea pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:SWI 0 "general_reg_operand")
+	(ashift:SWI (match_operand:SWI 1 "index_reg_operand")
+		    (match_operand 2 "const_0_to_3_operand")))]
+  "reload_completed
+   && REGNO (operands[0]) != REGNO (operands[1])"
+  [(set (match_dup 0)
+	(mult:<LEAMODE> (match_dup 1) (match_dup 2)))]
+{
+  if (<MODE>mode != <LEAMODE>mode)
+    {
+      operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
+      operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
+    }
+  operands[2] = GEN_INT (1 << INTVAL (operands[2]));
+})
+
 (define_split
   [(set (match_operand:SWI 0 "general_reg_operand")
 	(ashift:SWI (match_operand:SWI 1 "index_reg_operand")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 72d4556f47d..498ca5e4d1b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2169,6 +2169,19 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "<MODE>")])
 
+(define_split
+  [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
+	(any_lshift:SWI1248_AVX512BW
+	  (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
+	  (match_operand 2 "const_int_operand")))]
+  "TARGET_AVX512F && reload_completed"
+  [(parallel
+     [(set (match_dup 0)
+	   (any_lshift:SWI1248_AVX512BW
+	     (match_dup 1)
+	     (match_dup 2)))
+      (unspec [(const_int 0)] UNSPEC_MASKOP)])])
+
 (define_split
   [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
 	(any_lshift:SWI1248_AVX512BW
-- 
2.31.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/8] [APX NF]: Support APX NF add
  2024-05-15  7:43 ` [PATCH 1/8] [APX NF]: Support APX NF add Kong, Lingling
  2024-05-15  8:14   ` Uros Bizjak
@ 2024-05-15  8:46   ` Uros Bizjak
  2024-05-22  8:29     ` [PATCH v2 " Kong, Lingling
  1 sibling, 1 reply; 13+ messages in thread
From: Uros Bizjak @ 2024-05-15  8:46 UTC (permalink / raw)
  To: Kong, Lingling; +Cc: gcc-patches, Liu, Hongtao, Wang, Hongyu

On Wed, May 15, 2024 at 9:43 AM Kong, Lingling <lingling.kong@intel.com> wrote:
>
> From: Hongyu Wang <hongyu.wang@intel.com>
>
> APX NF(no flags) feature implements suppresses the update of status flags for arithmetic operations.
>
> For NF add, it is not clear whether NF add can be faster than lea. If so, the pattern needs to be adjusted to prefer LEA generation.
>
> gcc/ChangeLog:
>
>         * config/i386/i386-opts.h (enum apx_features): Add nf
>         enumeration.
>         * config/i386/i386.h (TARGET_APX_NF): New.
>         * config/i386/i386.md (*add<mode>_1_nf): New define_insn.
>         * config/i386/i386.opt: Add apx_nf enumeration.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/apx-ndd.c: Fixed test.
>         * gcc.target/i386/apx-nf.c: New test.
>
> Co-authored-by: Lingling Kong <lingling.kong@intel.com>
>
> Bootstrapped and regtested on x86_64-linux-gnu. And Supported SPEC 2017 run normally on Intel software development emulator.
> Ok for trunk?
>
> ---
>  gcc/config/i386/i386-opts.h             |  3 +-
>  gcc/config/i386/i386.h                  |  1 +
>  gcc/config/i386/i386.md                 | 42 +++++++++++++++++++++++++
>  gcc/config/i386/i386.opt                |  3 ++
>  gcc/testsuite/gcc.target/i386/apx-ndd.c |  2 +-
>  gcc/testsuite/gcc.target/i386/apx-nf.c  |  6 ++++
>  6 files changed, 55 insertions(+), 2 deletions(-)  create mode 100644 gcc/testsuite/gcc.target/i386/apx-nf.c
>
> diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h index ef2825803b3..60176ce609f 100644
> --- a/gcc/config/i386/i386-opts.h
> +++ b/gcc/config/i386/i386-opts.h
> @@ -140,7 +140,8 @@ enum apx_features {
>    apx_push2pop2 = 1 << 1,
>    apx_ndd = 1 << 2,
>    apx_ppx = 1 << 3,
> -  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx,
> +  apx_nf = 1<< 4,
> +  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx | apx_nf,
>  };
>
>  #endif
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 529edff93a4..f20ae4726da 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -55,6 +55,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see  #define TARGET_APX_PUSH2POP2 (ix86_apx_features & apx_push2pop2)  #define TARGET_APX_NDD (ix86_apx_features & apx_ndd)  #define TARGET_APX_PPX (ix86_apx_features & apx_ppx)
> +#define TARGET_APX_NF (ix86_apx_features & apx_nf)
>
>  #include "config/vxworks-dummy.h"
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 764bfe20ff2..4a9e35c4990 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -6233,6 +6233,48 @@
>      }
>  })
>
>
> +;; NF instructions.
> +
> +(define_insn "*add<mode>_1_nf"
> +  [(set (match_operand:SWI 0 "nonimmediate_operand" "=rm,rje,r,r,r,r,r,r")
> +       (plus:SWI
> +         (match_operand:SWI 1 "nonimmediate_operand" "%0,0,0,r,r,rje,jM,r")
> +         (match_operand:SWI 2 "x86_64_general_operand"
> +"r,e,BM,0,le,r,e,BM")))]
> +  "TARGET_APX_NF &&
> +   ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
> +                           TARGET_APX_NDD)"

I wonder if we can use "define_subst" to conditionally add flags
clobber for !TARGET_APX_NF targets. Even the example for "Define
Subst" uses the insn w/ and w/o the clobber, so I think it is worth
considering this approach.

Uros.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v2 1/8] [APX NF]: Support APX NF add
  2024-05-15  8:46   ` Uros Bizjak
@ 2024-05-22  8:29     ` Kong, Lingling
  2024-05-22  8:35       ` Uros Bizjak
  0 siblings, 1 reply; 13+ messages in thread
From: Kong, Lingling @ 2024-05-22  8:29 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches, Liu, Hongtao, Wang, Hongyu, Kong, Lingling

> I wonder if we can use "define_subst" to conditionally add flags clobber
> for !TARGET_APX_NF targets. Even the example for "Define Subst" uses the insn
> w/ and w/o the clobber, so I think it is worth considering this approach.
> 
> Uros.

Good suggestion. I defined a new subst for no flags. Bootstrapped and regtested on x86_64-linux-gnu; SPEC 2017 also runs normally on the Intel Software Development Emulator.
Ok for trunk?

Thanks,
Lingling

Subject: [PATCH v2 1/8] [APX NF]: Support APX NF add
The APX NF (no flags) feature suppresses the update of status flags
for arithmetic operations.

For NF add, it is not clear whether NF add can be faster than lea. If so,
the pattern needs to be adjusted to prefer lea generation.

gcc/ChangeLog:

	* config/i386/i386-opts.h (enum apx_features): Add nf
	enumeration.
	* config/i386/i386.h (TARGET_APX_NF): New.
	* config/i386/i386.md (nf_subst): New define_subst.
	(nf_name): New subst_attr.
	(nf_prefix): Ditto.
	(nf_condition): Ditto.
	(nf_mem_constraint): Ditto.
	(nf_applied): Ditto.
	(*add<mode>_1_nf): New define_insn.
	(addhi_1_nf): Ditto.
	(addqi_1_nf): Ditto.
	* config/i386/i386.opt: Add apx_nf enumeration.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/apx-ndd.c: Fixed test.
	* gcc.target/i386/apx-nf.c: New test.

Co-authored-by: Lingling Kong <lingling.kong@intel.com>
---
 gcc/config/i386/i386-opts.h             |   3 +-
 gcc/config/i386/i386.h                  |   1 +
 gcc/config/i386/i386.md                 | 179 +++++++++++++++---------
 gcc/config/i386/i386.opt                |   3 +
 gcc/testsuite/gcc.target/i386/apx-ndd.c |   2 +-
 gcc/testsuite/gcc.target/i386/apx-nf.c  |   6 +
 6 files changed, 126 insertions(+), 68 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-nf.c

diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
index ef2825803b3..60176ce609f 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -140,7 +140,8 @@ enum apx_features {
   apx_push2pop2 = 1 << 1,
   apx_ndd = 1 << 2,
   apx_ppx = 1 << 3,
-  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx,
+  apx_nf = 1<< 4,
+  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx | apx_nf,
 };
 
 #endif
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 529edff93a4..f20ae4726da 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -55,6 +55,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_APX_PUSH2POP2 (ix86_apx_features & apx_push2pop2)
 #define TARGET_APX_NDD (ix86_apx_features & apx_ndd)
 #define TARGET_APX_PPX (ix86_apx_features & apx_ppx)
+#define TARGET_APX_NF (ix86_apx_features & apx_nf)
 
 #include "config/vxworks-dummy.h"
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 764bfe20ff2..bae344518bd 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6233,28 +6233,6 @@
     }
 })
 

-;; Load effective address instructions
-
-(define_insn "*lea<mode>"
-  [(set (match_operand:SWI48 0 "register_operand" "=r")
-	(match_operand:SWI48 1 "address_no_seg_operand" "Ts"))]
-  "ix86_hardreg_mov_ok (operands[0], operands[1])"
-{
-  if (SImode_address_operand (operands[1], VOIDmode))
-    {
-      gcc_assert (TARGET_64BIT);
-      return "lea{l}\t{%E1, %k0|%k0, %E1}";
-    }
-  else
-    return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}";
-}
-  [(set_attr "type" "lea")
-   (set (attr "mode")
-     (if_then_else
-       (match_operand 1 "SImode_address_operand")
-       (const_string "SI")
-       (const_string "<MODE>")))])
-
 (define_peephole2
   [(set (match_operand:SWI48 0 "register_operand")
 	(match_operand:SWI48 1 "address_no_seg_operand"))]
@@ -6290,6 +6268,13 @@
   [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
               (clobber (reg:CC FLAGS_REG))])]
   "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+
+(define_split
+  [(set (match_operand:SWI48 0 "general_reg_operand")
+	(mult:SWI48 (match_dup 0) (match_operand:SWI48 1 "const1248_operand")))]
+  "TARGET_APX_NF && reload_completed"
+  [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))]
+  "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
 

 ;; Add instructions
 
@@ -6437,48 +6422,65 @@
 	      (clobber (reg:CC FLAGS_REG))])]
  "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[5]);")
 
-(define_insn "*add<mode>_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r,r")
+(define_subst_attr "nf_name" "nf_subst" "_nf" "")
+(define_subst_attr "nf_prefix" "nf_subst" "%{nf%} " "")
+(define_subst_attr "nf_condition" "nf_subst" "TARGET_APX_NF" "true")
+(define_subst_attr "nf_mem_constraint" "nf_subst" "je" "m")
+(define_subst_attr "nf_applied" "nf_subst" "true" "false")
+
+(define_subst "nf_subst"
+  [(set (match_operand:SWI 0)
+        (match_operand:SWI 1))]
+  ""
+  [(set (match_dup 0)
+	(match_dup 1))
+	(clobber (reg:CC FLAGS_REG))])
+
+(define_insn "*add<mode>_1<nf_name>"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r<nf_mem_constraint>,r,r,r,r,r,r")
 	(plus:SWI48
-	  (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rje,jM,r")
-	  (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le,r,e,BM")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
+	  (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,0,r,r,rje,jM,r")
+	  (match_operand:SWI48 2 "x86_64_general_operand" "r,e,BM,0,le,r,e,BM")))]
+  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
+  && <nf_condition>"
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
-      return "#";
+      if (TARGET_APX_NDD && <nf_applied>)
+	return "%{nf%} add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
+      else
+	return "#";
 
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
-        return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
-		      : "inc{<imodesuffix>}\t%0";
+        return use_ndd ? "<nf_prefix>inc{<imodesuffix>}\t{%1, %0|%0, %1}"
+		      : "<nf_prefix>inc{<imodesuffix>}\t%0";
       else
         {
 	  gcc_assert (operands[2] == constm1_rtx);
-	  return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
-			: "dec{<imodesuffix>}\t%0";
+	  return use_ndd ? "<nf_prefix>dec{<imodesuffix>}\t{%1, %0|%0, %1}"
+			: "<nf_prefix>dec{<imodesuffix>}\t%0";
 	}
 
     default:
       /* For most processors, ADD is faster than LEA.  This alternative
 	 was added to use ADD as much as possible.  */
-      if (which_alternative == 2)
+      if (which_alternative == 3)
         std::swap (operands[1], operands[2]);
         
       if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
-        return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
-		      : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+        return use_ndd ? "<nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+		      : "<nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2}";
 
-      return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
-		    : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+      return use_ndd ? "<nf_prefix>add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+		    : "<nf_prefix>add{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
+  [(set_attr "isa" "*,*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
    (set (attr "type")
-     (cond [(eq_attr "alternative" "3")
+     (cond [(eq_attr "alternative" "4")
               (const_string "lea")
 	    (match_operand:SWI48 2 "incdec_operand")
 	      (const_string "incdec")
@@ -6491,6 +6493,28 @@
 	(const_string "*")))
    (set_attr "mode" "<MODE>")])
 
+;; Load effective address instructions
+
+(define_insn "*lea<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(match_operand:SWI48 1 "address_no_seg_operand" "Ts"))]
+  "ix86_hardreg_mov_ok (operands[0], operands[1])"
+{
+  if (SImode_address_operand (operands[1], VOIDmode))
+    {
+      gcc_assert (TARGET_64BIT);
+      return "lea{l}\t{%E1, %k0|%k0, %E1}";
+    }
+  else
+    return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}";
+}
+  [(set_attr "type" "lea")
+   (set (attr "mode")
+     (if_then_else
+       (match_operand 1 "SImode_address_operand")
+       (const_string "SI")
+       (const_string "<MODE>")))])
+
 ;; It may seem that nonimmediate operand is proper one for operand 1.
 ;; The addsi_1 pattern allows nonimmediate operand at that place and
 ;; we take care in ix86_binary_operator_ok to not allow two memory
@@ -6552,26 +6576,29 @@
 	(const_string "*")))
    (set_attr "mode" "SI")])
 
-(define_insn "*addhi_1"
+(define_insn "*addhi_1<nf_name>"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp,r,r")
 	(plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp,rm,r")
-		 (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)"
+		 (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))]
+  "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)
+  && <nf_condition>"
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
-      return "#";
+      if (TARGET_APX_NDD && <nf_applied>)
+	return "%{nf%} add{w}\t{%2, %1, %0|%0, %1, %2}";
+      else
+	return "#";
 
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
-	return use_ndd ? "inc{w}\t{%1, %0|%0, %1}" : "inc{w}\t%0";
+	return use_ndd ? "<nf_prefix>inc{w}\t{%1, %0|%0, %1}" : "<nf_prefix>inc{w}\t%0";
       else
 	{
 	  gcc_assert (operands[2] == constm1_rtx);
-	  return use_ndd ? "dec{w}\t{%1, %0|%0, %1}" : "dec{w}\t%0";
+	  return use_ndd ? "<nf_prefix>dec{w}\t{%1, %0|%0, %1}" : "<nf_prefix>dec{w}\t%0";
 	}
 
     default:
@@ -6581,11 +6608,11 @@
         std::swap (operands[1], operands[2]);
 
       if (x86_maybe_negate_const_int (&operands[2], HImode))
-	return use_ndd ? "sub{w}\t{%2, %1, %0|%0, %1, %2}"
-		       : "sub{w}\t{%2, %0|%0, %2}";
+	return use_ndd ? "<nf_prefix>sub{w}\t{%2, %1, %0|%0, %1, %2}"
+		       : "<nf_prefix>sub{w}\t{%2, %0|%0, %2}";
 
-      return use_ndd ? "add{w}\t{%2, %1, %0|%0, %1, %2}"
-		     : "add{w}\t{%2, %0|%0, %2}";
+      return use_ndd ? "<nf_prefix>add{w}\t{%2, %1, %0|%0, %1, %2}"
+		     : "<nf_prefix>add{w}\t{%2, %0|%0, %2}";
     }
 }
   [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
@@ -6603,33 +6630,36 @@
 	(const_string "*")))
    (set_attr "mode" "HI,HI,HI,SI,HI,HI")])
 
-(define_insn "*addqi_1"
+(define_insn "*addqi_1<nf_name>"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp,r,r")
 	(plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp,rm,r")
-		 (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)"
+		 (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))]
+  "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)
+  && <nf_condition>"
 {
   bool widen = (get_attr_mode (insn) != MODE_QI);
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
-      return "#";
+      if (TARGET_APX_NDD && <nf_applied>)
+	return "%{nf%} add{b}\t{%2, %1, %0|%0, %1, %2}";
+      else
+	return "#";
 
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
 	if (use_ndd)
-	  return "inc{b}\t{%1, %0|%0, %1}";
+	  return "<nf_prefix>inc{b}\t{%1, %0|%0, %1}";
 	else
-	  return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
+	  return widen ? "<nf_prefix>inc{l}\t%k0" : "<nf_prefix>inc{b}\t%0";
       else
 	{
 	  gcc_assert (operands[2] == constm1_rtx);
 	  if (use_ndd)
-	    return "dec{b}\t{%1, %0|%0, %1}";
+	    return "<nf_prefix>dec{b}\t{%1, %0|%0, %1}";
 	  else
-	    return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
+	    return widen ? "<nf_prefix>dec{l}\t%k0" : "<nf_prefix>dec{b}\t%0";
 	}
 
     default:
@@ -6641,16 +6671,16 @@
       if (x86_maybe_negate_const_int (&operands[2], QImode))
 	{
 	  if (use_ndd)
-	    return "sub{b}\t{%2, %1, %0|%0, %1, %2}";
+	    return "<nf_prefix>sub{b}\t{%2, %1, %0|%0, %1, %2}";
 	  else
-	    return widen ? "sub{l}\t{%2, %k0|%k0, %2}"
-			 : "sub{b}\t{%2, %0|%0, %2}";
+	    return widen ? "<nf_prefix>sub{l}\t{%2, %k0|%k0, %2}"
+			 : "<nf_prefix>sub{b}\t{%2, %0|%0, %2}";
 	}
       if (use_ndd)
-	return "add{b}\t{%2, %1, %0|%0, %1, %2}";
+	return "<nf_prefix>add{b}\t{%2, %1, %0|%0, %1, %2}";
       else
-	return widen ? "add{l}\t{%k2, %k0|%k0, %k2}"
-		     : "add{b}\t{%2, %0|%0, %2}";
+	return widen ? "<nf_prefix>add{l}\t{%k2, %k0|%k0, %k2}"
+		     : "<nf_prefix>add{b}\t{%2, %0|%0, %2}";
     }
 }
   [(set_attr "isa" "*,*,*,*,*,*,apx_ndd,apx_ndd")
@@ -6824,6 +6854,23 @@
     }
 })
 
+(define_split
+  [(set (match_operand:SWI 0 "register_operand")
+	(plus:SWI (match_operand:SWI 1 "register_operand")
+		  (match_operand:SWI 2 "<nonmemory_operand>")))]
+  "TARGET_APX_NF && reload_completed
+   && ix86_lea_for_add_ok (insn, operands)"
+  [(set (match_dup 0)
+	(plus:<LEAMODE> (match_dup 1) (match_dup 2)))]
+{
+  if (<MODE>mode != <LEAMODE>mode)
+    {
+      operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
+      operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
+      operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]);
+    }
+})
+
 ;; Convert add to the lea pattern to avoid flags dependency.
 (define_split
   [(set (match_operand:DI 0 "register_operand")
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index d5f793a9e8b..66021d59d4e 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1356,6 +1356,9 @@ Enum(apx_features) String(ndd) Value(apx_ndd) Set(4)
 EnumValue
 Enum(apx_features) String(ppx) Value(apx_ppx) Set(5)
 
+EnumValue
+Enum(apx_features) String(nf) Value(apx_nf) Set(6)
+
 EnumValue
 Enum(apx_features) String(all) Value(apx_all) Set(1)
 
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index 0eb751ad225..0ff4df0780c 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-mapxf -march=x86-64 -O2" } */
+/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx -march=x86-64 -O2" } */
 /* { dg-final { scan-assembler-not "movl"} } */
 
 #include <stdint.h>
diff --git a/gcc/testsuite/gcc.target/i386/apx-nf.c b/gcc/testsuite/gcc.target/i386/apx-nf.c
new file mode 100644
index 00000000000..3adc7a27902
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-nf.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx,nf -march=x86-64 -O2" } */
+/* { dg-final { scan-assembler-times "\{nf\} add" 4 } } */
+
+#include "apx-ndd.c"
+
-- 
2.31.1



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 1/8] [APX NF]: Support APX NF add
  2024-05-22  8:29     ` [PATCH v2 " Kong, Lingling
@ 2024-05-22  8:35       ` Uros Bizjak
  0 siblings, 0 replies; 13+ messages in thread
From: Uros Bizjak @ 2024-05-22  8:35 UTC (permalink / raw)
  To: Kong, Lingling; +Cc: gcc-patches, Liu, Hongtao, Wang, Hongyu

On Wed, May 22, 2024 at 10:29 AM Kong, Lingling <lingling.kong@intel.com> wrote:
>
> > I wonder if we can use "define_subst" to conditionally add flags clobber
> > for !TARGET_APX_NF targets. Even the example for "Define Subst" uses the insn
> > w/ and w/o the clobber, so I think it is worth considering this approach.
> >
> > Uros.
>
> Good suggestion. I defined a new subst for no flags, and bootstrapped and regtested on x86_64-linux-gnu. SPEC 2017 also runs normally on the Intel Software Development Emulator.
> Ok for trunk?
>
> Thanks,
> Lingling
>
> Subject: [PATCH v2 1/8] [APX NF]: Support APX NF add
> The APX NF (no flags) feature suppresses the update of status flags
> for arithmetic operations.
>
> For NF add, it is not clear whether nf add can be faster than lea. If so,
> the pattern needs to be adjusted to prefer lea generation.
>
> gcc/ChangeLog:
>
>         * config/i386/i386-opts.h (enum apx_features): Add nf
>         enumeration.
>         * config/i386/i386.h (TARGET_APX_NF): New.
>         * config/i386/i386.md (nf_subst): New define_subst.
>         (nf_name): New subst_attr.
>         (nf_prefix): Ditto.
>         (nf_condition): Ditto.
>         (nf_mem_constraint): Ditto.
>         (nf_applied): Ditto.
>         (*add<mode>_1_nf): New define_insn.
>         (addhi_1_nf): Ditto.
>         (addqi_1_nf): Ditto.
>         * config/i386/i386.opt: Add apx_nf enumeration.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/apx-ndd.c: Fixed test.
>         * gcc.target/i386/apx-nf.c: New test.

LGTM, but I'll leave the final approval to Hongtao.

Thanks,
Uros.

>
> Co-authored-by: Lingling Kong <lingling.kong@intel.com>
> ---
>  gcc/config/i386/i386-opts.h             |   3 +-
>  gcc/config/i386/i386.h                  |   1 +
>  gcc/config/i386/i386.md                 | 179 +++++++++++++++---------
>  gcc/config/i386/i386.opt                |   3 +
>  gcc/testsuite/gcc.target/i386/apx-ndd.c |   2 +-
>  gcc/testsuite/gcc.target/i386/apx-nf.c  |   6 +
>  6 files changed, 126 insertions(+), 68 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/apx-nf.c
>
> diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
> index ef2825803b3..60176ce609f 100644
> --- a/gcc/config/i386/i386-opts.h
> +++ b/gcc/config/i386/i386-opts.h
> @@ -140,7 +140,8 @@ enum apx_features {
>    apx_push2pop2 = 1 << 1,
>    apx_ndd = 1 << 2,
>    apx_ppx = 1 << 3,
> -  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx,
> +  apx_nf = 1<< 4,
> +  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx | apx_nf,
>  };
>
>  #endif
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 529edff93a4..f20ae4726da 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -55,6 +55,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
>  #define TARGET_APX_PUSH2POP2 (ix86_apx_features & apx_push2pop2)
>  #define TARGET_APX_NDD (ix86_apx_features & apx_ndd)
>  #define TARGET_APX_PPX (ix86_apx_features & apx_ppx)
> +#define TARGET_APX_NF (ix86_apx_features & apx_nf)
>
>  #include "config/vxworks-dummy.h"
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 764bfe20ff2..bae344518bd 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -6233,28 +6233,6 @@
>      }
>  })
>
>
> -;; Load effective address instructions
> -
> -(define_insn "*lea<mode>"
> -  [(set (match_operand:SWI48 0 "register_operand" "=r")
> -       (match_operand:SWI48 1 "address_no_seg_operand" "Ts"))]
> -  "ix86_hardreg_mov_ok (operands[0], operands[1])"
> -{
> -  if (SImode_address_operand (operands[1], VOIDmode))
> -    {
> -      gcc_assert (TARGET_64BIT);
> -      return "lea{l}\t{%E1, %k0|%k0, %E1}";
> -    }
> -  else
> -    return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}";
> -}
> -  [(set_attr "type" "lea")
> -   (set (attr "mode")
> -     (if_then_else
> -       (match_operand 1 "SImode_address_operand")
> -       (const_string "SI")
> -       (const_string "<MODE>")))])
> -
>  (define_peephole2
>    [(set (match_operand:SWI48 0 "register_operand")
>         (match_operand:SWI48 1 "address_no_seg_operand"))]
> @@ -6290,6 +6268,13 @@
>    [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
>                (clobber (reg:CC FLAGS_REG))])]
>    "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
> +
> +(define_split
> +  [(set (match_operand:SWI48 0 "general_reg_operand")
> +       (mult:SWI48 (match_dup 0) (match_operand:SWI48 1 "const1248_operand")))]
> +  "TARGET_APX_NF && reload_completed"
> +  [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))]
> +  "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
>
>
>  ;; Add instructions
>
> @@ -6437,48 +6422,65 @@
>               (clobber (reg:CC FLAGS_REG))])]
>   "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[5]);")
>
> -(define_insn "*add<mode>_1"
> -  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r,r")
> +(define_subst_attr "nf_name" "nf_subst" "_nf" "")
> +(define_subst_attr "nf_prefix" "nf_subst" "%{nf%} " "")
> +(define_subst_attr "nf_condition" "nf_subst" "TARGET_APX_NF" "true")
> +(define_subst_attr "nf_mem_constraint" "nf_subst" "je" "m")
> +(define_subst_attr "nf_applied" "nf_subst" "true" "false")
> +
> +(define_subst "nf_subst"
> +  [(set (match_operand:SWI 0)
> +        (match_operand:SWI 1))]
> +  ""
> +  [(set (match_dup 0)
> +       (match_dup 1))
> +       (clobber (reg:CC FLAGS_REG))])
> +
> +(define_insn "*add<mode>_1<nf_name>"
> +  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r<nf_mem_constraint>,r,r,r,r,r,r")
>         (plus:SWI48
> -         (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rje,jM,r")
> -         (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le,r,e,BM")))
> -   (clobber (reg:CC FLAGS_REG))]
> -  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
> +         (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,0,r,r,rje,jM,r")
> +         (match_operand:SWI48 2 "x86_64_general_operand" "r,e,BM,0,le,r,e,BM")))]
> +  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
> +  && <nf_condition>"
>  {
>    bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
>    switch (get_attr_type (insn))
>      {
>      case TYPE_LEA:
> -      return "#";
> +      if (TARGET_APX_NDD && <nf_applied>)
> +       return "%{nf%} add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
> +      else
> +       return "#";
>
>      case TYPE_INCDEC:
>        if (operands[2] == const1_rtx)
> -        return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
> -                     : "inc{<imodesuffix>}\t%0";
> +        return use_ndd ? "<nf_prefix>inc{<imodesuffix>}\t{%1, %0|%0, %1}"
> +                     : "<nf_prefix>inc{<imodesuffix>}\t%0";
>        else
>          {
>           gcc_assert (operands[2] == constm1_rtx);
> -         return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
> -                       : "dec{<imodesuffix>}\t%0";
> +         return use_ndd ? "<nf_prefix>dec{<imodesuffix>}\t{%1, %0|%0, %1}"
> +                       : "<nf_prefix>dec{<imodesuffix>}\t%0";
>         }
>
>      default:
>        /* For most processors, ADD is faster than LEA.  This alternative
>          was added to use ADD as much as possible.  */
> -      if (which_alternative == 2)
> +      if (which_alternative == 3)
>          std::swap (operands[1], operands[2]);
>
>        if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
> -        return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> -                     : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
> +        return use_ndd ? "<nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> +                     : "<nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2}";
>
> -      return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> -                   : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
> +      return use_ndd ? "<nf_prefix>add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> +                   : "<nf_prefix>add{<imodesuffix>}\t{%2, %0|%0, %2}";
>      }
>  }
> -  [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
> +  [(set_attr "isa" "*,*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
>     (set (attr "type")
> -     (cond [(eq_attr "alternative" "3")
> +     (cond [(eq_attr "alternative" "4")
>                (const_string "lea")
>             (match_operand:SWI48 2 "incdec_operand")
>               (const_string "incdec")
> @@ -6491,6 +6493,28 @@
>         (const_string "*")))
>     (set_attr "mode" "<MODE>")])
>
> +;; Load effective address instructions
> +
> +(define_insn "*lea<mode>"
> +  [(set (match_operand:SWI48 0 "register_operand" "=r")
> +       (match_operand:SWI48 1 "address_no_seg_operand" "Ts"))]
> +  "ix86_hardreg_mov_ok (operands[0], operands[1])"
> +{
> +  if (SImode_address_operand (operands[1], VOIDmode))
> +    {
> +      gcc_assert (TARGET_64BIT);
> +      return "lea{l}\t{%E1, %k0|%k0, %E1}";
> +    }
> +  else
> +    return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}";
> +}
> +  [(set_attr "type" "lea")
> +   (set (attr "mode")
> +     (if_then_else
> +       (match_operand 1 "SImode_address_operand")
> +       (const_string "SI")
> +       (const_string "<MODE>")))])
> +
>  ;; It may seem that nonimmediate operand is proper one for operand 1.
>  ;; The addsi_1 pattern allows nonimmediate operand at that place and
>  ;; we take care in ix86_binary_operator_ok to not allow two memory
> @@ -6552,26 +6576,29 @@
>         (const_string "*")))
>     (set_attr "mode" "SI")])
>
> -(define_insn "*addhi_1"
> +(define_insn "*addhi_1<nf_name>"
>    [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp,r,r")
>         (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp,rm,r")
> -                (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))
> -   (clobber (reg:CC FLAGS_REG))]
> -  "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)"
> +                (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))]
> +  "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)
> +  && <nf_condition>"
>  {
>    bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
>    switch (get_attr_type (insn))
>      {
>      case TYPE_LEA:
> -      return "#";
> +      if (TARGET_APX_NDD && <nf_applied>)
> +       return "%{nf%} add{w}\t{%2, %1, %0|%0, %1, %2}";
> +      else
> +       return "#";
>
>      case TYPE_INCDEC:
>        if (operands[2] == const1_rtx)
> -       return use_ndd ? "inc{w}\t{%1, %0|%0, %1}" : "inc{w}\t%0";
> +       return use_ndd ? "<nf_prefix>inc{w}\t{%1, %0|%0, %1}" : "<nf_prefix>inc{w}\t%0";
>        else
>         {
>           gcc_assert (operands[2] == constm1_rtx);
> -         return use_ndd ? "dec{w}\t{%1, %0|%0, %1}" : "dec{w}\t%0";
> +         return use_ndd ? "<nf_prefix>dec{w}\t{%1, %0|%0, %1}" : "<nf_prefix>dec{w}\t%0";
>         }
>
>      default:
> @@ -6581,11 +6608,11 @@
>          std::swap (operands[1], operands[2]);
>
>        if (x86_maybe_negate_const_int (&operands[2], HImode))
> -       return use_ndd ? "sub{w}\t{%2, %1, %0|%0, %1, %2}"
> -                      : "sub{w}\t{%2, %0|%0, %2}";
> +       return use_ndd ? "<nf_prefix>sub{w}\t{%2, %1, %0|%0, %1, %2}"
> +                      : "<nf_prefix>sub{w}\t{%2, %0|%0, %2}";
>
> -      return use_ndd ? "add{w}\t{%2, %1, %0|%0, %1, %2}"
> -                    : "add{w}\t{%2, %0|%0, %2}";
> +      return use_ndd ? "<nf_prefix>add{w}\t{%2, %1, %0|%0, %1, %2}"
> +                    : "<nf_prefix>add{w}\t{%2, %0|%0, %2}";
>      }
>  }
>    [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
> @@ -6603,33 +6630,36 @@
>         (const_string "*")))
>     (set_attr "mode" "HI,HI,HI,SI,HI,HI")])
>
> -(define_insn "*addqi_1"
> +(define_insn "*addqi_1<nf_name>"
>    [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp,r,r")
>         (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp,rm,r")
> -                (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))
> -   (clobber (reg:CC FLAGS_REG))]
> -  "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)"
> +                (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))]
> +  "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)
> +  && <nf_condition>"
>  {
>    bool widen = (get_attr_mode (insn) != MODE_QI);
>    bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
>    switch (get_attr_type (insn))
>      {
>      case TYPE_LEA:
> -      return "#";
> +      if (TARGET_APX_NDD && <nf_applied>)
> +       return "%{nf%} add{b}\t{%2, %1, %0|%0, %1, %2}";
> +      else
> +       return "#";
>
>      case TYPE_INCDEC:
>        if (operands[2] == const1_rtx)
>         if (use_ndd)
> -         return "inc{b}\t{%1, %0|%0, %1}";
> +         return "<nf_prefix>inc{b}\t{%1, %0|%0, %1}";
>         else
> -         return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
> +         return widen ? "<nf_prefix>inc{l}\t%k0" : "<nf_prefix>inc{b}\t%0";
>        else
>         {
>           gcc_assert (operands[2] == constm1_rtx);
>           if (use_ndd)
> -           return "dec{b}\t{%1, %0|%0, %1}";
> +           return "<nf_prefix>dec{b}\t{%1, %0|%0, %1}";
>           else
> -           return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
> +           return widen ? "<nf_prefix>dec{l}\t%k0" : "<nf_prefix>dec{b}\t%0";
>         }
>
>      default:
> @@ -6641,16 +6671,16 @@
>        if (x86_maybe_negate_const_int (&operands[2], QImode))
>         {
>           if (use_ndd)
> -           return "sub{b}\t{%2, %1, %0|%0, %1, %2}";
> +           return "<nf_prefix>sub{b}\t{%2, %1, %0|%0, %1, %2}";
>           else
> -           return widen ? "sub{l}\t{%2, %k0|%k0, %2}"
> -                        : "sub{b}\t{%2, %0|%0, %2}";
> +           return widen ? "<nf_prefix>sub{l}\t{%2, %k0|%k0, %2}"
> +                        : "<nf_prefix>sub{b}\t{%2, %0|%0, %2}";
>         }
>        if (use_ndd)
> -       return "add{b}\t{%2, %1, %0|%0, %1, %2}";
> +       return "<nf_prefix>add{b}\t{%2, %1, %0|%0, %1, %2}";
>        else
> -       return widen ? "add{l}\t{%k2, %k0|%k0, %k2}"
> -                    : "add{b}\t{%2, %0|%0, %2}";
> +       return widen ? "<nf_prefix>add{l}\t{%k2, %k0|%k0, %k2}"
> +                    : "<nf_prefix>add{b}\t{%2, %0|%0, %2}";
>      }
>  }
>    [(set_attr "isa" "*,*,*,*,*,*,apx_ndd,apx_ndd")
> @@ -6824,6 +6854,23 @@
>      }
>  })
>
> +(define_split
> +  [(set (match_operand:SWI 0 "register_operand")
> +       (plus:SWI (match_operand:SWI 1 "register_operand")
> +                 (match_operand:SWI 2 "<nonmemory_operand>")))]
> +  "TARGET_APX_NF && reload_completed
> +   && ix86_lea_for_add_ok (insn, operands)"
> +  [(set (match_dup 0)
> +       (plus:<LEAMODE> (match_dup 1) (match_dup 2)))]
> +{
> +  if (<MODE>mode != <LEAMODE>mode)
> +    {
> +      operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
> +      operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
> +      operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]);
> +    }
> +})
> +
>  ;; Convert add to the lea pattern to avoid flags dependency.
>  (define_split
>    [(set (match_operand:DI 0 "register_operand")
> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index d5f793a9e8b..66021d59d4e 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -1356,6 +1356,9 @@ Enum(apx_features) String(ndd) Value(apx_ndd) Set(4)
>  EnumValue
>  Enum(apx_features) String(ppx) Value(apx_ppx) Set(5)
>
> +EnumValue
> +Enum(apx_features) String(nf) Value(apx_nf) Set(6)
> +
>  EnumValue
>  Enum(apx_features) String(all) Value(apx_all) Set(1)
>
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
> index 0eb751ad225..0ff4df0780c 100644
> --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
> +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { ! ia32 } } } */
> -/* { dg-options "-mapxf -march=x86-64 -O2" } */
> +/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx -march=x86-64 -O2" } */
>  /* { dg-final { scan-assembler-not "movl"} } */
>
>  #include <stdint.h>
> diff --git a/gcc/testsuite/gcc.target/i386/apx-nf.c b/gcc/testsuite/gcc.target/i386/apx-nf.c
> new file mode 100644
> index 00000000000..3adc7a27902
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/apx-nf.c
> @@ -0,0 +1,6 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx,nf -march=x86-64 -O2" } */
> +/* { dg-final { scan-assembler-times "\{nf\} add" 4 } } */
> +
> +#include "apx-ndd.c"
> +
> --
> 2.31.1
>
>

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2024-05-22  8:35 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20240515070226.3760873-1-lingling.kong@intel.com>
2024-05-15  7:43 ` [PATCH 1/8] [APX NF]: Support APX NF add Kong, Lingling
2024-05-15  8:14   ` Uros Bizjak
2024-05-15  8:36     ` Kong, Lingling
2024-05-15  8:46   ` Uros Bizjak
2024-05-22  8:29     ` [PATCH v2 " Kong, Lingling
2024-05-22  8:35       ` Uros Bizjak
     [not found] ` <20240515070226.3760873-2-lingling.kong@intel.com>
2024-05-15  7:44   ` [PATCH 2/8] [APX NF] Support APX NF for {sub/and/or/xor/neg} Kong, Lingling
     [not found] ` <20240515070226.3760873-3-lingling.kong@intel.com>
2024-05-15  7:44   ` [PATCH 3/8] [APX NF] Support APX NF for left shift insns Kong, Lingling
     [not found] ` <20240515070226.3760873-4-lingling.kong@intel.com>
2024-05-15  7:45   ` [PATCH 4/8] [APX NF] Support APX NF for right " Kong, Lingling
     [not found] ` <20240515070226.3760873-5-lingling.kong@intel.com>
2024-05-15  7:45   ` [PATCH 5/8] [APX NF] Support APX NF for rotate insns Kong, Lingling
     [not found] ` <20240515070226.3760873-6-lingling.kong@intel.com>
2024-05-15  7:46   ` [PATCH 6/8] [APX NF] Support APX NF for shld/shrd Kong, Lingling
     [not found] ` <20240515070226.3760873-7-lingling.kong@intel.com>
2024-05-15  7:46   ` [PATCH 7/8] [APX NF] Support APX NF for mul/div Kong, Lingling
     [not found] ` <20240515070226.3760873-8-lingling.kong@intel.com>
2024-05-15  7:47   ` [PATCH 8/8] [APX NF] Support APX NF for lzcnt/tzcnt/popcnt Kong, Lingling

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).