* [PATCH v3 1/8] [APX NF]: Support APX NF add
[not found] <20240529023704.3728291-1-lingling.kong@intel.com>
@ 2024-05-29 5:10 ` Kong, Lingling
2024-06-03 1:08 ` Hongtao Liu
[not found] ` <20240529023704.3728291-2-lingling.kong@intel.com>
` (6 subsequent siblings)
7 siblings, 1 reply; 9+ messages in thread
From: Kong, Lingling @ 2024-05-29 5:10 UTC (permalink / raw)
To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak
Hi, compared with v2, these patches restore the original lea pattern position and address Hongtao's comments.
The APX NF (no flags) feature suppresses the update of status flags
for arithmetic operations.
For NF add, it is not clear whether nf add can be faster than lea. If so,
the pattern needs to be adjusted to prefer lea generation.
gcc/ChangeLog:
* config/i386/i386-opts.h (enum apx_features): Add nf
enumeration.
* config/i386/i386.h (TARGET_APX_NF): New.
* config/i386/i386.md (*add<mode>_1_nf): New define_insn.
* config/i386/i386.opt: Add apx_nf enumeration.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-ndd.c: Fixed test.
Co-authored-by: Lingling Kong <lingling.kong@intel.com>
---
gcc/config/i386/i386-opts.h | 3 +-
gcc/config/i386/i386.h | 1 +
gcc/config/i386/i386.md | 135 ++++++++++++++++--------
gcc/config/i386/i386.opt | 3 +
gcc/testsuite/gcc.target/i386/apx-ndd.c | 2 +-
5 files changed, 98 insertions(+), 46 deletions(-)
diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
index ef2825803b3..60176ce609f 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -140,7 +140,8 @@ enum apx_features {
apx_push2pop2 = 1 << 1,
apx_ndd = 1 << 2,
apx_ppx = 1 << 3,
- apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx,
+ apx_nf = 1<< 4,
+ apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx | apx_nf,
};
#endif
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 359a8408263..969391d3013 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -55,6 +55,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_APX_PUSH2POP2 (ix86_apx_features & apx_push2pop2)
#define TARGET_APX_NDD (ix86_apx_features & apx_ndd)
#define TARGET_APX_PPX (ix86_apx_features & apx_ppx)
+#define TARGET_APX_NF (ix86_apx_features & apx_nf)
#include "config/vxworks-dummy.h"
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e8073f5a200..1eeadaddeba 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6290,6 +6290,13 @@
[(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
(clobber (reg:CC FLAGS_REG))])]
"operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+
+(define_split
+ [(set (match_operand:SWI48 0 "general_reg_operand")
+ (mult:SWI48 (match_dup 0) (match_operand:SWI48 1 "const1248_operand")))]
+ "TARGET_APX_NF && reload_completed"
+ [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))]
+ "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
;; Add instructions
@@ -6437,48 +6444,65 @@
(clobber (reg:CC FLAGS_REG))])]
"split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[5]);")
-(define_insn "*add<mode>_1"
- [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r,r")
+(define_subst_attr "nf_name" "nf_subst" "_nf" "")
+(define_subst_attr "nf_prefix" "nf_subst" "%{nf%} " "")
+(define_subst_attr "nf_condition" "nf_subst" "TARGET_APX_NF" "true")
+(define_subst_attr "nf_mem_constraint" "nf_subst" "je" "m")
+(define_subst_attr "nf_applied" "nf_subst" "true" "false")
+
+(define_subst "nf_subst"
+ [(set (match_operand:SWI 0)
+ (match_operand:SWI 1))]
+ ""
+ [(set (match_dup 0)
+ (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])
+
+(define_insn "*add<mode>_1<nf_name>"
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r<nf_mem_constraint>,r,r,r,r,r,r")
(plus:SWI48
- (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rje,jM,r")
- (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le,r,e,BM")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
+ (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,0,r,r,rje,jM,r")
+ (match_operand:SWI48 2 "x86_64_general_operand" "r,e,BM,0,le,r,e,BM")))]
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
{
bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
switch (get_attr_type (insn))
{
case TYPE_LEA:
- return "#";
+ if (TARGET_APX_NDD && <nf_applied>)
+ return "%{nf%} add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
+ else
+ return "#";
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
- return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
- : "inc{<imodesuffix>}\t%0";
+ return use_ndd ? "<nf_prefix>inc{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "<nf_prefix>inc{<imodesuffix>}\t%0";
else
{
gcc_assert (operands[2] == constm1_rtx);
- return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
- : "dec{<imodesuffix>}\t%0";
+ return use_ndd ? "<nf_prefix>dec{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "<nf_prefix>dec{<imodesuffix>}\t%0";
}
default:
/* For most processors, ADD is faster than LEA. This alternative
was added to use ADD as much as possible. */
- if (which_alternative == 2)
+ if (which_alternative == 3)
std::swap (operands[1], operands[2]);
if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
- return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
- : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "<nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "<nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2}";
- return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
- : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "<nf_prefix>add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "<nf_prefix>add{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
+ [(set_attr "isa" "*,*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
(set (attr "type")
- (cond [(eq_attr "alternative" "3")
+ (cond [(eq_attr "alternative" "4")
(const_string "lea")
(match_operand:SWI48 2 "incdec_operand")
(const_string "incdec")
@@ -6552,26 +6576,29 @@
(const_string "*")))
(set_attr "mode" "SI")])
-(define_insn "*addhi_1"
+(define_insn "*addhi_1<nf_name>"
[(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp,r,r")
(plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp,rm,r")
- (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)"
+ (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))]
+ "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
{
bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
switch (get_attr_type (insn))
{
case TYPE_LEA:
- return "#";
+ if (TARGET_APX_NDD && <nf_applied>)
+ return "%{nf%} add{w}\t{%2, %1, %0|%0, %1, %2}";
+ else
+ return "#";
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
- return use_ndd ? "inc{w}\t{%1, %0|%0, %1}" : "inc{w}\t%0";
+ return use_ndd ? "<nf_prefix>inc{w}\t{%1, %0|%0, %1}" : "<nf_prefix>inc{w}\t%0";
else
{
gcc_assert (operands[2] == constm1_rtx);
- return use_ndd ? "dec{w}\t{%1, %0|%0, %1}" : "dec{w}\t%0";
+ return use_ndd ? "<nf_prefix>dec{w}\t{%1, %0|%0, %1}" : "<nf_prefix>dec{w}\t%0";
}
default:
@@ -6581,11 +6608,11 @@
std::swap (operands[1], operands[2]);
if (x86_maybe_negate_const_int (&operands[2], HImode))
- return use_ndd ? "sub{w}\t{%2, %1, %0|%0, %1, %2}"
- : "sub{w}\t{%2, %0|%0, %2}";
+ return use_ndd ? "<nf_prefix>sub{w}\t{%2, %1, %0|%0, %1, %2}"
+ : "<nf_prefix>sub{w}\t{%2, %0|%0, %2}";
- return use_ndd ? "add{w}\t{%2, %1, %0|%0, %1, %2}"
- : "add{w}\t{%2, %0|%0, %2}";
+ return use_ndd ? "<nf_prefix>add{w}\t{%2, %1, %0|%0, %1, %2}"
+ : "<nf_prefix>add{w}\t{%2, %0|%0, %2}";
}
}
[(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
@@ -6603,33 +6630,36 @@
(const_string "*")))
(set_attr "mode" "HI,HI,HI,SI,HI,HI")])
-(define_insn "*addqi_1"
+(define_insn "*addqi_1<nf_name>"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp,r,r")
(plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp,rm,r")
- (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)"
+ (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))]
+ "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
{
bool widen = (get_attr_mode (insn) != MODE_QI);
bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
switch (get_attr_type (insn))
{
case TYPE_LEA:
- return "#";
+ if (TARGET_APX_NDD && <nf_applied>)
+ return "%{nf%} add{b}\t{%2, %1, %0|%0, %1, %2}";
+ else
+ return "#";
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
if (use_ndd)
- return "inc{b}\t{%1, %0|%0, %1}";
+ return "<nf_prefix>inc{b}\t{%1, %0|%0, %1}";
else
- return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
+ return widen ? "<nf_prefix>inc{l}\t%k0" : "<nf_prefix>inc{b}\t%0";
else
{
gcc_assert (operands[2] == constm1_rtx);
if (use_ndd)
- return "dec{b}\t{%1, %0|%0, %1}";
+ return "<nf_prefix>dec{b}\t{%1, %0|%0, %1}";
else
- return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
+ return widen ? "<nf_prefix>dec{l}\t%k0" : "<nf_prefix>dec{b}\t%0";
}
default:
@@ -6641,16 +6671,16 @@
if (x86_maybe_negate_const_int (&operands[2], QImode))
{
if (use_ndd)
- return "sub{b}\t{%2, %1, %0|%0, %1, %2}";
+ return "<nf_prefix>sub{b}\t{%2, %1, %0|%0, %1, %2}";
else
- return widen ? "sub{l}\t{%2, %k0|%k0, %2}"
- : "sub{b}\t{%2, %0|%0, %2}";
+ return widen ? "<nf_prefix>sub{l}\t{%2, %k0|%k0, %2}"
+ : "<nf_prefix>sub{b}\t{%2, %0|%0, %2}";
}
if (use_ndd)
- return "add{b}\t{%2, %1, %0|%0, %1, %2}";
+ return "<nf_prefix>add{b}\t{%2, %1, %0|%0, %1, %2}";
else
- return widen ? "add{l}\t{%k2, %k0|%k0, %k2}"
- : "add{b}\t{%2, %0|%0, %2}";
+ return widen ? "<nf_prefix>add{l}\t{%k2, %k0|%k0, %k2}"
+ : "<nf_prefix>add{b}\t{%2, %0|%0, %2}";
}
}
[(set_attr "isa" "*,*,*,*,*,*,apx_ndd,apx_ndd")
@@ -6824,6 +6854,23 @@
}
})
+(define_split
+ [(set (match_operand:SWI 0 "register_operand")
+ (plus:SWI (match_operand:SWI 1 "register_operand")
+ (match_operand:SWI 2 "<nonmemory_operand>")))]
+ "TARGET_APX_NF && reload_completed
+ && ix86_lea_for_add_ok (insn, operands)"
+ [(set (match_dup 0)
+ (plus:<LEAMODE> (match_dup 1) (match_dup 2)))]
+{
+ if (<MODE>mode != <LEAMODE>mode)
+ {
+ operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
+ operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
+ operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]);
+ }
+})
+
;; Convert add to the lea pattern to avoid flags dependency.
(define_split
[(set (match_operand:DI 0 "register_operand")
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 7151fb1b147..b6f28a2b4bd 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1336,6 +1336,9 @@ Enum(apx_features) String(ndd) Value(apx_ndd) Set(4)
EnumValue
Enum(apx_features) String(ppx) Value(apx_ppx) Set(5)
+EnumValue
+Enum(apx_features) String(nf) Value(apx_nf) Set(6)
+
EnumValue
Enum(apx_features) String(all) Value(apx_all) Set(1)
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index 0eb751ad225..0ff4df0780c 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -1,5 +1,5 @@
/* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-mapxf -march=x86-64 -O2" } */
+/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx -march=x86-64 -O2" } */
/* { dg-final { scan-assembler-not "movl"} } */
#include <stdint.h>
--
2.31.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v3 2/8] [APX NF] Support APX NF for {sub/and/or/xor/neg}
[not found] ` <20240529023704.3728291-2-lingling.kong@intel.com>
@ 2024-05-29 5:10 ` Kong, Lingling
0 siblings, 0 replies; 9+ messages in thread
From: Kong, Lingling @ 2024-05-29 5:10 UTC (permalink / raw)
To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak
gcc/ChangeLog:
* config/i386/i386.md (nf_nonf_attr): New subst_attr.
(nf_nonf_x64_attr): Ditto.
(*sub<mode>_1_nf): New define_insn.
(*anddi_1_nf): Ditto.
(*and<mode>_1_nf): Ditto.
(*<code>qi_1_nf): Ditto.
(*<code><mode>_1_nf): Ditto.
(*neg<mode>_1_nf): Ditto.
* config/i386/sse.md : New define_split.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-nf.c: Add test.
---
gcc/config/i386/i386.md | 173 +++++++++++++------------
gcc/config/i386/sse.md | 11 ++
gcc/testsuite/gcc.target/i386/apx-nf.c | 12 ++
3 files changed, 114 insertions(+), 82 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/apx-nf.c
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 1eeadaddeba..d3cb224abad 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -575,7 +575,7 @@
noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni,
avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert,
avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl,
- vaes_avx512vl"
+ vaes_avx512vl,noapx_nf"
(const_string "base"))
;; The (bounding maximum) length of an instruction immediate.
@@ -981,6 +981,7 @@
(symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
(eq_attr "mmx_isa" "avx")
(symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
+ (eq_attr "isa" "noapx_nf") (symbol_ref "!TARGET_APX_NF")
]
(const_int 1)))
@@ -6449,6 +6450,8 @@
(define_subst_attr "nf_condition" "nf_subst" "TARGET_APX_NF" "true")
(define_subst_attr "nf_mem_constraint" "nf_subst" "je" "m")
(define_subst_attr "nf_applied" "nf_subst" "true" "false")
+(define_subst_attr "nf_nonf_attr" "nf_subst" "noapx_nf" "*")
+(define_subst_attr "nf_nonf_x64_attr" "nf_subst" "noapx_nf" "x64")
(define_subst "nf_subst"
[(set (match_operand:SWI 0)
@@ -7893,20 +7896,21 @@
"split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);"
[(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
-(define_insn "*sub<mode>_1"
- [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r,r")
+(define_insn "*sub<mode>_1<nf_name>"
+ [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r<nf_mem_constraint>,<r>,r,r,r")
(minus:SWI
- (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,rjM,r")
- (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r,<i>,<m>")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
+ (match_operand:SWI 1 "nonimmediate_operand" "0,0,0,rm,rjM,r")
+ (match_operand:SWI 2 "<general_operand>" "<r>,<i>,<m>,r,<i>,<m>")))]
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
"@
- sub{<imodesuffix>}\t{%2, %0|%0, %2}
- sub{<imodesuffix>}\t{%2, %0|%0, %2}
- sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
- sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
- sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd")
+ <nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2}
+ <nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2}
+ <nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2}
+ <nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ <nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ <nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd")
(set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
@@ -11795,27 +11799,28 @@
}
[(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd_64,apx_ndd")])
-(define_insn "*anddi_1"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r,r,r,r,?k")
+(define_insn "*anddi_1<nf_name>"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r<nf_mem_constraint>,r,r,r,r,r,?k")
(and:DI
- (match_operand:DI 1 "nonimmediate_operand" "%0,r,0,0,rm,rjM,r,qm,k")
- (match_operand:DI 2 "x86_64_szext_general_operand" "Z,Z,re,m,r,e,m,L,k")))
- (clobber (reg:CC FLAGS_REG))]
+ (match_operand:DI 1 "nonimmediate_operand" "%0,r,0,0,0,rm,rjM,r,qm,k")
+ (match_operand:DI 2 "x86_64_szext_general_operand" "Z,Z,r,e,m,r,e,m,L,k")))]
"TARGET_64BIT
- && ix86_binary_operator_ok (AND, DImode, operands, TARGET_APX_NDD)"
+ && ix86_binary_operator_ok (AND, DImode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
"@
- and{l}\t{%k2, %k0|%k0, %k2}
- and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
- and{q}\t{%2, %0|%0, %2}
- and{q}\t{%2, %0|%0, %2}
- and{q}\t{%2, %1, %0|%0, %1, %2}
- and{q}\t{%2, %1, %0|%0, %1, %2}
- and{q}\t{%2, %1, %0|%0, %1, %2}
+ <nf_prefix>and{l}\t{%k2, %k0|%k0, %k2}
+ <nf_prefix>and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
+ <nf_prefix>and{q}\t{%2, %0|%0, %2}
+ <nf_prefix>and{q}\t{%2, %0|%0, %2}
+ <nf_prefix>and{q}\t{%2, %0|%0, %2}
+ <nf_prefix>and{q}\t{%2, %1, %0|%0, %1, %2}
+ <nf_prefix>and{q}\t{%2, %1, %0|%0, %1, %2}
+ <nf_prefix>and{q}\t{%2, %1, %0|%0, %1, %2}
#
#"
- [(set_attr "isa" "x64,apx_ndd,x64,x64,apx_ndd,apx_ndd,apx_ndd,x64,avx512bw")
- (set_attr "type" "alu,alu,alu,alu,alu,alu,alu,imovx,msklog")
- (set_attr "length_immediate" "*,*,*,*,*,*,*,0,*")
+ [(set_attr "isa" "x64,apx_ndd,x64,x64,x64,apx_ndd,apx_ndd,apx_ndd,<nf_nonf_x64_attr>,avx512bw")
+ (set_attr "type" "alu,alu,alu,alu,alu,alu,alu,alu,imovx,msklog")
+ (set_attr "length_immediate" "*,*,*,*,*,*,*,*,0,*")
(set (attr "prefix_rex")
(if_then_else
(and (eq_attr "type" "imovx")
@@ -11823,7 +11828,7 @@
(match_operand 1 "ext_QIreg_operand")))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "SI,SI,DI,DI,DI,DI,DI,SI,DI")])
+ (set_attr "mode" "SI,SI,DI,DI,DI,DI,DI,DI,SI,DI")])
(define_insn_and_split "*anddi_1_btr"
[(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
@@ -11894,31 +11899,34 @@
(set_attr "isa" "*,apx_ndd,apx_ndd,apx_ndd")
(set_attr "mode" "SI")])
-(define_insn "*and<mode>_1"
- [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,r,r,r,Ya,?k")
- (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,rm,rjM,r,qm,k")
- (match_operand:SWI24 2 "<general_operand>" "r<i>,<m>,r,<i>,<m>,L,k")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (AND, <MODE>mode, operands, TARGET_APX_NDD)"
+(define_insn "*and<mode>_1<nf_name>"
+ [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r<nf_mem_constraint>,r,r,r,r,Ya,?k")
+ (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,0,rm,rjM,r,qm,k")
+ (match_operand:SWI24 2 "<general_operand>" "r,<i>,<m>,r,<i>,<m>,L,k")))]
+ "ix86_binary_operator_ok (AND, <MODE>mode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
"@
- and{<imodesuffix>}\t{%2, %0|%0, %2}
- and{<imodesuffix>}\t{%2, %0|%0, %2}
- and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
- and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
- and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ <nf_prefix>and{<imodesuffix>}\t{%2, %0|%0, %2}
+ <nf_prefix>and{<imodesuffix>}\t{%2, %0|%0, %2}
+ <nf_prefix>and{<imodesuffix>}\t{%2, %0|%0, %2}
+ <nf_prefix>and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ <nf_prefix>and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ <nf_prefix>and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
#
#"
[(set (attr "isa")
- (cond [(eq_attr "alternative" "2,3,4")
+ (cond [(eq_attr "alternative" "3,4,5")
(const_string "apx_ndd")
(eq_attr "alternative" "6")
+ (const_string "<nf_nonf_attr>")
+ (eq_attr "alternative" "7")
(if_then_else (eq_attr "mode" "SI")
(const_string "avx512bw")
(const_string "avx512f"))
]
(const_string "*")))
- (set_attr "type" "alu,alu,alu,alu,alu,imovx,msklog")
- (set_attr "length_immediate" "*,*,*,*,*,0,*")
+ (set_attr "type" "alu,alu,alu,alu,alu,alu,imovx,msklog")
+ (set_attr "length_immediate" "*,*,*,*,*,*,0,*")
(set (attr "prefix_rex")
(if_then_else
(and (eq_attr "type" "imovx")
@@ -11926,20 +11934,20 @@
(match_operand 1 "ext_QIreg_operand")))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,<MODE>,SI,<MODE>")])
+ (set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,<MODE>,<MODE>,SI,<MODE>")])
-(define_insn "*andqi_1"
+(define_insn "*andqi_1<nf_name>"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
(and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
- (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (AND, QImode, operands, TARGET_APX_NDD)"
+ (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))]
+ "ix86_binary_operator_ok (AND, QImode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
"@
- and{b}\t{%2, %0|%0, %2}
- and{b}\t{%2, %0|%0, %2}
- and{l}\t{%k2, %k0|%k0, %k2}
- and{b}\t{%2, %1, %0|%0, %1, %2}
- and{b}\t{%2, %1, %0|%0, %1, %2}
+ <nf_prefix>and{b}\t{%2, %0|%0, %2}
+ <nf_prefix>and{b}\t{%2, %0|%0, %2}
+ <nf_prefix>and{l}\t{%k2, %k0|%k0, %k2}
+ <nf_prefix>and{b}\t{%2, %1, %0|%0, %1, %2}
+ <nf_prefix>and{b}\t{%2, %1, %0|%0, %1, %2}
#"
[(set_attr "type" "alu,alu,alu,alu,alu,msklog")
(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,*")
@@ -12802,22 +12810,23 @@
}
[(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd_64,apx_ndd")])
-(define_insn "*<code><mode>_1"
- [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,r,r,r,?k")
+(define_insn "*<code><mode>_1<nf_name>"
+ [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r<nf_mem_constraint>,r,r,r,r,?k")
(any_or:SWI248
- (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,rm,rjM,r,k")
- (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,r,<i>,<m>,k")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
+ (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,0,rm,rjM,r,k")
+ (match_operand:SWI248 2 "<general_operand>" "r,<i>,<m>,r,<i>,<m>,k")))]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
"@
- <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
- <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
- <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
- <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
- <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ <nf_prefix><logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+ <nf_prefix><logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+ <nf_prefix><logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+ <nf_prefix><logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ <nf_prefix><logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ <nf_prefix><logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
#"
- [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,<kmov_isa>")
- (set_attr "type" "alu, alu, alu, alu, alu, msklog")
+ [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd,<kmov_isa>")
+ (set_attr "type" "alu,alu, alu, alu, alu, alu, msklog")
(set_attr "mode" "<MODE>")])
(define_insn_and_split "*notxor<mode>_1"
@@ -12963,18 +12972,18 @@
(set_attr "isa" "*,apx_ndd")
(set_attr "mode" "SI")])
-(define_insn "*<code>qi_1"
+(define_insn "*<code>qi_1<nf_name>"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
(any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
- (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (<CODE>, QImode, operands, TARGET_APX_NDD)"
+ (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))]
+ "ix86_binary_operator_ok (<CODE>, QImode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
"@
- <logic>{b}\t{%2, %0|%0, %2}
- <logic>{b}\t{%2, %0|%0, %2}
- <logic>{l}\t{%k2, %k0|%k0, %k2}
- <logic>{b}\t{%2, %1, %0|%0, %1, %2}
- <logic>{b}\t{%2, %1, %0|%0, %1, %2}
+ <nf_prefix><logic>{b}\t{%2, %0|%0, %2}
+ <nf_prefix><logic>{b}\t{%2, %0|%0, %2}
+ <nf_prefix><logic>{l}\t{%k2, %k0|%k0, %k2}
+ <nf_prefix><logic>{b}\t{%2, %1, %0|%0, %1, %2}
+ <nf_prefix><logic>{b}\t{%2, %1, %0|%0, %1, %2}
#"
[(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f")
(set_attr "type" "alu,alu,alu,alu,alu,msklog")
@@ -13534,14 +13543,14 @@
(const_int 0)))
(clobber (reg:CC FLAGS_REG))])])
-(define_insn "*neg<mode>_1"
+(define_insn "*neg<mode>_1<nf_name>"
[(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
- (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_unary_operator_ok (NEG, <MODE>mode, operands, TARGET_APX_NDD)"
+ (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")))]
+ "ix86_unary_operator_ok (NEG, <MODE>mode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
"@
- neg{<imodesuffix>}\t%0
- neg{<imodesuffix>}\t{%1, %0|%0, %1}"
+ <nf_prefix>neg{<imodesuffix>}\t%0
+ <nf_prefix>neg{<imodesuffix>}\t{%1, %0|%0, %1}"
[(set_attr "type" "negnot")
(set_attr "isa" "*,apx_ndd")
(set_attr "mode" "<MODE>")])
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 0f4fbcb2c5d..cdf11a68bc5 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1986,6 +1986,17 @@
]
(const_string "<MODE>")))])
+(define_split
+ [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
+ (any_logic:SWI1248_AVX512BW
+ (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
+ (match_operand:SWI1248_AVX512BW 2 "mask_reg_operand")))]
+ "TARGET_AVX512F && reload_completed"
+ [(parallel
+ [(set (match_dup 0)
+ (any_logic:SWI1248_AVX512BW (match_dup 1) (match_dup 2)))
+ (unspec [(const_int 0)] UNSPEC_MASKOP)])])
+
(define_split
[(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
(any_logic:SWI1248_AVX512BW
diff --git a/gcc/testsuite/gcc.target/i386/apx-nf.c b/gcc/testsuite/gcc.target/i386/apx-nf.c
new file mode 100644
index 00000000000..f33a994f0b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-nf.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx,nf -march=x86-64 -O2" } */
+/* { dg-final { scan-assembler-times "\{nf\} and" 1 } } */
+/* { dg-final { scan-assembler-times "\{nf\} or" 1 } } */
+
+struct B { unsigned bit0 : 1; unsigned bit1 : 1; };
+
+void
+foo (struct B *b)
+{
+ b->bit0 = b->bit0 | b->bit1;
+}
--
2.31.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v3 3/8] [APX NF] Support APX NF for left shift insns
[not found] ` <20240529023704.3728291-3-lingling.kong@intel.com>
@ 2024-05-29 5:10 ` Kong, Lingling
0 siblings, 0 replies; 9+ messages in thread
From: Kong, Lingling @ 2024-05-29 5:10 UTC (permalink / raw)
To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak
gcc/ChangeLog:
* config/i386/i386.md (*ashl<mode>3_1_nf): New.
(*ashlhi3_1_nf): Ditto.
(*ashlqi3_1_nf): Ditto.
* config/i386/sse.md: New define_split.
---
gcc/config/i386/i386.md | 96 ++++++++++++++++++++++++++++++-----------
gcc/config/i386/sse.md | 13 ++++++
2 files changed, 83 insertions(+), 26 deletions(-)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d3cb224abad..4c06c243cc3 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -15011,17 +15011,22 @@
[(set_attr "type" "ishiftx")
(set_attr "mode" "<MODE>")])
-(define_insn "*ashl<mode>3_1"
+(define_insn "*ashl<mode>3_1<nf_name>"
[(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r")
(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k,rm")
- (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>,c<S>")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands, TARGET_APX_NDD)"
+ (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>,c<S>")))]
+ "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
{
bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
switch (get_attr_type (insn))
{
case TYPE_LEA:
+ if (TARGET_APX_NDD && <nf_applied>)
+ return "%{nf%} sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
+ else
+ return "#";
+
case TYPE_ISHIFTX:
case TYPE_MSKLOG:
return "#";
@@ -15029,7 +15034,7 @@
case TYPE_ALU:
gcc_assert (operands[2] == const1_rtx);
gcc_assert (rtx_equal_p (operands[0], operands[1]));
- return "add{<imodesuffix>}\t%0, %0";
+ return "<nf_prefix>add{<imodesuffix>}\t%0, %0";
default:
if (operands[2] == const1_rtx
@@ -15037,11 +15042,11 @@
/* For NDD form instructions related to TARGET_SHIFT1, the $1
immediate do not need to be omitted as assembler will map it
to use shorter encoding. */
- && !use_ndd)
+ && !use_ndd && !<nf_applied>)
return "sal{<imodesuffix>}\t%0";
else
- return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
- : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "<nf_prefix>sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "<nf_prefix>sal{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
[(set_attr "isa" "*,*,bmi2,avx512bw,apx_ndd")
@@ -15072,6 +15077,17 @@
(set_attr "mode" "<MODE>")])
;; Convert shift to the shiftx pattern to avoid flags dependency.
+;; The NF/NDD forms do not support a shift count in register r, only c<S>,
+;; and this pattern has no FLAGS_REG clobber.
+(define_split
+ [(set (match_operand:SWI48 0 "register_operand")
+ (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
+ (match_operand:QI 2 "register_operand")))]
+ "TARGET_BMI2 && reload_completed"
+ [(set (match_dup 0)
+ (ashift:SWI48 (match_dup 1) (match_dup 2)))]
+ "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
+
(define_split
[(set (match_operand:SWI48 0 "register_operand")
(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
@@ -15158,32 +15174,37 @@
(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
"operands[2] = gen_lowpart (SImode, operands[2]);")
-(define_insn "*ashlhi3_1"
+(define_insn "*ashlhi3_1<nf_name>"
[(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r")
(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm")
- (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (ASHIFT, HImode, operands, TARGET_APX_NDD)"
+ (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI")))]
+ "ix86_binary_operator_ok (ASHIFT, HImode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
{
bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
switch (get_attr_type (insn))
{
case TYPE_LEA:
+ if (TARGET_APX_NDD && <nf_applied>)
+ return "%{nf%} sal{w}\t{%2, %1, %0|%0, %1, %2}";
+ else
+ return "#";
+
case TYPE_MSKLOG:
return "#";
case TYPE_ALU:
gcc_assert (operands[2] == const1_rtx);
- return "add{w}\t%0, %0";
+ return "<nf_prefix>add{w}\t%0, %0";
default:
if (operands[2] == const1_rtx
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
- && !use_ndd)
+ && !use_ndd && !<nf_applied>)
return "sal{w}\t%0";
else
- return use_ndd ? "sal{w}\t{%2, %1, %0|%0, %1, %2}"
- : "sal{w}\t{%2, %0|%0, %2}";
+ return use_ndd ? "<nf_prefix>sal{w}\t{%2, %1, %0|%0, %1, %2}"
+ : "<nf_prefix>sal{w}\t{%2, %0|%0, %2}";
}
}
[(set_attr "isa" "*,*,avx512f,apx_ndd")
@@ -15211,31 +15232,36 @@
(const_string "*")))
(set_attr "mode" "HI,SI,HI,HI")])
-(define_insn "*ashlqi3_1"
+(define_insn "*ashlqi3_1<nf_name>"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r")
(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm")
- (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (ASHIFT, QImode, operands, TARGET_APX_NDD)"
+ (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI")))]
+ "ix86_binary_operator_ok (ASHIFT, QImode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
{
bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
switch (get_attr_type (insn))
{
case TYPE_LEA:
+ if (TARGET_APX_NDD && <nf_applied>)
+ return "%{nf%} sal{b}\t{%2, %1, %0|%0, %1, %2}";
+ else
+ return "#";
+
case TYPE_MSKLOG:
return "#";
case TYPE_ALU:
gcc_assert (operands[2] == const1_rtx);
if (REG_P (operands[1]) && !ANY_QI_REGNO_P (REGNO (operands[1])))
- return "add{l}\t%k0, %k0";
+ return "<nf_prefix>add{l}\t%k0, %k0";
else
- return "add{b}\t%0, %0";
+ return "<nf_prefix>add{b}\t%0, %0";
default:
if (operands[2] == const1_rtx
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
- && !use_ndd)
+ && !use_ndd && !<nf_applied>)
{
if (get_attr_mode (insn) == MODE_SI)
return "sal{l}\t%k0";
@@ -15245,10 +15271,10 @@
else
{
if (get_attr_mode (insn) == MODE_SI)
- return "sal{l}\t{%2, %k0|%k0, %2}";
+ return "<nf_prefix>sal{l}\t{%2, %k0|%k0, %2}";
else
- return use_ndd ? "sal{b}\t{%2, %1, %0|%0, %1, %2}"
- : "sal{b}\t{%2, %0|%0, %2}";
+ return use_ndd ? "<nf_prefix>sal{b}\t{%2, %1, %0|%0, %1, %2}"
+ : "<nf_prefix>sal{b}\t{%2, %0|%0, %2}";
}
}
}
@@ -15351,6 +15377,24 @@
operands[2] = GEN_INT (1 << INTVAL (operands[2]));
})
+(define_split
+ [(set (match_operand:SWI 0 "general_reg_operand")
+ (ashift:SWI (match_operand:SWI 1 "index_reg_operand")
+ (match_operand 2 "const_0_to_3_operand")))]
+ "reload_completed
+ && REGNO (operands[0]) != REGNO (operands[1])
+ && !TARGET_APX_NDD"
+ [(set (match_dup 0)
+ (mult:<LEAMODE> (match_dup 1) (match_dup 2)))]
+{
+ if (<MODE>mode != <LEAMODE>mode)
+ {
+ operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
+ operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
+ }
+ operands[2] = GEN_INT (1 << INTVAL (operands[2]));
+})
+
;; Convert ashift to the lea pattern to avoid flags dependency.
(define_split
[(set (match_operand:DI 0 "general_reg_operand")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index cdf11a68bc5..556401ce379 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2150,6 +2150,19 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<MODE>")])
+(define_split
+ [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
+ (any_lshift:SWI1248_AVX512BW
+ (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
+ (match_operand 2 "const_int_operand")))]
+ "TARGET_AVX512F && reload_completed"
+ [(parallel
+ [(set (match_dup 0)
+ (any_lshift:SWI1248_AVX512BW
+ (match_dup 1)
+ (match_dup 2)))
+ (unspec [(const_int 0)] UNSPEC_MASKOP)])])
+
(define_split
[(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
(any_lshift:SWI1248_AVX512BW
--
2.31.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v3 4/8] [APX NF] Support APX NF for right shift insns
[not found] ` <20240529023704.3728291-4-lingling.kong@intel.com>
@ 2024-05-29 5:10 ` Kong, Lingling
0 siblings, 0 replies; 9+ messages in thread
From: Kong, Lingling @ 2024-05-29 5:10 UTC (permalink / raw)
To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak
gcc/ChangeLog:
* config/i386/i386.md (*ashr<mode>3_1_nf): New.
(*lshr<mode>3_1_nf): Ditto.
(*lshrqi3_1_nf): Ditto.
(*lshrhi3_1_nf): Ditto.
---
gcc/config/i386/i386.md | 82 +++++++++++++++++++++++------------------
1 file changed, 46 insertions(+), 36 deletions(-)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4c06c243cc3..d10caf04fcc 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -16323,13 +16323,13 @@
[(set_attr "type" "ishiftx")
(set_attr "mode" "<MODE>")])
-(define_insn "*ashr<mode>3_1"
+(define_insn "*ashr<mode>3_1<nf_name>"
[(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
(ashiftrt:SWI48
(match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
- (match_operand:QI 2 "nonmemory_operand" "c<S>,r,c<S>")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
+ (match_operand:QI 2 "nonmemory_operand" "c<S>,r,c<S>")))]
+ "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
{
bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
switch (get_attr_type (insn))
@@ -16340,11 +16340,11 @@
default:
if (operands[2] == const1_rtx
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
- && !use_ndd)
+ && !use_ndd && !<nf_applied>)
return "sar{<imodesuffix>}\t%0";
else
- return use_ndd ? "sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
- : "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "<nf_prefix>sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "<nf_prefix>sar{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
[(set_attr "isa" "*,bmi2,apx_ndd")
@@ -16384,14 +16384,13 @@
}
[(set_attr "isa" "*,*,*,apx_ndd")])
-
-(define_insn "*lshr<mode>3_1"
+(define_insn "*lshr<mode>3_1<nf_name>"
[(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k,r")
(lshiftrt:SWI48
(match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k,rm")
- (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>,c<S>")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
+ (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>,c<S>")))]
+ "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
{
bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
switch (get_attr_type (insn))
@@ -16403,11 +16402,11 @@
default:
if (operands[2] == const1_rtx
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
- && !use_ndd)
+ && !use_ndd && !<nf_applied>)
return "shr{<imodesuffix>}\t%0";
else
- return use_ndd ? "shr{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
- : "shr{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "<nf_prefix>shr{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "<nf_prefix>shr{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
[(set_attr "isa" "*,bmi2,avx512bw,apx_ndd")
@@ -16423,6 +16422,17 @@
(set_attr "mode" "<MODE>")])
;; Convert shift to the shiftx pattern to avoid flags dependency.
+;; NF/NDD does not support a shift count in r, it only supports c<S>,
+;; and it does not clobber the flags.
+(define_split
+ [(set (match_operand:SWI48 0 "register_operand")
+ (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
+ (match_operand:QI 2 "register_operand")))]
+ "TARGET_BMI2 && reload_completed"
+ [(set (match_dup 0)
+ (any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))]
+ "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
+
(define_split
[(set (match_operand:SWI48 0 "register_operand")
(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
@@ -16491,22 +16501,22 @@
(zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
"operands[2] = gen_lowpart (SImode, operands[2]);")
-(define_insn "*ashr<mode>3_1"
+(define_insn "*ashr<mode>3_1<nf_name>"
[(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, r")
(ashiftrt:SWI12
(match_operand:SWI12 1 "nonimmediate_operand" "0, rm")
- (match_operand:QI 2 "nonmemory_operand" "c<S>, c<S>")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
+ (match_operand:QI 2 "nonmemory_operand" "c<S>, c<S>")))]
+ "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
{
bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
if (operands[2] == const1_rtx
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
- && !use_ndd)
+ && !use_ndd && !<nf_applied>)
return "sar{<imodesuffix>}\t%0";
else
- return use_ndd ? "sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
- : "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "<nf_prefix>sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "<nf_prefix>sar{<imodesuffix>}\t{%2, %0|%0, %2}";
}
[(set_attr "isa" "*, apx_ndd")
(set_attr "type" "ishift")
@@ -16519,13 +16529,13 @@
(const_string "*")))
(set_attr "mode" "<MODE>")])
-(define_insn "*lshrqi3_1"
+(define_insn "*lshrqi3_1<nf_name>"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,?k,r")
(lshiftrt:QI
(match_operand:QI 1 "nonimmediate_operand" "0, k, rm")
- (match_operand:QI 2 "nonmemory_operand" "cI,Wb,cI")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (LSHIFTRT, QImode, operands, TARGET_APX_NDD)"
+ (match_operand:QI 2 "nonmemory_operand" "cI,Wb,cI")))]
+ "ix86_binary_operator_ok (LSHIFTRT, QImode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
{
bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
switch (get_attr_type (insn))
@@ -16533,11 +16543,11 @@
case TYPE_ISHIFT:
if (operands[2] == const1_rtx
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
- && !use_ndd)
+ && !use_ndd && !<nf_applied>)
return "shr{b}\t%0";
else
- return use_ndd ? "shr{b}\t{%2, %1, %0|%0, %1, %2}"
- : "shr{b}\t{%2, %0|%0, %2}";
+ return use_ndd ? "<nf_prefix>shr{b}\t{%2, %1, %0|%0, %1, %2}"
+ : "<nf_prefix>shr{b}\t{%2, %0|%0, %2}";
case TYPE_MSKLOG:
return "#";
default:
@@ -16556,13 +16566,13 @@
(const_string "*")))
(set_attr "mode" "QI")])
-(define_insn "*lshrhi3_1"
+(define_insn "*lshrhi3_1<nf_name>"
[(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k, r")
(lshiftrt:HI
(match_operand:HI 1 "nonimmediate_operand" "0, k, rm")
- (match_operand:QI 2 "nonmemory_operand" "cI, Ww, cI")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (LSHIFTRT, HImode, operands, TARGET_APX_NDD)"
+ (match_operand:QI 2 "nonmemory_operand" "cI, Ww, cI")))]
+ "ix86_binary_operator_ok (LSHIFTRT, HImode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
{
bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
switch (get_attr_type (insn))
@@ -16570,11 +16580,11 @@
case TYPE_ISHIFT:
if (operands[2] == const1_rtx
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
- && !use_ndd)
+ && !use_ndd && !<nf_applied>)
return "shr{w}\t%0";
else
- return use_ndd ? "shr{w}\t{%2, %1, %0|%0, %1, %2}"
- : "shr{w}\t{%2, %0|%0, %2}";
+ return use_ndd ? "<nf_prefix>shr{w}\t{%2, %1, %0|%0, %1, %2}"
+ : "<nf_prefix>shr{w}\t{%2, %0|%0, %2}";
case TYPE_MSKLOG:
return "#";
default:
--
2.31.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v3 5/8] [APX NF] Support APX NF for rotate insns
[not found] ` <20240529023704.3728291-5-lingling.kong@intel.com>
@ 2024-05-29 5:10 ` Kong, Lingling
0 siblings, 0 replies; 9+ messages in thread
From: Kong, Lingling @ 2024-05-29 5:10 UTC (permalink / raw)
To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak
gcc/ChangeLog:
* config/i386/i386.md (ashr<mode>3_cvt_nf): New define_insn.
(*<insn><mode>3_1_nf): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-nf.c: Add NF test for rotate insns.
---
gcc/config/i386/i386.md | 59 +++++++++++++++++---------
gcc/testsuite/gcc.target/i386/apx-nf.c | 5 +++
2 files changed, 43 insertions(+), 21 deletions(-)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d10caf04fcc..9d518e90d07 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -16245,19 +16245,19 @@
(define_mode_attr cvt_mnemonic
[(SI "{cltd|cdq}") (DI "{cqto|cqo}")])
-(define_insn "ashr<mode>3_cvt"
+(define_insn "ashr<mode>3_cvt<nf_name>"
[(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm,r")
(ashiftrt:SWI48
(match_operand:SWI48 1 "nonimmediate_operand" "*a,0,rm")
- (match_operand:QI 2 "const_int_operand")))
- (clobber (reg:CC FLAGS_REG))]
+ (match_operand:QI 2 "const_int_operand")))]
"INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)-1
&& (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
- && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
+ && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
"@
<cvt_mnemonic>
- sar{<imodesuffix>}\t{%2, %0|%0, %2}
- sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ <nf_prefix>sar{<imodesuffix>}\t{%2, %0|%0, %2}
+ <nf_prefix>sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "*,*,apx_ndd")
(set_attr "type" "imovx,ishift,ishift")
(set_attr "prefix_0f" "0,*,*")
@@ -17109,28 +17109,31 @@
[(set_attr "type" "rotatex")
(set_attr "mode" "<MODE>")])
-(define_insn "*<insn><mode>3_1"
+(define_insn "*<insn><mode>3_1<nf_name>"
[(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
(any_rotate:SWI48
(match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
- (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>,c<S>")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
+ (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>,c<S>")))]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
{
bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
switch (get_attr_type (insn))
{
case TYPE_ROTATEX:
- return "#";
+ if (TARGET_APX_NDD && <nf_applied>)
+ return "%{nf%} <rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
+ else
+ return "#";
default:
if (operands[2] == const1_rtx
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
- && !use_ndd)
+ && !use_ndd && !<nf_applied>)
return "<rotate>{<imodesuffix>}\t%0";
else
- return use_ndd ? "<rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
- : "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "<nf_prefix><rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "<nf_prefix><rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
[(set_attr "isa" "*,bmi2,apx_ndd")
@@ -17164,6 +17167,20 @@
operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize);
})
+(define_split
+ [(set (match_operand:SWI48 0 "register_operand")
+ (rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
+ (match_operand:QI 2 "const_int_operand")))]
+ "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)
+ && !TARGET_APX_NDD"
+ [(set (match_dup 0)
+ (rotatert:SWI48 (match_dup 1) (match_dup 2)))]
+{
+ int bitsize = GET_MODE_BITSIZE (<MODE>mode);
+
+ operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize);
+})
+
(define_split
[(set (match_operand:SWI48 0 "register_operand")
(rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
@@ -17251,22 +17268,22 @@
[(set (match_dup 0)
(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])
-(define_insn "*<insn><mode>3_1"
+(define_insn "*<insn><mode>3_1<nf_name>"
[(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m,r")
(any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0,rm")
- (match_operand:QI 2 "nonmemory_operand" "c<S>,c<S>")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
+ (match_operand:QI 2 "nonmemory_operand" "c<S>,c<S>")))]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)
+ && <nf_condition>"
{
bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
if (operands[2] == const1_rtx
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
- && !use_ndd)
+ && !use_ndd && !<nf_applied>)
return "<rotate>{<imodesuffix>}\t%0";
else
return use_ndd
- ? "<rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
- : "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+ ? "<nf_prefix><rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "<nf_prefix><rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
[(set_attr "isa" "*,apx_ndd")
(set_attr "type" "rotate")
diff --git a/gcc/testsuite/gcc.target/i386/apx-nf.c b/gcc/testsuite/gcc.target/i386/apx-nf.c
index f33a994f0b7..ed859b399b8 100644
--- a/gcc/testsuite/gcc.target/i386/apx-nf.c
+++ b/gcc/testsuite/gcc.target/i386/apx-nf.c
@@ -2,6 +2,7 @@
/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx,nf -march=x86-64 -O2" } */
/* { dg-final { scan-assembler-times "\{nf\} and" 1 } } */
/* { dg-final { scan-assembler-times "\{nf\} or" 1 } } */
+/* { dg-final { scan-assembler-times "\{nf\} rol" 4 } } */
struct B { unsigned bit0 : 1; unsigned bit1 : 1; };
@@ -10,3 +11,7 @@ foo (struct B *b)
{
b->bit0 = b->bit0 | b->bit1;
}
+long int f1 (int x) { return ~(1ULL << (x & 0x3f)); }
+long int f2 (int x) { return ~(1ULL << x); }
+long int f3 (unsigned char *x) { return ~(1ULL << (x[0] & 0x3f)); }
+long int f4 (unsigned char *x) { return ~(1ULL << x[0]); }
--
2.31.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v3 6/8] [APX NF] Support APX NF for shld/shrd
[not found] ` <20240529023704.3728291-6-lingling.kong@intel.com>
@ 2024-05-29 5:10 ` Kong, Lingling
0 siblings, 0 replies; 9+ messages in thread
From: Kong, Lingling @ 2024-05-29 5:10 UTC (permalink / raw)
To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak
gcc/ChangeLog:
* config/i386/i386.md (x86_64_shld_nf): New define_insn.
(x86_64_shld_ndd_nf): Ditto.
(x86_64_shld_1_nf): Ditto.
(x86_64_shld_ndd_1_nf): Ditto.
(*x86_64_shld_shrd_1_nozext_nf): Ditto.
(x86_shld_nf): Ditto.
(x86_shld_ndd_nf): Ditto.
(x86_shld_1_nf): Ditto.
(x86_shld_ndd_1_nf): Ditto.
(*x86_shld_shrd_1_nozext_nf): Ditto.
(<insn><dwi>3_doubleword_lowpart_nf): Ditto.
(x86_64_shrd_nf): Ditto.
(x86_64_shrd_ndd_nf): Ditto.
(x86_64_shrd_1_nf): Ditto.
(x86_64_shrd_ndd_1_nf): Ditto.
(*x86_64_shrd_shld_1_nozext_nf): Ditto.
(x86_shrd_nf): Ditto.
(x86_shrd_ndd_nf): Ditto.
(x86_shrd_1_nf): Ditto.
(x86_shrd_ndd_1_nf): Ditto.
(*x86_shrd_shld_1_nozext_nf): Ditto.
---
gcc/config/i386/i386.md | 377 +++++++++++++++++++++++++++++++---------
1 file changed, 296 insertions(+), 81 deletions(-)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 9d518e90d07..719cce7d3ef 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14551,7 +14551,7 @@
DONE;
})
-(define_insn "x86_64_shld"
+(define_insn "x86_64_shld<nf_name>"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
(ior:DI (ashift:DI (match_dup 0)
(and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
@@ -14561,10 +14561,9 @@
(zero_extend:TI
(match_operand:DI 1 "register_operand" "r"))
(minus:QI (const_int 64)
- (and:QI (match_dup 2) (const_int 63)))) 0)))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT"
- "shld{q}\t{%2, %1, %0|%0, %1, %2}"
+ (and:QI (match_dup 2) (const_int 63)))) 0)))]
+ "TARGET_64BIT && <nf_condition>"
+ "<nf_prefix>shld{q}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "DI")
@@ -14572,7 +14571,7 @@
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
-(define_insn "x86_64_shld_ndd"
+(define_insn "x86_64_shld_ndd<nf_name>"
[(set (match_operand:DI 0 "register_operand" "=r")
(ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
(and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
@@ -14582,14 +14581,13 @@
(zero_extend:TI
(match_operand:DI 2 "register_operand" "r"))
(minus:QI (const_int 64)
- (and:QI (match_dup 3) (const_int 63)))) 0)))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_APX_NDD"
- "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ (and:QI (match_dup 3) (const_int 63)))) 0)))]
+ "TARGET_APX_NDD && <nf_condition>"
+ "<nf_prefix>shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ishift")
(set_attr "mode" "DI")])
-(define_insn "x86_64_shld_1"
+(define_insn "x86_64_shld_1<nf_name>"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
(ior:DI (ashift:DI (match_dup 0)
(match_operand:QI 2 "const_0_to_63_operand"))
@@ -14597,11 +14595,11 @@
(lshiftrt:TI
(zero_extend:TI
(match_operand:DI 1 "register_operand" "r"))
- (match_operand:QI 3 "const_0_to_255_operand")) 0)))
- (clobber (reg:CC FLAGS_REG))]
+ (match_operand:QI 3 "const_0_to_255_operand")) 0)))]
"TARGET_64BIT
- && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
- "shld{q}\t{%2, %1, %0|%0, %1, %2}"
+ && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+ && <nf_condition>"
+ "<nf_prefix>shld{q}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "DI")
@@ -14610,7 +14608,7 @@
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
-(define_insn "x86_64_shld_ndd_1"
+(define_insn "x86_64_shld_ndd_1<nf_name>"
[(set (match_operand:DI 0 "register_operand" "=r")
(ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
(match_operand:QI 3 "const_0_to_63_operand"))
@@ -14618,15 +14616,66 @@
(lshiftrt:TI
(zero_extend:TI
(match_operand:DI 2 "register_operand" "r"))
- (match_operand:QI 4 "const_0_to_255_operand")) 0)))
- (clobber (reg:CC FLAGS_REG))]
+ (match_operand:QI 4 "const_0_to_255_operand")) 0)))]
"TARGET_APX_NDD
- && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
- "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ && INTVAL (operands[4]) == 64 - INTVAL (operands[3])
+ && <nf_condition>"
+ "<nf_prefix>shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ishift")
(set_attr "mode" "DI")
(set_attr "length_immediate" "1")])
+(define_insn_and_split "*x86_64_shld_shrd_1_nozext_nf"
+ [(set (match_operand:DI 0 "nonimmediate_operand")
+ (ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand")
+ (match_operand:QI 2 "const_0_to_63_operand"))
+ (lshiftrt:DI
+ (match_operand:DI 1 "nonimmediate_operand")
+ (match_operand:QI 3 "const_0_to_63_operand"))))]
+ "TARGET_64BIT && TARGET_APX_NF
+ && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ if (rtx_equal_p (operands[4], operands[0]))
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ emit_insn (gen_x86_64_shld_1_nf (operands[0], operands[1], operands[2], operands[3]));
+ }
+ else if (rtx_equal_p (operands[1], operands[0]))
+ {
+ operands[4] = force_reg (DImode, operands[4]);
+ emit_insn (gen_x86_64_shrd_1_nf (operands[0], operands[4], operands[3], operands[2]));
+ }
+ else if (TARGET_APX_NDD)
+ {
+ rtx tmp = gen_reg_rtx (DImode);
+ if (MEM_P (operands[4]))
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ }
+ else if (MEM_P (operands[1]))
+ emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[1], operands[4],
+ operands[3], operands[2]));
+ else
+ emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
+ else
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ rtx tmp = gen_reg_rtx (DImode);
+ emit_move_insn (tmp, operands[4]);
+ emit_insn (gen_x86_64_shld_1_nf (tmp, operands[1], operands[2], operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
+ DONE;
+})
(define_insn_and_split "*x86_64_shld_shrd_1_nozext"
[(set (match_operand:DI 0 "nonimmediate_operand")
@@ -14729,7 +14778,7 @@
emit_move_insn (operands[4], operands[0]);
})
-(define_insn "x86_shld"
+(define_insn "x86_shld<nf_name>"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
(ior:SI (ashift:SI (match_dup 0)
(and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
@@ -14739,10 +14788,9 @@
(zero_extend:DI
(match_operand:SI 1 "register_operand" "r"))
(minus:QI (const_int 32)
- (and:QI (match_dup 2) (const_int 31)))) 0)))
- (clobber (reg:CC FLAGS_REG))]
- ""
- "shld{l}\t{%2, %1, %0|%0, %1, %2}"
+ (and:QI (match_dup 2) (const_int 31)))) 0)))]
+ "<nf_condition>"
+ "<nf_prefix>shld{l}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "SI")
@@ -14751,7 +14799,7 @@
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
-(define_insn "x86_shld_ndd"
+(define_insn "x86_shld_ndd<nf_name>"
[(set (match_operand:SI 0 "nonimmediate_operand" "=r")
(ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
(and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
@@ -14761,15 +14809,14 @@
(zero_extend:DI
(match_operand:SI 2 "register_operand" "r"))
(minus:QI (const_int 32)
- (and:QI (match_dup 3) (const_int 31)))) 0)))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_APX_NDD"
- "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ (and:QI (match_dup 3) (const_int 31)))) 0)))]
+ "TARGET_APX_NDD && <nf_condition>"
+ "<nf_prefix>shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ishift")
(set_attr "mode" "SI")])
-(define_insn "x86_shld_1"
+(define_insn "x86_shld_1<nf_name>"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
(ior:SI (ashift:SI (match_dup 0)
(match_operand:QI 2 "const_0_to_31_operand"))
@@ -14777,10 +14824,10 @@
(lshiftrt:DI
(zero_extend:DI
(match_operand:SI 1 "register_operand" "r"))
- (match_operand:QI 3 "const_0_to_63_operand")) 0)))
- (clobber (reg:CC FLAGS_REG))]
- "INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
- "shld{l}\t{%2, %1, %0|%0, %1, %2}"
+ (match_operand:QI 3 "const_0_to_63_operand")) 0)))]
+ "INTVAL (operands[3]) == 32 - INTVAL (operands[2])
+ && <nf_condition>"
+ "<nf_prefix>shld{l}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "length_immediate" "1")
@@ -14790,7 +14837,7 @@
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
-(define_insn "x86_shld_ndd_1"
+(define_insn "x86_shld_ndd_1<nf_name>"
[(set (match_operand:SI 0 "register_operand" "=r")
(ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
(match_operand:QI 3 "const_0_to_31_operand"))
@@ -14798,15 +14845,66 @@
(lshiftrt:DI
(zero_extend:DI
(match_operand:SI 2 "register_operand" "r"))
- (match_operand:QI 4 "const_0_to_63_operand")) 0)))
- (clobber (reg:CC FLAGS_REG))]
+ (match_operand:QI 4 "const_0_to_63_operand")) 0)))]
"TARGET_APX_NDD
- && INTVAL (operands[4]) == 32 - INTVAL (operands[3])"
- "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ && INTVAL (operands[4]) == 32 - INTVAL (operands[3])
+ && <nf_condition>"
+ "<nf_prefix>shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ishift")
(set_attr "length_immediate" "1")
(set_attr "mode" "SI")])
+(define_insn_and_split "*x86_shld_shrd_1_nozext_nf"
+ [(set (match_operand:SI 0 "nonimmediate_operand")
+ (ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand")
+ (match_operand:QI 2 "const_0_to_31_operand"))
+ (lshiftrt:SI
+ (match_operand:SI 1 "nonimmediate_operand")
+ (match_operand:QI 3 "const_0_to_31_operand"))))]
+ "TARGET_APX_NF &&
+ INTVAL (operands[3]) == 32 - INTVAL (operands[2])
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ if (rtx_equal_p (operands[4], operands[0]))
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ emit_insn (gen_x86_shld_1_nf (operands[0], operands[1], operands[2], operands[3]));
+ }
+ else if (rtx_equal_p (operands[1], operands[0]))
+ {
+ operands[4] = force_reg (SImode, operands[4]);
+ emit_insn (gen_x86_shrd_1_nf (operands[0], operands[4], operands[3], operands[2]));
+ }
+ else if (TARGET_APX_NDD)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ if (MEM_P (operands[4]))
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ }
+ else if (MEM_P (operands[1]))
+ emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[1], operands[4],
+ operands[3], operands[2]));
+ else
+ emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
+ else
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_move_insn (tmp, operands[4]);
+ emit_insn (gen_x86_shld_1_nf (tmp, operands[1], operands[2], operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
+ DONE;
+})
(define_insn_and_split "*x86_shld_shrd_1_nozext"
[(set (match_operand:SI 0 "nonimmediate_operand")
@@ -15861,6 +15959,26 @@
})
;; Split truncations of double word right shifts into x86_shrd_1.
+(define_insn_and_split "<insn><dwi>3_doubleword_lowpart_nf"
+ [(set (match_operand:DWIH 0 "register_operand" "=&r")
+ (subreg:DWIH
+ (any_shiftrt:<DWI> (match_operand:<DWI> 1 "register_operand" "r")
+ (match_operand:QI 2 "const_int_operand")) 0))]
+ "TARGET_APX_NF && UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (ior:DWIH (lshiftrt:DWIH (match_dup 0) (match_dup 2))
+ (subreg:DWIH
+ (ashift:<DWI> (zero_extend:<DWI> (match_dup 3))
+ (match_dup 4)) 0)))]
+{
+ split_double_mode (<DWI>mode, &operands[1], 1, &operands[1], &operands[3]);
+ operands[4] = GEN_INT ((<MODE_SIZE> * BITS_PER_UNIT) - INTVAL (operands[2]));
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+})
+
(define_insn_and_split "<insn><dwi>3_doubleword_lowpart"
[(set (match_operand:DWIH 0 "register_operand" "=&r")
(subreg:DWIH
@@ -15884,7 +16002,7 @@
emit_move_insn (operands[0], operands[1]);
})
-(define_insn "x86_64_shrd"
+(define_insn "x86_64_shrd<nf_name>"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
(ior:DI (lshiftrt:DI (match_dup 0)
(and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
@@ -15894,10 +16012,9 @@
(zero_extend:TI
(match_operand:DI 1 "register_operand" "r"))
(minus:QI (const_int 64)
- (and:QI (match_dup 2) (const_int 63)))) 0)))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT"
- "shrd{q}\t{%2, %1, %0|%0, %1, %2}"
+ (and:QI (match_dup 2) (const_int 63)))) 0)))]
+ "TARGET_64BIT && <nf_condition>"
+ "<nf_prefix>shrd{q}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "DI")
@@ -15905,7 +16022,7 @@
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
-(define_insn "x86_64_shrd_ndd"
+(define_insn "x86_64_shrd_ndd<nf_name>"
[(set (match_operand:DI 0 "register_operand" "=r")
(ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
(and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
@@ -15915,15 +16032,13 @@
(zero_extend:TI
(match_operand:DI 2 "register_operand" "r"))
(minus:QI (const_int 64)
- (and:QI (match_dup 3) (const_int 63)))) 0)))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_APX_NDD"
- "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ (and:QI (match_dup 3) (const_int 63)))) 0)))]
+ "TARGET_APX_NDD && <nf_condition>"
+ "<nf_prefix>shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ishift")
(set_attr "mode" "DI")])
-
-(define_insn "x86_64_shrd_1"
+(define_insn "x86_64_shrd_1<nf_name>"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
(ior:DI (lshiftrt:DI (match_dup 0)
(match_operand:QI 2 "const_0_to_63_operand"))
@@ -15931,11 +16046,11 @@
(ashift:TI
(zero_extend:TI
(match_operand:DI 1 "register_operand" "r"))
- (match_operand:QI 3 "const_0_to_255_operand")) 0)))
- (clobber (reg:CC FLAGS_REG))]
+ (match_operand:QI 3 "const_0_to_255_operand")) 0)))]
"TARGET_64BIT
- && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
- "shrd{q}\t{%2, %1, %0|%0, %1, %2}"
+ && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+ && <nf_condition>"
+ "<nf_prefix>shrd{q}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "length_immediate" "1")
@@ -15944,7 +16059,7 @@
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
-(define_insn "x86_64_shrd_ndd_1"
+(define_insn "x86_64_shrd_ndd_1<nf_name>"
[(set (match_operand:DI 0 "register_operand" "=r")
(ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
(match_operand:QI 3 "const_0_to_63_operand"))
@@ -15952,15 +16067,66 @@
(ashift:TI
(zero_extend:TI
(match_operand:DI 2 "register_operand" "r"))
- (match_operand:QI 4 "const_0_to_255_operand")) 0)))
- (clobber (reg:CC FLAGS_REG))]
+ (match_operand:QI 4 "const_0_to_255_operand")) 0)))]
"TARGET_APX_NDD
- && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
- "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ && INTVAL (operands[4]) == 64 - INTVAL (operands[3])
+ && <nf_condition>"
+ "<nf_prefix>shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ishift")
(set_attr "length_immediate" "1")
(set_attr "mode" "DI")])
+(define_insn_and_split "*x86_64_shrd_shld_1_nozext_nf"
+ [(set (match_operand:DI 0 "nonimmediate_operand")
+ (ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand")
+ (match_operand:QI 2 "const_0_to_63_operand"))
+ (ashift:DI
+ (match_operand:DI 1 "nonimmediate_operand")
+ (match_operand:QI 3 "const_0_to_63_operand"))))]
+ "TARGET_64BIT && TARGET_APX_NF
+ && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ if (rtx_equal_p (operands[4], operands[0]))
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ emit_insn (gen_x86_64_shrd_1_nf (operands[0], operands[1], operands[2], operands[3]));
+ }
+ else if (rtx_equal_p (operands[1], operands[0]))
+ {
+ operands[4] = force_reg (DImode, operands[4]);
+ emit_insn (gen_x86_64_shld_1_nf (operands[0], operands[4], operands[3], operands[2]));
+ }
+ else if (TARGET_APX_NDD)
+ {
+ rtx tmp = gen_reg_rtx (DImode);
+ if (MEM_P (operands[4]))
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ }
+ else if (MEM_P (operands[1]))
+ emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[1], operands[4],
+ operands[3], operands[2]));
+ else
+ emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
+ else
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ rtx tmp = gen_reg_rtx (DImode);
+ emit_move_insn (tmp, operands[4]);
+ emit_insn (gen_x86_64_shrd_1_nf (tmp, operands[1], operands[2], operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
+ DONE;
+})
(define_insn_and_split "*x86_64_shrd_shld_1_nozext"
[(set (match_operand:DI 0 "nonimmediate_operand")
@@ -16063,7 +16229,7 @@
emit_move_insn (operands[4], operands[0]);
})
-(define_insn "x86_shrd"
+(define_insn "x86_shrd<nf_name>"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
(ior:SI (lshiftrt:SI (match_dup 0)
(and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
@@ -16073,10 +16239,9 @@
(zero_extend:DI
(match_operand:SI 1 "register_operand" "r"))
(minus:QI (const_int 32)
- (and:QI (match_dup 2) (const_int 31)))) 0)))
- (clobber (reg:CC FLAGS_REG))]
- ""
- "shrd{l}\t{%2, %1, %0|%0, %1, %2}"
+ (and:QI (match_dup 2) (const_int 31)))) 0)))]
+ "<nf_condition>"
+ "<nf_prefix>shrd{l}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "SI")
@@ -16085,7 +16250,7 @@
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
-(define_insn "x86_shrd_ndd"
+(define_insn "x86_shrd_ndd<nf_name>"
[(set (match_operand:SI 0 "register_operand" "=r")
(ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
(and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
@@ -16095,14 +16260,13 @@
(zero_extend:DI
(match_operand:SI 2 "register_operand" "r"))
(minus:QI (const_int 32)
- (and:QI (match_dup 3) (const_int 31)))) 0)))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_APX_NDD"
- "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ (and:QI (match_dup 3) (const_int 31)))) 0)))]
+ "TARGET_APX_NDD && <nf_condition>"
+ "<nf_prefix>shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ishift")
(set_attr "mode" "SI")])
-(define_insn "x86_shrd_1"
+(define_insn "x86_shrd_1<nf_name>"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
(ior:SI (lshiftrt:SI (match_dup 0)
(match_operand:QI 2 "const_0_to_31_operand"))
@@ -16110,10 +16274,10 @@
(ashift:DI
(zero_extend:DI
(match_operand:SI 1 "register_operand" "r"))
- (match_operand:QI 3 "const_0_to_63_operand")) 0)))
- (clobber (reg:CC FLAGS_REG))]
- "INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
- "shrd{l}\t{%2, %1, %0|%0, %1, %2}"
+ (match_operand:QI 3 "const_0_to_63_operand")) 0)))]
+ "INTVAL (operands[3]) == 32 - INTVAL (operands[2])
+ && <nf_condition>"
+ "<nf_prefix>shrd{l}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "length_immediate" "1")
@@ -16123,7 +16287,7 @@
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
-(define_insn "x86_shrd_ndd_1"
+(define_insn "x86_shrd_ndd_1<nf_name>"
[(set (match_operand:SI 0 "register_operand" "=r")
(ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
(match_operand:QI 3 "const_0_to_31_operand"))
@@ -16131,15 +16295,66 @@
(ashift:DI
(zero_extend:DI
(match_operand:SI 2 "register_operand" "r"))
- (match_operand:QI 4 "const_0_to_63_operand")) 0)))
- (clobber (reg:CC FLAGS_REG))]
+ (match_operand:QI 4 "const_0_to_63_operand")) 0)))]
"TARGET_APX_NDD
- && (INTVAL (operands[4]) == 32 - INTVAL (operands[3]))"
- "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ && (INTVAL (operands[4]) == 32 - INTVAL (operands[3]))
+ && <nf_condition>"
+ "<nf_prefix>shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ishift")
(set_attr "length_immediate" "1")
(set_attr "mode" "SI")])
+(define_insn_and_split "*x86_shrd_shld_1_nozext_nf"
+ [(set (match_operand:SI 0 "nonimmediate_operand")
+ (ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand")
+ (match_operand:QI 2 "const_0_to_31_operand"))
+ (ashift:SI
+ (match_operand:SI 1 "nonimmediate_operand")
+ (match_operand:QI 3 "const_0_to_31_operand"))))]
+ "TARGET_APX_NF &&
+ INTVAL (operands[3]) == 32 - INTVAL (operands[2])
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ if (rtx_equal_p (operands[4], operands[0]))
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ emit_insn (gen_x86_shrd_1_nf (operands[0], operands[1], operands[2], operands[3]));
+ }
+ else if (rtx_equal_p (operands[1], operands[0]))
+ {
+ operands[4] = force_reg (SImode, operands[4]);
+ emit_insn (gen_x86_shld_1_nf (operands[0], operands[4], operands[3], operands[2]));
+ }
+ else if (TARGET_APX_NDD)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ if (MEM_P (operands[4]))
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ }
+ else if (MEM_P (operands[1]))
+ emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[1], operands[4],
+ operands[3], operands[2]));
+ else
+ emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
+ else
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_move_insn (tmp, operands[4]);
+ emit_insn (gen_x86_shrd_1_nf (tmp, operands[1], operands[2], operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
+ DONE;
+})
(define_insn_and_split "*x86_shrd_shld_1_nozext"
[(set (match_operand:SI 0 "nonimmediate_operand")
--
2.31.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v3 7/8] [APX NF] Support APX NF for mul/div
[not found] ` <20240529023704.3728291-7-lingling.kong@intel.com>
@ 2024-05-29 5:11 ` Kong, Lingling
0 siblings, 0 replies; 9+ messages in thread
From: Kong, Lingling @ 2024-05-29 5:11 UTC (permalink / raw)
To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak
gcc/ChangeLog:
* config/i386/i386.md (*mul<mode>3_1_nf): New define_insn.
(*mulqi3_1_nf): Ditto.
(*<u>divmod<mode>4_noext_nf): Ditto.
(<u>divmodhiqi3_nf): Ditto.
---
gcc/config/i386/i386.md | 47 ++++++++++++++++++++++++++---------------
1 file changed, 30 insertions(+), 17 deletions(-)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 719cce7d3ef..e688e92785e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -9898,17 +9898,17 @@
;;
;; On BDVER1, all HI MULs use DoublePath
-(define_insn "*mul<mode>3_1"
+(define_insn "*mul<mode>3_1<nf_name>"
[(set (match_operand:SWIM248 0 "register_operand" "=r,r,r")
(mult:SWIM248
(match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0")
- (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r")))
- (clobber (reg:CC FLAGS_REG))]
- "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r")))]
+ "!(MEM_P (operands[1]) && MEM_P (operands[2]))
+ && <nf_condition>"
"@
- imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
- imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
- imul{<imodesuffix>}\t{%2, %0|%0, %2}"
+ <nf_prefix>imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ <nf_prefix>imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ <nf_prefix>imul{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "imul")
(set_attr "prefix_0f" "0,0,1")
(set (attr "athlon_decode")
@@ -9969,14 +9969,14 @@
;; MUL reg8 Direct
;; MUL mem8 Direct
-(define_insn "*mulqi3_1"
+(define_insn "*mulqi3_1<nf_name>"
[(set (match_operand:QI 0 "register_operand" "=a")
(mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
- (match_operand:QI 2 "nonimmediate_operand" "qm")))
- (clobber (reg:CC FLAGS_REG))]
+ (match_operand:QI 2 "nonimmediate_operand" "qm")))]
"TARGET_QIMODE_MATH
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "mul{b}\t%2"
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+ && <nf_condition>"
+ "<nf_prefix>mul{b}\t%2"
[(set_attr "type" "imul")
(set_attr "length_immediate" "0")
(set (attr "athlon_decode")
@@ -11119,6 +11119,19 @@
[(set_attr "type" "multi")
(set_attr "mode" "SI")])
+(define_insn "*<u>divmod<mode>4_noext_nf"
+ [(set (match_operand:SWIM248 0 "register_operand" "=a")
+ (any_div:SWIM248
+ (match_operand:SWIM248 2 "register_operand" "0")
+ (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SWIM248 1 "register_operand" "=d")
+ (<paired_mod>:SWIM248 (match_dup 2) (match_dup 3)))
+ (use (match_operand:SWIM248 4 "register_operand" "1"))]
+ "TARGET_APX_NF"
+ "%{nf%} <sgnprefix>div{<imodesuffix>}\t%3"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*<u>divmod<mode>4_noext"
[(set (match_operand:SWIM248 0 "register_operand" "=a")
(any_div:SWIM248
@@ -11266,7 +11279,7 @@
;; Change div/mod to HImode and extend the second argument to HImode
;; so that mode of div/mod matches with mode of arguments. Otherwise
;; combine may fail.
-(define_insn "<u>divmodhiqi3"
+(define_insn "<u>divmodhiqi3<nf_name>"
[(set (match_operand:HI 0 "register_operand" "=a")
(ior:HI
(ashift:HI
@@ -11278,10 +11291,10 @@
(const_int 8))
(zero_extend:HI
(truncate:QI
- (div:HI (match_dup 1) (any_extend:HI (match_dup 2)))))))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_QIMODE_MATH"
- "<sgnprefix>div{b}\t%2"
+ (div:HI (match_dup 1) (any_extend:HI (match_dup 2)))))))]
+ "TARGET_QIMODE_MATH
+ && <nf_condition>"
+ "<nf_prefix><sgnprefix>div{b}\t%2"
[(set_attr "type" "idiv")
(set_attr "mode" "QI")])
--
2.31.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v3 8/8] [APX NF] Support APX NF for lzcnt/tzcnt/popcnt
[not found] ` <20240529023704.3728291-8-lingling.kong@intel.com>
@ 2024-05-29 5:11 ` Kong, Lingling
0 siblings, 0 replies; 9+ messages in thread
From: Kong, Lingling @ 2024-05-29 5:11 UTC (permalink / raw)
To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak
gcc/ChangeLog:
* config/i386/i386.md (clz<mode>2_lzcnt_nf): New define_insn.
(*clz<mode>2_lzcnt_falsedep_nf): Ditto.
(<lt_zcnt>_<mode>_nf): Ditto.
(*<lt_zcnt>_<mode>_falsedep_nf): Ditto.
(<lt_zcnt>_hi_nf): Ditto.
(popcount<mode>2_nf): Ditto.
(*popcount<mode>2_falsedep_nf): Ditto.
(popcounthi2_nf): Ditto.
---
gcc/config/i386/i386.md | 124 ++++++++++++++++++++++++++++++++++++----
1 file changed, 113 insertions(+), 11 deletions(-)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e688e92785e..b0eb497cd23 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -20269,6 +20269,24 @@
operands[3] = gen_reg_rtx (<MODE>mode);
})
+(define_insn_and_split "clz<mode>2_lzcnt_nf"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (clz:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
+ "TARGET_APX_NF && TARGET_LZCNT"
+ "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
+ "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+ && optimize_function_for_speed_p (cfun)
+ && !reg_mentioned_p (operands[0], operands[1])"
+ [(parallel
+ [(set (match_dup 0)
+ (clz:SWI48 (match_dup 1)))
+ (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+ "ix86_expand_clear (operands[0]);"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
(define_insn_and_split "clz<mode>2_lzcnt"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(clz:SWI48
@@ -20292,6 +20310,18 @@
; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt. There is no false dependency when destination is
; also used in source.
+(define_insn "*clz<mode>2_lzcnt_falsedep_nf"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (clz:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+ (unspec [(match_operand:SWI48 2 "register_operand" "0")]
+ UNSPEC_INSN_FALSE_DEP)]
+ "TARGET_APX_NF && TARGET_LZCNT"
+ "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*clz<mode>2_lzcnt_falsedep"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(clz:SWI48
@@ -20398,6 +20428,25 @@
;; Version of lzcnt/tzcnt that is expanded from intrinsics. This version
;; provides operand size as output when source operand is zero.
+(define_insn_and_split "<lt_zcnt>_<mode>_nf"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (unspec:SWI48
+ [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))]
+ "TARGET_APX_NF"
+ "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
+ "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+ && optimize_function_for_speed_p (cfun)
+ && !reg_mentioned_p (operands[0], operands[1])"
+ [(parallel
+ [(set (match_dup 0)
+ (unspec:SWI48 [(match_dup 1)] LT_ZCNT))
+ (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+ "ix86_expand_clear (operands[0]);"
+ [(set_attr "type" "<lt_zcnt_type>")
+ (set_attr "prefix_0f" "1")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "<MODE>")])
+
(define_insn_and_split "<lt_zcnt>_<mode>"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(unspec:SWI48
@@ -20422,6 +20471,19 @@
; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt. There is no false dependency when destination is
; also used in source.
+(define_insn "*<lt_zcnt>_<mode>_falsedep_nf"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (unspec:SWI48
+ [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
+ (unspec [(match_operand:SWI48 2 "register_operand" "0")]
+ UNSPEC_INSN_FALSE_DEP)]
+ "TARGET_APX_NF"
+ "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "<lt_zcnt_type>")
+ (set_attr "prefix_0f" "1")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*<lt_zcnt>_<mode>_falsedep"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(unspec:SWI48
@@ -20436,13 +20498,12 @@
(set_attr "prefix_rep" "1")
(set_attr "mode" "<MODE>")])
-(define_insn "<lt_zcnt>_hi"
+(define_insn "<lt_zcnt>_hi<nf_name>"
[(set (match_operand:HI 0 "register_operand" "=r")
(unspec:HI
- [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))
- (clobber (reg:CC FLAGS_REG))]
- ""
- "<lt_zcnt>{w}\t{%1, %0|%0, %1}"
+ [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))]
+ "<nf_condition>"
+ "<nf_prefix><lt_zcnt>{w}\t{%1, %0|%0, %1}"
[(set_attr "type" "<lt_zcnt_type>")
(set_attr "prefix_0f" "1")
(set_attr "prefix_rep" "1")
@@ -20860,6 +20921,30 @@
[(set_attr "type" "bitmanip")
(set_attr "mode" "<MODE>")])
+(define_insn_and_split "popcount<mode>2_nf"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (popcount:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
+ "TARGET_APX_NF && TARGET_POPCNT"
+{
+#if TARGET_MACHO
+ return "%{nf%} popcnt\t{%1, %0|%0, %1}";
+#else
+ return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
+ "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+ && optimize_function_for_speed_p (cfun)
+ && !reg_mentioned_p (operands[0], operands[1])"
+ [(parallel
+ [(set (match_dup 0)
+ (popcount:SWI48 (match_dup 1)))
+ (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+ "ix86_expand_clear (operands[0]);"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
(define_insn_and_split "popcount<mode>2"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(popcount:SWI48
@@ -20889,6 +20974,24 @@
; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt. There is no false dependency when destination is
; also used in source.
+(define_insn "*popcount<mode>2_falsedep_nf"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (popcount:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+ (unspec [(match_operand:SWI48 2 "register_operand" "0")]
+ UNSPEC_INSN_FALSE_DEP)]
+ "TARGET_APX_NF && TARGET_POPCNT"
+{
+#if TARGET_MACHO
+ return "%{nf%} popcnt\t{%1, %0|%0, %1}";
+#else
+ return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*popcount<mode>2_falsedep"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(popcount:SWI48
@@ -21046,17 +21149,16 @@
DONE;
})
-(define_insn "popcounthi2"
+(define_insn "popcounthi2<nf_name>"
[(set (match_operand:HI 0 "register_operand" "=r")
(popcount:HI
- (match_operand:HI 1 "nonimmediate_operand" "rm")))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_POPCNT"
+ (match_operand:HI 1 "nonimmediate_operand" "rm")))]
+ "TARGET_POPCNT && <nf_condition>"
{
#if TARGET_MACHO
- return "popcnt\t{%1, %0|%0, %1}";
+ return "<nf_prefix>popcnt\t{%1, %0|%0, %1}";
#else
- return "popcnt{w}\t{%1, %0|%0, %1}";
+ return "<nf_prefix>popcnt{w}\t{%1, %0|%0, %1}";
#endif
}
[(set_attr "prefix_rep" "1")
--
2.31.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v3 1/8] [APX NF]: Support APX NF add
2024-05-29 5:10 ` [PATCH v3 1/8] [APX NF]: Support APX NF add Kong, Lingling
@ 2024-06-03 1:08 ` Hongtao Liu
0 siblings, 0 replies; 9+ messages in thread
From: Hongtao Liu @ 2024-06-03 1:08 UTC (permalink / raw)
To: Kong, Lingling; +Cc: gcc-patches, Liu, Hongtao, Uros Bizjak
On Wed, May 29, 2024 at 1:11 PM Kong, Lingling <lingling.kong@intel.com> wrote:
>
> Hi, compared with v2, these patches restored the original lea pattern position and addressed Hongtao's comment.
>
> The APX NF (no flags) feature suppresses the update of status flags
> for arithmetic operations.
OK for this patch and the remaining patches in the series.
[PATCH v3 1/8] [APX NF]: Support APX NF add Kong, Lingling
[PATCH v3 2/8] [APX NF] Support APX NF for {sub/and/or/xor/neg} Kong, Lingling
[PATCH v3 3/8] [APX NF] Support APX NF for left shift insns Kong, Lingling
[PATCH v3 4/8] [APX NF] Support APX NF for right shift insns Kong, Lingling
[PATCH v3 5/8] [APX NF] Support APX NF for rotate insns Kong, Lingling
[PATCH v3 6/8] [APX NF] Support APX NF for shld/shrd Kong, Lingling
[PATCH v3 7/8] [APX NF] Support APX NF for mul/div Kong, Lingling
[PATCH v3 8/8] [APX NF] Support APX NF for lzcnt/tzcnt/popcnt Kong, Lingling
>
> For NF add, it is not clear whether nf add can be faster than lea. If so,
> the pattern needs to be adjusted to prefer lea generation.
>
> gcc/ChangeLog:
>
> * config/i386/i386-opts.h (enum apx_features): Add nf
> enumeration.
> * config/i386/i386.h (TARGET_APX_NF): New.
> * config/i386/i386.md (*add<mode>_1_nf): New define_insn.
> * config/i386/i386.opt: Add apx_nf enumeration.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/apx-ndd.c: Fixed test.
>
> Co-authored-by: Lingling Kong <lingling.kong@intel.com>
> ---
> gcc/config/i386/i386-opts.h | 3 +-
> gcc/config/i386/i386.h | 1 +
> gcc/config/i386/i386.md | 135 ++++++++++++++++--------
> gcc/config/i386/i386.opt | 3 +
> gcc/testsuite/gcc.target/i386/apx-ndd.c | 2 +-
> 5 files changed, 98 insertions(+), 46 deletions(-)
>
> diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
> index ef2825803b3..60176ce609f 100644
> --- a/gcc/config/i386/i386-opts.h
> +++ b/gcc/config/i386/i386-opts.h
> @@ -140,7 +140,8 @@ enum apx_features {
> apx_push2pop2 = 1 << 1,
> apx_ndd = 1 << 2,
> apx_ppx = 1 << 3,
> - apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx,
> + apx_nf = 1<< 4,
> + apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx | apx_nf,
> };
>
> #endif
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 359a8408263..969391d3013 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -55,6 +55,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
> #define TARGET_APX_PUSH2POP2 (ix86_apx_features & apx_push2pop2)
> #define TARGET_APX_NDD (ix86_apx_features & apx_ndd)
> #define TARGET_APX_PPX (ix86_apx_features & apx_ppx)
> +#define TARGET_APX_NF (ix86_apx_features & apx_nf)
>
> #include "config/vxworks-dummy.h"
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index e8073f5a200..1eeadaddeba 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -6290,6 +6290,13 @@
> [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
> (clobber (reg:CC FLAGS_REG))])]
> "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
> +
> +(define_split
> + [(set (match_operand:SWI48 0 "general_reg_operand")
> + (mult:SWI48 (match_dup 0) (match_operand:SWI48 1 "const1248_operand")))]
> + "TARGET_APX_NF && reload_completed"
> + [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))]
> + "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
>
>
> ;; Add instructions
>
> @@ -6437,48 +6444,65 @@
> (clobber (reg:CC FLAGS_REG))])]
> "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[5]);")
>
> -(define_insn "*add<mode>_1"
> - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r,r")
> +(define_subst_attr "nf_name" "nf_subst" "_nf" "")
> +(define_subst_attr "nf_prefix" "nf_subst" "%{nf%} " "")
> +(define_subst_attr "nf_condition" "nf_subst" "TARGET_APX_NF" "true")
> +(define_subst_attr "nf_mem_constraint" "nf_subst" "je" "m")
> +(define_subst_attr "nf_applied" "nf_subst" "true" "false")
> +
> +(define_subst "nf_subst"
> + [(set (match_operand:SWI 0)
> + (match_operand:SWI 1))]
> + ""
> + [(set (match_dup 0)
> + (match_dup 1))
> + (clobber (reg:CC FLAGS_REG))])
> +
> +(define_insn "*add<mode>_1<nf_name>"
> + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r<nf_mem_constraint>,r,r,r,r,r,r")
> (plus:SWI48
> - (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rje,jM,r")
> - (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le,r,e,BM")))
> - (clobber (reg:CC FLAGS_REG))]
> - "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
> + (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,0,r,r,rje,jM,r")
> + (match_operand:SWI48 2 "x86_64_general_operand" "r,e,BM,0,le,r,e,BM")))]
> + "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
> + && <nf_condition>"
> {
> bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
> switch (get_attr_type (insn))
> {
> case TYPE_LEA:
> - return "#";
> + if (TARGET_APX_NDD && <nf_applied>)
> + return "%{nf%} add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
> + else
> + return "#";
>
> case TYPE_INCDEC:
> if (operands[2] == const1_rtx)
> - return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
> - : "inc{<imodesuffix>}\t%0";
> + return use_ndd ? "<nf_prefix>inc{<imodesuffix>}\t{%1, %0|%0, %1}"
> + : "<nf_prefix>inc{<imodesuffix>}\t%0";
> else
> {
> gcc_assert (operands[2] == constm1_rtx);
> - return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
> - : "dec{<imodesuffix>}\t%0";
> + return use_ndd ? "<nf_prefix>dec{<imodesuffix>}\t{%1, %0|%0, %1}"
> + : "<nf_prefix>dec{<imodesuffix>}\t%0";
> }
>
> default:
> /* For most processors, ADD is faster than LEA. This alternative
> was added to use ADD as much as possible. */
> - if (which_alternative == 2)
> + if (which_alternative == 3)
> std::swap (operands[1], operands[2]);
>
> if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
> - return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> - : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
> + return use_ndd ? "<nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> + : "<nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2}";
>
> - return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> - : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
> + return use_ndd ? "<nf_prefix>add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> + : "<nf_prefix>add{<imodesuffix>}\t{%2, %0|%0, %2}";
> }
> }
> - [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
> + [(set_attr "isa" "*,*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
> (set (attr "type")
> - (cond [(eq_attr "alternative" "3")
> + (cond [(eq_attr "alternative" "4")
> (const_string "lea")
> (match_operand:SWI48 2 "incdec_operand")
> (const_string "incdec")
> @@ -6552,26 +6576,29 @@
> (const_string "*")))
> (set_attr "mode" "SI")])
>
> -(define_insn "*addhi_1"
> +(define_insn "*addhi_1<nf_name>"
> [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp,r,r")
> (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp,rm,r")
> - (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))
> - (clobber (reg:CC FLAGS_REG))]
> - "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)"
> + (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))]
> + "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)
> + && <nf_condition>"
> {
> bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
> switch (get_attr_type (insn))
> {
> case TYPE_LEA:
> - return "#";
> + if (TARGET_APX_NDD && <nf_applied>)
> + return "%{nf%} add{w}\t{%2, %1, %0|%0, %1, %2}";
> + else
> + return "#";
>
> case TYPE_INCDEC:
> if (operands[2] == const1_rtx)
> - return use_ndd ? "inc{w}\t{%1, %0|%0, %1}" : "inc{w}\t%0";
> + return use_ndd ? "<nf_prefix>inc{w}\t{%1, %0|%0, %1}" : "<nf_prefix>inc{w}\t%0";
> else
> {
> gcc_assert (operands[2] == constm1_rtx);
> - return use_ndd ? "dec{w}\t{%1, %0|%0, %1}" : "dec{w}\t%0";
> + return use_ndd ? "<nf_prefix>dec{w}\t{%1, %0|%0, %1}" : "<nf_prefix>dec{w}\t%0";
> }
>
> default:
> @@ -6581,11 +6608,11 @@
> std::swap (operands[1], operands[2]);
>
> if (x86_maybe_negate_const_int (&operands[2], HImode))
> - return use_ndd ? "sub{w}\t{%2, %1, %0|%0, %1, %2}"
> - : "sub{w}\t{%2, %0|%0, %2}";
> + return use_ndd ? "<nf_prefix>sub{w}\t{%2, %1, %0|%0, %1, %2}"
> + : "<nf_prefix>sub{w}\t{%2, %0|%0, %2}";
>
> - return use_ndd ? "add{w}\t{%2, %1, %0|%0, %1, %2}"
> - : "add{w}\t{%2, %0|%0, %2}";
> + return use_ndd ? "<nf_prefix>add{w}\t{%2, %1, %0|%0, %1, %2}"
> + : "<nf_prefix>add{w}\t{%2, %0|%0, %2}";
> }
> }
> [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
> @@ -6603,33 +6630,36 @@
> (const_string "*")))
> (set_attr "mode" "HI,HI,HI,SI,HI,HI")])
>
> -(define_insn "*addqi_1"
> +(define_insn "*addqi_1<nf_name>"
> [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp,r,r")
> (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp,rm,r")
> - (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))
> - (clobber (reg:CC FLAGS_REG))]
> - "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)"
> + (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))]
> + "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)
> + && <nf_condition>"
> {
> bool widen = (get_attr_mode (insn) != MODE_QI);
> bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
> switch (get_attr_type (insn))
> {
> case TYPE_LEA:
> - return "#";
> + if (TARGET_APX_NDD && <nf_applied>)
> + return "%{nf%} add{b}\t{%2, %1, %0|%0, %1, %2}";
> + else
> + return "#";
>
> case TYPE_INCDEC:
> if (operands[2] == const1_rtx)
> if (use_ndd)
> - return "inc{b}\t{%1, %0|%0, %1}";
> + return "<nf_prefix>inc{b}\t{%1, %0|%0, %1}";
> else
> - return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
> + return widen ? "<nf_prefix>inc{l}\t%k0" : "<nf_prefix>inc{b}\t%0";
> else
> {
> gcc_assert (operands[2] == constm1_rtx);
> if (use_ndd)
> - return "dec{b}\t{%1, %0|%0, %1}";
> + return "<nf_prefix>dec{b}\t{%1, %0|%0, %1}";
> else
> - return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
> + return widen ? "<nf_prefix>dec{l}\t%k0" : "<nf_prefix>dec{b}\t%0";
> }
>
> default:
> @@ -6641,16 +6671,16 @@
> if (x86_maybe_negate_const_int (&operands[2], QImode))
> {
> if (use_ndd)
> - return "sub{b}\t{%2, %1, %0|%0, %1, %2}";
> + return "<nf_prefix>sub{b}\t{%2, %1, %0|%0, %1, %2}";
> else
> - return widen ? "sub{l}\t{%2, %k0|%k0, %2}"
> - : "sub{b}\t{%2, %0|%0, %2}";
> + return widen ? "<nf_prefix>sub{l}\t{%2, %k0|%k0, %2}"
> + : "<nf_prefix>sub{b}\t{%2, %0|%0, %2}";
> }
> if (use_ndd)
> - return "add{b}\t{%2, %1, %0|%0, %1, %2}";
> + return "<nf_prefix>add{b}\t{%2, %1, %0|%0, %1, %2}";
> else
> - return widen ? "add{l}\t{%k2, %k0|%k0, %k2}"
> - : "add{b}\t{%2, %0|%0, %2}";
> + return widen ? "<nf_prefix>add{l}\t{%k2, %k0|%k0, %k2}"
> + : "<nf_prefix>add{b}\t{%2, %0|%0, %2}";
> }
> }
> [(set_attr "isa" "*,*,*,*,*,*,apx_ndd,apx_ndd")
> @@ -6824,6 +6854,23 @@
> }
> })
>
> +(define_split
> + [(set (match_operand:SWI 0 "register_operand")
> + (plus:SWI (match_operand:SWI 1 "register_operand")
> + (match_operand:SWI 2 "<nonmemory_operand>")))]
> + "TARGET_APX_NF && reload_completed
> + && ix86_lea_for_add_ok (insn, operands)"
> + [(set (match_dup 0)
> + (plus:<LEAMODE> (match_dup 1) (match_dup 2)))]
> +{
> + if (<MODE>mode != <LEAMODE>mode)
> + {
> + operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
> + operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
> + operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]);
> + }
> +})
> +
> ;; Convert add to the lea pattern to avoid flags dependency.
> (define_split
> [(set (match_operand:DI 0 "register_operand")
> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index 7151fb1b147..b6f28a2b4bd 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -1336,6 +1336,9 @@ Enum(apx_features) String(ndd) Value(apx_ndd) Set(4)
> EnumValue
> Enum(apx_features) String(ppx) Value(apx_ppx) Set(5)
>
> +EnumValue
> +Enum(apx_features) String(nf) Value(apx_nf) Set(6)
> +
> EnumValue
> Enum(apx_features) String(all) Value(apx_all) Set(1)
>
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
> index 0eb751ad225..0ff4df0780c 100644
> --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
> +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
> @@ -1,5 +1,5 @@
> /* { dg-do compile { target { ! ia32 } } } */
> -/* { dg-options "-mapxf -march=x86-64 -O2" } */
> +/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx -march=x86-64 -O2" } */
> /* { dg-final { scan-assembler-not "movl"} } */
>
> #include <stdint.h>
> --
> 2.31.1
>
--
BR,
Hongtao
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2024-06-03 1:09 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <20240529023704.3728291-1-lingling.kong@intel.com>
2024-05-29 5:10 ` [PATCH v3 1/8] [APX NF]: Support APX NF add Kong, Lingling
2024-06-03 1:08 ` Hongtao Liu
[not found] ` <20240529023704.3728291-2-lingling.kong@intel.com>
2024-05-29 5:10 ` [PATCH v3 2/8] [APX NF] Support APX NF for {sub/and/or/xor/neg} Kong, Lingling
[not found] ` <20240529023704.3728291-3-lingling.kong@intel.com>
2024-05-29 5:10 ` [PATCH v3 3/8] [APX NF] Support APX NF for left shift insns Kong, Lingling
[not found] ` <20240529023704.3728291-4-lingling.kong@intel.com>
2024-05-29 5:10 ` [PATCH v3 4/8] [APX NF] Support APX NF for right " Kong, Lingling
[not found] ` <20240529023704.3728291-5-lingling.kong@intel.com>
2024-05-29 5:10 ` [PATCH v3 5/8] [APX NF] Support APX NF for rotate insns Kong, Lingling
[not found] ` <20240529023704.3728291-6-lingling.kong@intel.com>
2024-05-29 5:10 ` [PATCH v3 6/8] [APX NF] Support APX NF for shld/shrd Kong, Lingling
[not found] ` <20240529023704.3728291-7-lingling.kong@intel.com>
2024-05-29 5:11 ` [PATCH v3 7/8] [APX NF] Support APX NF for mul/div Kong, Lingling
[not found] ` <20240529023704.3728291-8-lingling.kong@intel.com>
2024-05-29 5:11 ` [PATCH v3 8/8] [APX NF] Support APX NF for lzcnt/tzcnt/popcnt Kong, Lingling
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).