* [PATCH v3 1/8] [APX NF]: Support APX NF add [not found] <20240529023704.3728291-1-lingling.kong@intel.com> @ 2024-05-29 5:10 ` Kong, Lingling 2024-06-03 1:08 ` Hongtao Liu [not found] ` <20240529023704.3728291-2-lingling.kong@intel.com> ` (6 subsequent siblings) 7 siblings, 1 reply; 9+ messages in thread From: Kong, Lingling @ 2024-05-29 5:10 UTC (permalink / raw) To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak Hi, compared with v2, these patches restored the original lea pattern position and addressed Hongtao's comment. The APX NF (no flags) feature suppresses the update of status flags for arithmetic operations. For NF add, it is not clear whether nf add can be faster than lea. If so, the pattern needs to be adjusted to prefer lea generation. gcc/ChangeLog: * config/i386/i386-opts.h (enum apx_features): Add nf enumeration. * config/i386/i386.h (TARGET_APX_NF): New. * config/i386/i386.md (*add<mode>_1_nf): New define_insn. * config/i386/i386.opt: Add apx_nf enumeration. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ndd.c: Fixed test. 
Co-authored-by: Lingling Kong <lingling.kong@intel.com> --- gcc/config/i386/i386-opts.h | 3 +- gcc/config/i386/i386.h | 1 + gcc/config/i386/i386.md | 135 ++++++++++++++++-------- gcc/config/i386/i386.opt | 3 + gcc/testsuite/gcc.target/i386/apx-ndd.c | 2 +- 5 files changed, 98 insertions(+), 46 deletions(-) diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h index ef2825803b3..60176ce609f 100644 --- a/gcc/config/i386/i386-opts.h +++ b/gcc/config/i386/i386-opts.h @@ -140,7 +140,8 @@ enum apx_features { apx_push2pop2 = 1 << 1, apx_ndd = 1 << 2, apx_ppx = 1 << 3, - apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx, + apx_nf = 1<< 4, + apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx | apx_nf, }; #endif diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 359a8408263..969391d3013 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -55,6 +55,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_APX_PUSH2POP2 (ix86_apx_features & apx_push2pop2) #define TARGET_APX_NDD (ix86_apx_features & apx_ndd) #define TARGET_APX_PPX (ix86_apx_features & apx_ppx) +#define TARGET_APX_NF (ix86_apx_features & apx_nf) #include "config/vxworks-dummy.h" diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e8073f5a200..1eeadaddeba 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -6290,6 +6290,13 @@ [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1))) (clobber (reg:CC FLAGS_REG))])] "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));") + +(define_split + [(set (match_operand:SWI48 0 "general_reg_operand") + (mult:SWI48 (match_dup 0) (match_operand:SWI48 1 "const1248_operand")))] + "TARGET_APX_NF && reload_completed" + [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))] + "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));") ;; Add instructions @@ -6437,48 +6444,65 @@ (clobber (reg:CC FLAGS_REG))])] 
"split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[5]);") -(define_insn "*add<mode>_1" - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r,r") +(define_subst_attr "nf_name" "nf_subst" "_nf" "") +(define_subst_attr "nf_prefix" "nf_subst" "%{nf%} " "") +(define_subst_attr "nf_condition" "nf_subst" "TARGET_APX_NF" "true") +(define_subst_attr "nf_mem_constraint" "nf_subst" "je" "m") +(define_subst_attr "nf_applied" "nf_subst" "true" "false") + +(define_subst "nf_subst" + [(set (match_operand:SWI 0) + (match_operand:SWI 1))] + "" + [(set (match_dup 0) + (match_dup 1)) + (clobber (reg:CC FLAGS_REG))]) + +(define_insn "*add<mode>_1<nf_name>" + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r<nf_mem_constraint>,r,r,r,r,r,r") (plus:SWI48 - (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rje,jM,r") - (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le,r,e,BM"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)" + (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,0,r,r,rje,jM,r") + (match_operand:SWI48 2 "x86_64_general_operand" "r,e,BM,0,le,r,e,BM")))] + "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_LEA: - return "#"; + if (TARGET_APX_NDD && <nf_applied>) + return "%{nf%} add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"; + else + return "#"; case TYPE_INCDEC: if (operands[2] == const1_rtx) - return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}" - : "inc{<imodesuffix>}\t%0"; + return use_ndd ? "<nf_prefix>inc{<imodesuffix>}\t{%1, %0|%0, %1}" + : "<nf_prefix>inc{<imodesuffix>}\t%0"; else { gcc_assert (operands[2] == constm1_rtx); - return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}" - : "dec{<imodesuffix>}\t%0"; + return use_ndd ? 
"<nf_prefix>dec{<imodesuffix>}\t{%1, %0|%0, %1}" + : "<nf_prefix>dec{<imodesuffix>}\t%0"; } default: /* For most processors, ADD is faster than LEA. This alternative was added to use ADD as much as possible. */ - if (which_alternative == 2) + if (which_alternative == 3) std::swap (operands[1], operands[2]); if (x86_maybe_negate_const_int (&operands[2], <MODE>mode)) - return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" - : "sub{<imodesuffix>}\t{%2, %0|%0, %2}"; + return use_ndd ? "<nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2}"; - return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" - : "add{<imodesuffix>}\t{%2, %0|%0, %2}"; + return use_ndd ? "<nf_prefix>add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix>add{<imodesuffix>}\t{%2, %0|%0, %2}"; } } - [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd") + [(set_attr "isa" "*,*,*,*,*,apx_ndd,apx_ndd,apx_ndd") (set (attr "type") - (cond [(eq_attr "alternative" "3") + (cond [(eq_attr "alternative" "4") (const_string "lea") (match_operand:SWI48 2 "incdec_operand") (const_string "incdec") @@ -6552,26 +6576,29 @@ (const_string "*"))) (set_attr "mode" "SI")]) -(define_insn "*addhi_1" +(define_insn "*addhi_1<nf_name>" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp,r,r") (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp,rm,r") - (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)" + (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))] + "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_LEA: - return "#"; + if (TARGET_APX_NDD && <nf_applied>) + return "%{nf%} add{w}\t{%2, %1, %0|%0, %1, %2}"; + else + return "#"; case TYPE_INCDEC: if (operands[2] == const1_rtx) - return 
use_ndd ? "inc{w}\t{%1, %0|%0, %1}" : "inc{w}\t%0"; + return use_ndd ? "<nf_prefix>inc{w}\t{%1, %0|%0, %1}" : "<nf_prefix>inc{w}\t%0"; else { gcc_assert (operands[2] == constm1_rtx); - return use_ndd ? "dec{w}\t{%1, %0|%0, %1}" : "dec{w}\t%0"; + return use_ndd ? "<nf_prefix>dec{w}\t{%1, %0|%0, %1}" : "<nf_prefix>dec{w}\t%0"; } default: @@ -6581,11 +6608,11 @@ std::swap (operands[1], operands[2]); if (x86_maybe_negate_const_int (&operands[2], HImode)) - return use_ndd ? "sub{w}\t{%2, %1, %0|%0, %1, %2}" - : "sub{w}\t{%2, %0|%0, %2}"; + return use_ndd ? "<nf_prefix>sub{w}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix>sub{w}\t{%2, %0|%0, %2}"; - return use_ndd ? "add{w}\t{%2, %1, %0|%0, %1, %2}" - : "add{w}\t{%2, %0|%0, %2}"; + return use_ndd ? "<nf_prefix>add{w}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix>add{w}\t{%2, %0|%0, %2}"; } } [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd") @@ -6603,33 +6630,36 @@ (const_string "*"))) (set_attr "mode" "HI,HI,HI,SI,HI,HI")]) -(define_insn "*addqi_1" +(define_insn "*addqi_1<nf_name>" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp,r,r") (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp,rm,r") - (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))] + "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool widen = (get_attr_mode (insn) != MODE_QI); bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_LEA: - return "#"; + if (TARGET_APX_NDD && <nf_applied>) + return "%{nf%} add{b}\t{%2, %1, %0|%0, %1, %2}"; + else + return "#"; case TYPE_INCDEC: if (operands[2] == const1_rtx) if (use_ndd) - return "inc{b}\t{%1, %0|%0, %1}"; + return "<nf_prefix>inc{b}\t{%1, %0|%0, %1}"; else - return widen ? "inc{l}\t%k0" : "inc{b}\t%0"; + return widen ? 
"<nf_prefix>inc{l}\t%k0" : "<nf_prefix>inc{b}\t%0"; else { gcc_assert (operands[2] == constm1_rtx); if (use_ndd) - return "dec{b}\t{%1, %0|%0, %1}"; + return "<nf_prefix>dec{b}\t{%1, %0|%0, %1}"; else - return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; + return widen ? "<nf_prefix>dec{l}\t%k0" : "<nf_prefix>dec{b}\t%0"; } default: @@ -6641,16 +6671,16 @@ if (x86_maybe_negate_const_int (&operands[2], QImode)) { if (use_ndd) - return "sub{b}\t{%2, %1, %0|%0, %1, %2}"; + return "<nf_prefix>sub{b}\t{%2, %1, %0|%0, %1, %2}"; else - return widen ? "sub{l}\t{%2, %k0|%k0, %2}" - : "sub{b}\t{%2, %0|%0, %2}"; + return widen ? "<nf_prefix>sub{l}\t{%2, %k0|%k0, %2}" + : "<nf_prefix>sub{b}\t{%2, %0|%0, %2}"; } if (use_ndd) - return "add{b}\t{%2, %1, %0|%0, %1, %2}"; + return "<nf_prefix>add{b}\t{%2, %1, %0|%0, %1, %2}"; else - return widen ? "add{l}\t{%k2, %k0|%k0, %k2}" - : "add{b}\t{%2, %0|%0, %2}"; + return widen ? "<nf_prefix>add{l}\t{%k2, %k0|%k0, %k2}" + : "<nf_prefix>add{b}\t{%2, %0|%0, %2}"; } } [(set_attr "isa" "*,*,*,*,*,*,apx_ndd,apx_ndd") @@ -6824,6 +6854,23 @@ } }) +(define_split + [(set (match_operand:SWI 0 "register_operand") + (plus:SWI (match_operand:SWI 1 "register_operand") + (match_operand:SWI 2 "<nonmemory_operand>")))] + "TARGET_APX_NF && reload_completed + && ix86_lea_for_add_ok (insn, operands)" + [(set (match_dup 0) + (plus:<LEAMODE> (match_dup 1) (match_dup 2)))] +{ + if (<MODE>mode != <LEAMODE>mode) + { + operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]); + operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]); + operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]); + } +}) + ;; Convert add to the lea pattern to avoid flags dependency. 
(define_split [(set (match_operand:DI 0 "register_operand") diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 7151fb1b147..b6f28a2b4bd 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1336,6 +1336,9 @@ Enum(apx_features) String(ndd) Value(apx_ndd) Set(4) EnumValue Enum(apx_features) String(ppx) Value(apx_ppx) Set(5) +EnumValue +Enum(apx_features) String(nf) Value(apx_nf) Set(6) + EnumValue Enum(apx_features) String(all) Value(apx_all) Set(1) diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c index 0eb751ad225..0ff4df0780c 100644 --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { ! ia32 } } } */ -/* { dg-options "-mapxf -march=x86-64 -O2" } */ +/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx -march=x86-64 -O2" } */ /* { dg-final { scan-assembler-not "movl"} } */ #include <stdint.h> -- 2.31.1 ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v3 1/8] [APX NF]: Support APX NF add 2024-05-29 5:10 ` [PATCH v3 1/8] [APX NF]: Support APX NF add Kong, Lingling @ 2024-06-03 1:08 ` Hongtao Liu 0 siblings, 0 replies; 9+ messages in thread From: Hongtao Liu @ 2024-06-03 1:08 UTC (permalink / raw) To: Kong, Lingling; +Cc: gcc-patches, Liu, Hongtao, Uros Bizjak On Wed, May 29, 2024 at 1:11 PM Kong, Lingling <lingling.kong@intel.com> wrote: > > Hi, compared with v2, these patches restored the original lea pattern position and addressed Hongtao's comment. > > The APX NF (no flags) feature suppresses the update of status flags > for arithmetic operations. OK for this patch and the remaining ones. [PATCH v3 1/8] [APX NF]: Support APX NF add Kong, Lingling [PATCH v3 2/8] [APX NF] Support APX NF for {sub/and/or/xor/neg} Kong, Lingling [PATCH v3 3/8] [APX NF] Support APX NF for left shift insns Kong, Lingling [PATCH v3 4/8] [APX NF] Support APX NF for right shift insns Kong, Lingling [PATCH v3 5/8] [APX NF] Support APX NF for rotate insns Kong, Lingling [PATCH v3 6/8] [APX NF] Support APX NF for shld/shrd Kong, Lingling [PATCH v3 7/8] [APX NF] Support APX NF for mul/div Kong, Lingling [PATCH v3 8/8] [APX NF] Support APX NF for lzcnt/tzcnt/popcnt Kong, Lingling > > For NF add, it is not clear whether nf add can be faster than lea. If so, > the pattern needs to be adjusted to prefer lea generation. > > gcc/ChangeLog: > > * config/i386/i386-opts.h (enum apx_features): Add nf > enumeration. > * config/i386/i386.h (TARGET_APX_NF): New. > * config/i386/i386.md (*add<mode>_1_nf): New define_insn. > * config/i386/i386.opt: Add apx_nf enumeration. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/apx-ndd.c: Fixed test. 
> > Co-authored-by: Lingling Kong <lingling.kong@intel.com> > --- > gcc/config/i386/i386-opts.h | 3 +- > gcc/config/i386/i386.h | 1 + > gcc/config/i386/i386.md | 135 ++++++++++++++++-------- > gcc/config/i386/i386.opt | 3 + > gcc/testsuite/gcc.target/i386/apx-ndd.c | 2 +- > 5 files changed, 98 insertions(+), 46 deletions(-) > > diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h > index ef2825803b3..60176ce609f 100644 > --- a/gcc/config/i386/i386-opts.h > +++ b/gcc/config/i386/i386-opts.h > @@ -140,7 +140,8 @@ enum apx_features { > apx_push2pop2 = 1 << 1, > apx_ndd = 1 << 2, > apx_ppx = 1 << 3, > - apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx, > + apx_nf = 1<< 4, > + apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx | apx_nf, > }; > > #endif > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index 359a8408263..969391d3013 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -55,6 +55,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see > #define TARGET_APX_PUSH2POP2 (ix86_apx_features & apx_push2pop2) > #define TARGET_APX_NDD (ix86_apx_features & apx_ndd) > #define TARGET_APX_PPX (ix86_apx_features & apx_ppx) > +#define TARGET_APX_NF (ix86_apx_features & apx_nf) > > #include "config/vxworks-dummy.h" > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index e8073f5a200..1eeadaddeba 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -6290,6 +6290,13 @@ > [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1))) > (clobber (reg:CC FLAGS_REG))])] > "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));") > + > +(define_split > + [(set (match_operand:SWI48 0 "general_reg_operand") > + (mult:SWI48 (match_dup 0) (match_operand:SWI48 1 "const1248_operand")))] > + "TARGET_APX_NF && reload_completed" > + [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))] > + "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));") > > > ;; Add instructions > > @@ -6437,48 +6444,65 @@ > (clobber (reg:CC FLAGS_REG))])] > "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[5]);") > > -(define_insn "*add<mode>_1" > - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r,r") > +(define_subst_attr "nf_name" "nf_subst" "_nf" "") > +(define_subst_attr "nf_prefix" "nf_subst" "%{nf%} " "") > +(define_subst_attr "nf_condition" "nf_subst" "TARGET_APX_NF" "true") > +(define_subst_attr "nf_mem_constraint" "nf_subst" "je" "m") > +(define_subst_attr "nf_applied" "nf_subst" "true" "false") > + > +(define_subst "nf_subst" > + [(set (match_operand:SWI 0) > + (match_operand:SWI 1))] > + "" > + [(set (match_dup 0) > + (match_dup 1)) > + (clobber (reg:CC FLAGS_REG))]) > + > +(define_insn "*add<mode>_1<nf_name>" > + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r<nf_mem_constraint>,r,r,r,r,r,r") > (plus:SWI48 > - (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rje,jM,r") > - (match_operand:SWI48 2 
"x86_64_general_operand" "re,BM,0,le,r,e,BM"))) > - (clobber (reg:CC FLAGS_REG))] > - "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)" > + (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,0,r,r,rje,jM,r") > + (match_operand:SWI48 2 "x86_64_general_operand" "r,e,BM,0,le,r,e,BM")))] > + "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD) > + && <nf_condition>" > { > bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; > switch (get_attr_type (insn)) > { > case TYPE_LEA: > - return "#"; > + if (TARGET_APX_NDD && <nf_applied>) > + return "%{nf%} add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"; > + else > + return "#"; > > case TYPE_INCDEC: > if (operands[2] == const1_rtx) > - return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}" > - : "inc{<imodesuffix>}\t%0"; > + return use_ndd ? "<nf_prefix>inc{<imodesuffix>}\t{%1, %0|%0, %1}" > + : "<nf_prefix>inc{<imodesuffix>}\t%0"; > else > { > gcc_assert (operands[2] == constm1_rtx); > - return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}" > - : "dec{<imodesuffix>}\t%0"; > + return use_ndd ? "<nf_prefix>dec{<imodesuffix>}\t{%1, %0|%0, %1}" > + : "<nf_prefix>dec{<imodesuffix>}\t%0"; > } > > default: > /* For most processors, ADD is faster than LEA. This alternative > was added to use ADD as much as possible. */ > - if (which_alternative == 2) > + if (which_alternative == 3) > std::swap (operands[1], operands[2]); > > if (x86_maybe_negate_const_int (&operands[2], <MODE>mode)) > - return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" > - : "sub{<imodesuffix>}\t{%2, %0|%0, %2}"; > + return use_ndd ? "<nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" > + : "<nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2}"; > > - return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" > - : "add{<imodesuffix>}\t{%2, %0|%0, %2}"; > + return use_ndd ? 
"<nf_prefix>add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" > + : "<nf_prefix>add{<imodesuffix>}\t{%2, %0|%0, %2}"; > } > } > - [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd") > + [(set_attr "isa" "*,*,*,*,*,apx_ndd,apx_ndd,apx_ndd") > (set (attr "type") > - (cond [(eq_attr "alternative" "3") > + (cond [(eq_attr "alternative" "4") > (const_string "lea") > (match_operand:SWI48 2 "incdec_operand") > (const_string "incdec") > @@ -6552,26 +6576,29 @@ > (const_string "*"))) > (set_attr "mode" "SI")]) > > -(define_insn "*addhi_1" > +(define_insn "*addhi_1<nf_name>" > [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp,r,r") > (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp,rm,r") > - (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m"))) > - (clobber (reg:CC FLAGS_REG))] > - "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)" > + (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))] > + "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD) > + && <nf_condition>" > { > bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; > switch (get_attr_type (insn)) > { > case TYPE_LEA: > - return "#"; > + if (TARGET_APX_NDD && <nf_applied>) > + return "%{nf%} add{w}\t{%2, %1, %0|%0, %1, %2}"; > + else > + return "#"; > > case TYPE_INCDEC: > if (operands[2] == const1_rtx) > - return use_ndd ? "inc{w}\t{%1, %0|%0, %1}" : "inc{w}\t%0"; > + return use_ndd ? "<nf_prefix>inc{w}\t{%1, %0|%0, %1}" : "<nf_prefix>inc{w}\t%0"; > else > { > gcc_assert (operands[2] == constm1_rtx); > - return use_ndd ? "dec{w}\t{%1, %0|%0, %1}" : "dec{w}\t%0"; > + return use_ndd ? "<nf_prefix>dec{w}\t{%1, %0|%0, %1}" : "<nf_prefix>dec{w}\t%0"; > } > > default: > @@ -6581,11 +6608,11 @@ > std::swap (operands[1], operands[2]); > > if (x86_maybe_negate_const_int (&operands[2], HImode)) > - return use_ndd ? "sub{w}\t{%2, %1, %0|%0, %1, %2}" > - : "sub{w}\t{%2, %0|%0, %2}"; > + return use_ndd ? 
"<nf_prefix>sub{w}\t{%2, %1, %0|%0, %1, %2}" > + : "<nf_prefix>sub{w}\t{%2, %0|%0, %2}"; > > - return use_ndd ? "add{w}\t{%2, %1, %0|%0, %1, %2}" > - : "add{w}\t{%2, %0|%0, %2}"; > + return use_ndd ? "<nf_prefix>add{w}\t{%2, %1, %0|%0, %1, %2}" > + : "<nf_prefix>add{w}\t{%2, %0|%0, %2}"; > } > } > [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd") > @@ -6603,33 +6630,36 @@ > (const_string "*"))) > (set_attr "mode" "HI,HI,HI,SI,HI,HI")]) > > -(define_insn "*addqi_1" > +(define_insn "*addqi_1<nf_name>" > [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp,r,r") > (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp,rm,r") > - (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m"))) > - (clobber (reg:CC FLAGS_REG))] > - "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)" > + (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))] > + "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD) > + && <nf_condition>" > { > bool widen = (get_attr_mode (insn) != MODE_QI); > bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; > switch (get_attr_type (insn)) > { > case TYPE_LEA: > - return "#"; > + if (TARGET_APX_NDD && <nf_applied>) > + return "%{nf%} add{b}\t{%2, %1, %0|%0, %1, %2}"; > + else > + return "#"; > > case TYPE_INCDEC: > if (operands[2] == const1_rtx) > if (use_ndd) > - return "inc{b}\t{%1, %0|%0, %1}"; > + return "<nf_prefix>inc{b}\t{%1, %0|%0, %1}"; > else > - return widen ? "inc{l}\t%k0" : "inc{b}\t%0"; > + return widen ? "<nf_prefix>inc{l}\t%k0" : "<nf_prefix>inc{b}\t%0"; > else > { > gcc_assert (operands[2] == constm1_rtx); > if (use_ndd) > - return "dec{b}\t{%1, %0|%0, %1}"; > + return "<nf_prefix>dec{b}\t{%1, %0|%0, %1}"; > else > - return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; > + return widen ? 
"<nf_prefix>dec{l}\t%k0" : "<nf_prefix>dec{b}\t%0"; > } > > default: > @@ -6641,16 +6671,16 @@ > if (x86_maybe_negate_const_int (&operands[2], QImode)) > { > if (use_ndd) > - return "sub{b}\t{%2, %1, %0|%0, %1, %2}"; > + return "<nf_prefix>sub{b}\t{%2, %1, %0|%0, %1, %2}"; > else > - return widen ? "sub{l}\t{%2, %k0|%k0, %2}" > - : "sub{b}\t{%2, %0|%0, %2}"; > + return widen ? "<nf_prefix>sub{l}\t{%2, %k0|%k0, %2}" > + : "<nf_prefix>sub{b}\t{%2, %0|%0, %2}"; > } > if (use_ndd) > - return "add{b}\t{%2, %1, %0|%0, %1, %2}"; > + return "<nf_prefix>add{b}\t{%2, %1, %0|%0, %1, %2}"; > else > - return widen ? "add{l}\t{%k2, %k0|%k0, %k2}" > - : "add{b}\t{%2, %0|%0, %2}"; > + return widen ? "<nf_prefix>add{l}\t{%k2, %k0|%k0, %k2}" > + : "<nf_prefix>add{b}\t{%2, %0|%0, %2}"; > } > } > [(set_attr "isa" "*,*,*,*,*,*,apx_ndd,apx_ndd") > @@ -6824,6 +6854,23 @@ > } > }) > > +(define_split > + [(set (match_operand:SWI 0 "register_operand") > + (plus:SWI (match_operand:SWI 1 "register_operand") > + (match_operand:SWI 2 "<nonmemory_operand>")))] > + "TARGET_APX_NF && reload_completed > + && ix86_lea_for_add_ok (insn, operands)" > + [(set (match_dup 0) > + (plus:<LEAMODE> (match_dup 1) (match_dup 2)))] > +{ > + if (<MODE>mode != <LEAMODE>mode) > + { > + operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]); > + operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]); > + operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]); > + } > +}) > + > ;; Convert add to the lea pattern to avoid flags dependency. 
> (define_split > [(set (match_operand:DI 0 "register_operand") > diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt > index 7151fb1b147..b6f28a2b4bd 100644 > --- a/gcc/config/i386/i386.opt > +++ b/gcc/config/i386/i386.opt > @@ -1336,6 +1336,9 @@ Enum(apx_features) String(ndd) Value(apx_ndd) Set(4) > EnumValue > Enum(apx_features) String(ppx) Value(apx_ppx) Set(5) > > +EnumValue > +Enum(apx_features) String(nf) Value(apx_nf) Set(6) > + > EnumValue > Enum(apx_features) String(all) Value(apx_all) Set(1) > > diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c > index 0eb751ad225..0ff4df0780c 100644 > --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c > +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c > @@ -1,5 +1,5 @@ > /* { dg-do compile { target { ! ia32 } } } */ > -/* { dg-options "-mapxf -march=x86-64 -O2" } */ > +/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx -march=x86-64 -O2" } */ > /* { dg-final { scan-assembler-not "movl"} } */ > > #include <stdint.h> > -- > 2.31.1 > -- BR, Hongtao ^ permalink raw reply [flat|nested] 9+ messages in thread
[parent not found: <20240529023704.3728291-2-lingling.kong@intel.com>]
* [PATCH v3 2/8] [APX NF] Support APX NF for {sub/and/or/xor/neg} [not found] ` <20240529023704.3728291-2-lingling.kong@intel.com> @ 2024-05-29 5:10 ` Kong, Lingling 0 siblings, 0 replies; 9+ messages in thread From: Kong, Lingling @ 2024-05-29 5:10 UTC (permalink / raw) To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak gcc/ChangeLog: * config/i386/i386.md (nf_nonf_attr): New subst_attr. (nf_nonf_x64_attr): Ditto. (*sub<mode>_1_nf): New define_insn. (*anddi_1_nf): Ditto. (*and<mode>_1_nf): Ditto. (*<code>qi_1_nf): Ditto. (*<code><mode>_1_nf): Ditto. (*neg<mode>_1_nf): Ditto. * config/i386/sse.md : New define_split. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-nf.c: Add test. --- gcc/config/i386/i386.md | 173 +++++++++++++------------ gcc/config/i386/sse.md | 11 ++ gcc/testsuite/gcc.target/i386/apx-nf.c | 12 ++ 3 files changed, 114 insertions(+), 82 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/apx-nf.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 1eeadaddeba..d3cb224abad 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -575,7 +575,7 @@ noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni, avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert, avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl, - vaes_avx512vl" + vaes_avx512vl,noapx_nf" (const_string "base")) ;; The (bounding maximum) length of an instruction immediate. 
@@ -981,6 +981,7 @@ (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX") (eq_attr "mmx_isa" "avx") (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX") + (eq_attr "isa" "noapx_nf") (symbol_ref "!TARGET_APX_NF") ] (const_int 1))) @@ -6449,6 +6450,8 @@ (define_subst_attr "nf_condition" "nf_subst" "TARGET_APX_NF" "true") (define_subst_attr "nf_mem_constraint" "nf_subst" "je" "m") (define_subst_attr "nf_applied" "nf_subst" "true" "false") +(define_subst_attr "nf_nonf_attr" "nf_subst" "noapx_nf" "*") +(define_subst_attr "nf_nonf_x64_attr" "nf_subst" "noapx_nf" "x64") (define_subst "nf_subst" [(set (match_operand:SWI 0) @@ -7893,20 +7896,21 @@ "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);" [(set_attr "isa" "*,*,apx_ndd,apx_ndd")]) -(define_insn "*sub<mode>_1" - [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r,r") +(define_insn "*sub<mode>_1<nf_name>" + [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r<nf_mem_constraint>,<r>,r,r,r") (minus:SWI - (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,rjM,r") - (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r,<i>,<m>"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)" + (match_operand:SWI 1 "nonimmediate_operand" "0,0,0,rm,rjM,r") + (match_operand:SWI 2 "<general_operand>" "<r>,<i>,<m>,r,<i>,<m>")))] + "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD) + && <nf_condition>" "@ - sub{<imodesuffix>}\t{%2, %0|%0, %2} - sub{<imodesuffix>}\t{%2, %0|%0, %2} - sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} - sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} - sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd") + <nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2} + <nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2} + <nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2} + <nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} + <nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} + 
<nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd") (set_attr "type" "alu") (set_attr "mode" "<MODE>")]) @@ -11795,27 +11799,28 @@ } [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd_64,apx_ndd")]) -(define_insn "*anddi_1" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r,r,r,r,?k") +(define_insn "*anddi_1<nf_name>" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r<nf_mem_constraint>,r,r,r,r,r,?k") (and:DI - (match_operand:DI 1 "nonimmediate_operand" "%0,r,0,0,rm,rjM,r,qm,k") - (match_operand:DI 2 "x86_64_szext_general_operand" "Z,Z,re,m,r,e,m,L,k"))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:DI 1 "nonimmediate_operand" "%0,r,0,0,0,rm,rjM,r,qm,k") + (match_operand:DI 2 "x86_64_szext_general_operand" "Z,Z,r,e,m,r,e,m,L,k")))] "TARGET_64BIT - && ix86_binary_operator_ok (AND, DImode, operands, TARGET_APX_NDD)" + && ix86_binary_operator_ok (AND, DImode, operands, TARGET_APX_NDD) + && <nf_condition>" "@ - and{l}\t{%k2, %k0|%k0, %k2} - and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2} - and{q}\t{%2, %0|%0, %2} - and{q}\t{%2, %0|%0, %2} - and{q}\t{%2, %1, %0|%0, %1, %2} - and{q}\t{%2, %1, %0|%0, %1, %2} - and{q}\t{%2, %1, %0|%0, %1, %2} + <nf_prefix>and{l}\t{%k2, %k0|%k0, %k2} + <nf_prefix>and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2} + <nf_prefix>and{q}\t{%2, %0|%0, %2} + <nf_prefix>and{q}\t{%2, %0|%0, %2} + <nf_prefix>and{q}\t{%2, %0|%0, %2} + <nf_prefix>and{q}\t{%2, %1, %0|%0, %1, %2} + <nf_prefix>and{q}\t{%2, %1, %0|%0, %1, %2} + <nf_prefix>and{q}\t{%2, %1, %0|%0, %1, %2} # #" - [(set_attr "isa" "x64,apx_ndd,x64,x64,apx_ndd,apx_ndd,apx_ndd,x64,avx512bw") - (set_attr "type" "alu,alu,alu,alu,alu,alu,alu,imovx,msklog") - (set_attr "length_immediate" "*,*,*,*,*,*,*,0,*") + [(set_attr "isa" "x64,apx_ndd,x64,x64,x64,apx_ndd,apx_ndd,apx_ndd,<nf_nonf_x64_attr>,avx512bw") + (set_attr "type" "alu,alu,alu,alu,alu,alu,alu,alu,imovx,msklog") + (set_attr "length_immediate" "*,*,*,*,*,*,*,*,0,*") (set (attr 
"prefix_rex") (if_then_else (and (eq_attr "type" "imovx") @@ -11823,7 +11828,7 @@ (match_operand 1 "ext_QIreg_operand"))) (const_string "1") (const_string "*"))) - (set_attr "mode" "SI,SI,DI,DI,DI,DI,DI,SI,DI")]) + (set_attr "mode" "SI,SI,DI,DI,DI,DI,DI,DI,SI,DI")]) (define_insn_and_split "*anddi_1_btr" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") @@ -11894,31 +11899,34 @@ (set_attr "isa" "*,apx_ndd,apx_ndd,apx_ndd") (set_attr "mode" "SI")]) -(define_insn "*and<mode>_1" - [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,r,r,r,Ya,?k") - (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,rm,rjM,r,qm,k") - (match_operand:SWI24 2 "<general_operand>" "r<i>,<m>,r,<i>,<m>,L,k"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (AND, <MODE>mode, operands, TARGET_APX_NDD)" +(define_insn "*and<mode>_1<nf_name>" + [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r<nf_mem_constraint>,r,r,r,r,Ya,?k") + (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,0,rm,rjM,r,qm,k") + (match_operand:SWI24 2 "<general_operand>" "r,<i>,<m>,r,<i>,<m>,L,k")))] + "ix86_binary_operator_ok (AND, <MODE>mode, operands, TARGET_APX_NDD) + && <nf_condition>" "@ - and{<imodesuffix>}\t{%2, %0|%0, %2} - and{<imodesuffix>}\t{%2, %0|%0, %2} - and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} - and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} - and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} + <nf_prefix>and{<imodesuffix>}\t{%2, %0|%0, %2} + <nf_prefix>and{<imodesuffix>}\t{%2, %0|%0, %2} + <nf_prefix>and{<imodesuffix>}\t{%2, %0|%0, %2} + <nf_prefix>and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} + <nf_prefix>and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} + <nf_prefix>and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} # #" [(set (attr "isa") - (cond [(eq_attr "alternative" "2,3,4") + (cond [(eq_attr "alternative" "3,4,5") (const_string "apx_ndd") (eq_attr "alternative" "6") + (const_string "<nf_nonf_attr>") + (eq_attr "alternative" "7") (if_then_else (eq_attr "mode" "SI") 
(const_string "avx512bw") (const_string "avx512f")) ] (const_string "*"))) - (set_attr "type" "alu,alu,alu,alu,alu,imovx,msklog") - (set_attr "length_immediate" "*,*,*,*,*,0,*") + (set_attr "type" "alu,alu,alu,alu,alu,alu,imovx,msklog") + (set_attr "length_immediate" "*,*,*,*,*,*,0,*") (set (attr "prefix_rex") (if_then_else (and (eq_attr "type" "imovx") @@ -11926,20 +11934,20 @@ (match_operand 1 "ext_QIreg_operand"))) (const_string "1") (const_string "*"))) - (set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,<MODE>,SI,<MODE>")]) + (set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,<MODE>,<MODE>,SI,<MODE>")]) -(define_insn "*andqi_1" +(define_insn "*andqi_1<nf_name>" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k") (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k") - (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (AND, QImode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))] + "ix86_binary_operator_ok (AND, QImode, operands, TARGET_APX_NDD) + && <nf_condition>" "@ - and{b}\t{%2, %0|%0, %2} - and{b}\t{%2, %0|%0, %2} - and{l}\t{%k2, %k0|%k0, %k2} - and{b}\t{%2, %1, %0|%0, %1, %2} - and{b}\t{%2, %1, %0|%0, %1, %2} + <nf_prefix>and{b}\t{%2, %0|%0, %2} + <nf_prefix>and{b}\t{%2, %0|%0, %2} + <nf_prefix>and{l}\t{%k2, %k0|%k0, %k2} + <nf_prefix>and{b}\t{%2, %1, %0|%0, %1, %2} + <nf_prefix>and{b}\t{%2, %1, %0|%0, %1, %2} #" [(set_attr "type" "alu,alu,alu,alu,alu,msklog") (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,*") @@ -12802,22 +12810,23 @@ } [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd_64,apx_ndd")]) -(define_insn "*<code><mode>_1" - [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,r,r,r,?k") +(define_insn "*<code><mode>_1<nf_name>" + [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r<nf_mem_constraint>,r,r,r,r,?k") (any_or:SWI248 - (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,rm,rjM,r,k") - 
(match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,r,<i>,<m>,k"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)" + (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,0,rm,rjM,r,k") + (match_operand:SWI248 2 "<general_operand>" "r,<i>,<m>,r,<i>,<m>,k")))] + "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD) + && <nf_condition>" "@ - <logic>{<imodesuffix>}\t{%2, %0|%0, %2} - <logic>{<imodesuffix>}\t{%2, %0|%0, %2} - <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} - <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} - <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} + <nf_prefix><logic>{<imodesuffix>}\t{%2, %0|%0, %2} + <nf_prefix><logic>{<imodesuffix>}\t{%2, %0|%0, %2} + <nf_prefix><logic>{<imodesuffix>}\t{%2, %0|%0, %2} + <nf_prefix><logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} + <nf_prefix><logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} + <nf_prefix><logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} #" - [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,<kmov_isa>") - (set_attr "type" "alu, alu, alu, alu, alu, msklog") + [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd,<kmov_isa>") + (set_attr "type" "alu,alu, alu, alu, alu, alu, msklog") (set_attr "mode" "<MODE>")]) (define_insn_and_split "*notxor<mode>_1" @@ -12963,18 +12972,18 @@ (set_attr "isa" "*,apx_ndd") (set_attr "mode" "SI")]) -(define_insn "*<code>qi_1" +(define_insn "*<code>qi_1<nf_name>" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k") (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k") - (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (<CODE>, QImode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))] + "ix86_binary_operator_ok (<CODE>, QImode, operands, TARGET_APX_NDD) + && <nf_condition>" "@ - <logic>{b}\t{%2, %0|%0, %2} - <logic>{b}\t{%2, %0|%0, %2} - <logic>{l}\t{%k2, %k0|%k0, %k2} 
- <logic>{b}\t{%2, %1, %0|%0, %1, %2} - <logic>{b}\t{%2, %1, %0|%0, %1, %2} + <nf_prefix><logic>{b}\t{%2, %0|%0, %2} + <nf_prefix><logic>{b}\t{%2, %0|%0, %2} + <nf_prefix><logic>{l}\t{%k2, %k0|%k0, %k2} + <nf_prefix><logic>{b}\t{%2, %1, %0|%0, %1, %2} + <nf_prefix><logic>{b}\t{%2, %1, %0|%0, %1, %2} #" [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f") (set_attr "type" "alu,alu,alu,alu,alu,msklog") @@ -13534,14 +13543,14 @@ (const_int 0))) (clobber (reg:CC FLAGS_REG))])]) -(define_insn "*neg<mode>_1" +(define_insn "*neg<mode>_1<nf_name>" [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r") - (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_unary_operator_ok (NEG, <MODE>mode, operands, TARGET_APX_NDD)" + (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")))] + "ix86_unary_operator_ok (NEG, <MODE>mode, operands, TARGET_APX_NDD) + && <nf_condition>" "@ - neg{<imodesuffix>}\t%0 - neg{<imodesuffix>}\t{%1, %0|%0, %1}" + <nf_prefix>neg{<imodesuffix>}\t%0 + <nf_prefix>neg{<imodesuffix>}\t{%1, %0|%0, %1}" [(set_attr "type" "negnot") (set_attr "isa" "*,apx_ndd") (set_attr "mode" "<MODE>")]) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 0f4fbcb2c5d..cdf11a68bc5 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1986,6 +1986,17 @@ ] (const_string "<MODE>")))]) +(define_split + [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand") + (any_logic:SWI1248_AVX512BW + (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand") + (match_operand:SWI1248_AVX512BW 2 "mask_reg_operand")))] + "TARGET_AVX512F && reload_completed" + [(parallel + [(set (match_dup 0) + (any_logic:SWI1248_AVX512BW (match_dup 1) (match_dup 2))) + (unspec [(const_int 0)] UNSPEC_MASKOP)])]) + (define_split [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand") (any_logic:SWI1248_AVX512BW diff --git a/gcc/testsuite/gcc.target/i386/apx-nf.c b/gcc/testsuite/gcc.target/i386/apx-nf.c new file mode 100644 
index 00000000000..f33a994f0b7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/apx-nf.c @@ -0,0 +1,12 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx,nf -march=x86-64 -O2" } */ +/* { dg-final { scan-assembler-times "\{nf\} and" 1 } } */ +/* { dg-final { scan-assembler-times "\{nf\} or" 1 } } */ + +struct B { unsigned bit0 : 1; unsigned bit1 : 1; }; + +void +foo (struct B *b) +{ + b->bit0 = b->bit0 | b->bit1; +} -- 2.31.1 ^ permalink raw reply [flat|nested] 9+ messages in thread
[parent not found: <20240529023704.3728291-3-lingling.kong@intel.com>]
* [PATCH v3 3/8] [APX NF] Support APX NF for left shift insns [not found] ` <20240529023704.3728291-3-lingling.kong@intel.com> @ 2024-05-29 5:10 ` Kong, Lingling 0 siblings, 0 replies; 9+ messages in thread From: Kong, Lingling @ 2024-05-29 5:10 UTC (permalink / raw) To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak gcc/ChangeLog: * config/i386/i386.md (*ashl<mode>3_1_nf): New. (*ashlhi3_1_nf): Ditto. (*ashlqi3_1_nf): Ditto. * config/i386/sse.md: New define_split. --- gcc/config/i386/i386.md | 96 ++++++++++++++++++++++++++++++----------- gcc/config/i386/sse.md | 13 ++++++ 2 files changed, 83 insertions(+), 26 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index d3cb224abad..4c06c243cc3 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -15011,17 +15011,22 @@ [(set_attr "type" "ishiftx") (set_attr "mode" "<MODE>")]) -(define_insn "*ashl<mode>3_1" +(define_insn "*ashl<mode>3_1<nf_name>" [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r") (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k,rm") - (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>,c<S>"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>,c<S>")))] + "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_LEA: + if (TARGET_APX_NDD && <nf_applied>) + return "%{nf%} sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"; + else + return "#"; + case TYPE_ISHIFTX: case TYPE_MSKLOG: return "#"; @@ -15029,7 +15034,7 @@ case TYPE_ALU: gcc_assert (operands[2] == const1_rtx); gcc_assert (rtx_equal_p (operands[0], operands[1])); - return "add{<imodesuffix>}\t%0, %0"; + return "<nf_prefix>add{<imodesuffix>}\t%0, %0"; default: if (operands[2] == const1_rtx @@ -15037,11 +15042,11 @@ /* 
For NDD form instructions related to TARGET_SHIFT1, the $1 immediate do not need to be omitted as assembler will map it to use shorter encoding. */ - && !use_ndd) + && !use_ndd && !<nf_applied>) return "sal{<imodesuffix>}\t%0"; else - return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" - : "sal{<imodesuffix>}\t{%2, %0|%0, %2}"; + return use_ndd ? "<nf_prefix>sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix>sal{<imodesuffix>}\t{%2, %0|%0, %2}"; } } [(set_attr "isa" "*,*,bmi2,avx512bw,apx_ndd") @@ -15072,6 +15077,17 @@ (set_attr "mode" "<MODE>")]) ;; Convert shift to the shiftx pattern to avoid flags dependency. +;; For NF/NDD doesn't support shift count as r, it just support c<S>, +;; and it has no flag. +(define_split + [(set (match_operand:SWI48 0 "register_operand") + (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand") + (match_operand:QI 2 "register_operand")))] + "TARGET_BMI2 && reload_completed" + [(set (match_dup 0) + (ashift:SWI48 (match_dup 1) (match_dup 2)))] + "operands[2] = gen_lowpart (<MODE>mode, operands[2]);") + (define_split [(set (match_operand:SWI48 0 "register_operand") (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand") @@ -15158,32 +15174,37 @@ (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))] "operands[2] = gen_lowpart (SImode, operands[2]);") -(define_insn "*ashlhi3_1" +(define_insn "*ashlhi3_1<nf_name>" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r") (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm") - (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFT, HImode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI")))] + "ix86_binary_operator_ok (ASHIFT, HImode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_LEA: + if (TARGET_APX_NDD && <nf_applied>) + return "%{nf%} 
sal{w}\t{%2, %1, %0|%0, %1, %2}"; + else + return "#"; + case TYPE_MSKLOG: return "#"; case TYPE_ALU: gcc_assert (operands[2] == const1_rtx); - return "add{w}\t%0, %0"; + return "<nf_prefix>add{w}\t%0, %0"; default: if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) - && !use_ndd) + && !use_ndd && !<nf_applied>) return "sal{w}\t%0"; else - return use_ndd ? "sal{w}\t{%2, %1, %0|%0, %1, %2}" - : "sal{w}\t{%2, %0|%0, %2}"; + return use_ndd ? "<nf_prefix>sal{w}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix>sal{w}\t{%2, %0|%0, %2}"; } } [(set_attr "isa" "*,*,avx512f,apx_ndd") @@ -15211,31 +15232,36 @@ (const_string "*"))) (set_attr "mode" "HI,SI,HI,HI")]) -(define_insn "*ashlqi3_1" +(define_insn "*ashlqi3_1<nf_name>" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r") (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm") - (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFT, QImode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI")))] + "ix86_binary_operator_ok (ASHIFT, QImode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_LEA: + if (TARGET_APX_NDD && <nf_applied>) + return "%{nf%} sal{b}\t{%2, %1, %0|%0, %1, %2}"; + else + return "#"; + case TYPE_MSKLOG: return "#"; case TYPE_ALU: gcc_assert (operands[2] == const1_rtx); if (REG_P (operands[1]) && !ANY_QI_REGNO_P (REGNO (operands[1]))) - return "add{l}\t%k0, %k0"; + return "<nf_prefix>add{l}\t%k0, %k0"; else - return "add{b}\t%0, %0"; + return "<nf_prefix>add{b}\t%0, %0"; default: if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) - && !use_ndd) + && !use_ndd && !<nf_applied>) { if (get_attr_mode (insn) == MODE_SI) return "sal{l}\t%k0"; @@ -15245,10 +15271,10 @@ else { if (get_attr_mode (insn) == MODE_SI) - return 
"sal{l}\t{%2, %k0|%k0, %2}"; + return "<nf_prefix>sal{l}\t{%2, %k0|%k0, %2}"; else - return use_ndd ? "sal{b}\t{%2, %1, %0|%0, %1, %2}" - : "sal{b}\t{%2, %0|%0, %2}"; + return use_ndd ? "<nf_prefix>sal{b}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix>sal{b}\t{%2, %0|%0, %2}"; } } } @@ -15351,6 +15377,24 @@ operands[2] = GEN_INT (1 << INTVAL (operands[2])); }) +(define_split + [(set (match_operand:SWI 0 "general_reg_operand") + (ashift:SWI (match_operand:SWI 1 "index_reg_operand") + (match_operand 2 "const_0_to_3_operand")))] + "reload_completed + && REGNO (operands[0]) != REGNO (operands[1]) + && !TARGET_APX_NDD" + [(set (match_dup 0) + (mult:<LEAMODE> (match_dup 1) (match_dup 2)))] +{ + if (<MODE>mode != <LEAMODE>mode) + { + operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]); + operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]); + } + operands[2] = GEN_INT (1 << INTVAL (operands[2])); +}) + ;; Convert ashift to the lea pattern to avoid flags dependency. (define_split [(set (match_operand:DI 0 "general_reg_operand") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index cdf11a68bc5..556401ce379 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2150,6 +2150,19 @@ (set_attr "prefix" "vex") (set_attr "mode" "<MODE>")]) +(define_split + [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand") + (any_lshift:SWI1248_AVX512BW + (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand") + (match_operand 2 "const_int_operand")))] + "TARGET_AVX512F && reload_completed" + [(parallel + [(set (match_dup 0) + (any_lshift:SWI1248_AVX512BW + (match_dup 1) + (match_dup 2))) + (unspec [(const_int 0)] UNSPEC_MASKOP)])]) + (define_split [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand") (any_lshift:SWI1248_AVX512BW -- 2.31.1 ^ permalink raw reply [flat|nested] 9+ messages in thread
[parent not found: <20240529023704.3728291-4-lingling.kong@intel.com>]
* [PATCH v3 4/8] [APX NF] Support APX NF for right shift insns [not found] ` <20240529023704.3728291-4-lingling.kong@intel.com> @ 2024-05-29 5:10 ` Kong, Lingling 0 siblings, 0 replies; 9+ messages in thread From: Kong, Lingling @ 2024-05-29 5:10 UTC (permalink / raw) To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak gcc/ChangeLog: * config/i386/i386.md (*ashr<mode>3_1_nf): New. (*lshr<mode>3_1_nf): Ditto. (*lshrqi3_1_nf): Ditto. (*lshrhi3_1_nf): Ditto. --- gcc/config/i386/i386.md | 82 +++++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 36 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 4c06c243cc3..d10caf04fcc 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -16323,13 +16323,13 @@ [(set_attr "type" "ishiftx") (set_attr "mode" "<MODE>")]) -(define_insn "*ashr<mode>3_1" +(define_insn "*ashr<mode>3_1<nf_name>" [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r") (ashiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm") - (match_operand:QI 2 "nonmemory_operand" "c<S>,r,c<S>"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "nonmemory_operand" "c<S>,r,c<S>")))] + "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) @@ -16340,11 +16340,11 @@ default: if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) - && !use_ndd) + && !use_ndd && !<nf_applied>) return "sar{<imodesuffix>}\t%0"; else - return use_ndd ? "sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" - : "sar{<imodesuffix>}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"<nf_prefix>sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix>sar{<imodesuffix>}\t{%2, %0|%0, %2}"; } } [(set_attr "isa" "*,bmi2,apx_ndd") @@ -16384,14 +16384,13 @@ } [(set_attr "isa" "*,*,*,apx_ndd")]) - -(define_insn "*lshr<mode>3_1" +(define_insn "*lshr<mode>3_1<nf_name>" [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k,r") (lshiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k,rm") - (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>,c<S>"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>,c<S>")))] + "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) @@ -16403,11 +16402,11 @@ default: if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) - && !use_ndd) + && !use_ndd && !<nf_applied>) return "shr{<imodesuffix>}\t%0"; else - return use_ndd ? "shr{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" - : "shr{<imodesuffix>}\t{%2, %0|%0, %2}"; + return use_ndd ? "<nf_prefix>shr{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix>shr{<imodesuffix>}\t{%2, %0|%0, %2}"; } } [(set_attr "isa" "*,bmi2,avx512bw,apx_ndd") @@ -16423,6 +16422,17 @@ (set_attr "mode" "<MODE>")]) ;; Convert shift to the shiftx pattern to avoid flags dependency. +;; For NF/NDD doesn't support shift count as r, it just support c<S>, +;; and it has no flag. 
+(define_split + [(set (match_operand:SWI48 0 "register_operand") + (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand") + (match_operand:QI 2 "register_operand")))] + "TARGET_BMI2 && reload_completed" + [(set (match_dup 0) + (any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))] + "operands[2] = gen_lowpart (<MODE>mode, operands[2]);") + (define_split [(set (match_operand:SWI48 0 "register_operand") (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand") @@ -16491,22 +16501,22 @@ (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))] "operands[2] = gen_lowpart (SImode, operands[2]);") -(define_insn "*ashr<mode>3_1" +(define_insn "*ashr<mode>3_1<nf_name>" [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, r") (ashiftrt:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0, rm") - (match_operand:QI 2 "nonmemory_operand" "c<S>, c<S>"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "nonmemory_operand" "c<S>, c<S>")))] + "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) - && !use_ndd) + && !use_ndd && !<nf_applied>) return "sar{<imodesuffix>}\t%0"; else - return use_ndd ? "sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" - : "sar{<imodesuffix>}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"<nf_prefix>sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix>sar{<imodesuffix>}\t{%2, %0|%0, %2}"; } [(set_attr "isa" "*, apx_ndd") (set_attr "type" "ishift") @@ -16519,13 +16529,13 @@ (const_string "*"))) (set_attr "mode" "<MODE>")]) -(define_insn "*lshrqi3_1" +(define_insn "*lshrqi3_1<nf_name>" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,?k,r") (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0, k, rm") - (match_operand:QI 2 "nonmemory_operand" "cI,Wb,cI"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (LSHIFTRT, QImode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "nonmemory_operand" "cI,Wb,cI")))] + "ix86_binary_operator_ok (LSHIFTRT, QImode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) @@ -16533,11 +16543,11 @@ case TYPE_ISHIFT: if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) - && !use_ndd) + && !use_ndd && !<nf_applied>) return "shr{b}\t%0"; else - return use_ndd ? "shr{b}\t{%2, %1, %0|%0, %1, %2}" - : "shr{b}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"<nf_prefix>shr{b}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix>shr{b}\t{%2, %0|%0, %2}"; case TYPE_MSKLOG: return "#"; default: @@ -16556,13 +16566,13 @@ (const_string "*"))) (set_attr "mode" "QI")]) -(define_insn "*lshrhi3_1" +(define_insn "*lshrhi3_1<nf_name>" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k, r") (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0, k, rm") - (match_operand:QI 2 "nonmemory_operand" "cI, Ww, cI"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (LSHIFTRT, HImode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "nonmemory_operand" "cI, Ww, cI")))] + "ix86_binary_operator_ok (LSHIFTRT, HImode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) @@ -16570,11 +16580,11 @@ case TYPE_ISHIFT: if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) - && !use_ndd) + && !use_ndd && !<nf_applied>) return "shr{w}\t%0"; else - return use_ndd ? "shr{w}\t{%2, %1, %0|%0, %1, %2}" - : "shr{w}\t{%2, %0|%0, %2}"; + return use_ndd ? "<nf_prefix>shr{w}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix>shr{w}\t{%2, %0|%0, %2}"; case TYPE_MSKLOG: return "#"; default: -- 2.31.1 ^ permalink raw reply [flat|nested] 9+ messages in thread
[parent not found: <20240529023704.3728291-5-lingling.kong@intel.com>]
* [PATCH v3 5/8] [APX NF] Support APX NF for rotate insns [not found] ` <20240529023704.3728291-5-lingling.kong@intel.com> @ 2024-05-29 5:10 ` Kong, Lingling 0 siblings, 0 replies; 9+ messages in thread From: Kong, Lingling @ 2024-05-29 5:10 UTC (permalink / raw) To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak gcc/ChangeLog: * config/i386/i386.md (ashr<mode>3_cvt_nf): New define_insn. (*<insn><mode>3_1_nf): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-nf.c: Add NF test for rotate insns. --- gcc/config/i386/i386.md | 59 +++++++++++++++++--------- gcc/testsuite/gcc.target/i386/apx-nf.c | 5 +++ 2 files changed, 43 insertions(+), 21 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index d10caf04fcc..9d518e90d07 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -16245,19 +16245,19 @@ (define_mode_attr cvt_mnemonic [(SI "{cltd|cdq}") (DI "{cqto|cqo}")]) -(define_insn "ashr<mode>3_cvt" +(define_insn "ashr<mode>3_cvt<nf_name>" [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm,r") (ashiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "*a,0,rm") - (match_operand:QI 2 "const_int_operand"))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 2 "const_int_operand")))] "INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)-1 && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun)) - && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)" + && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD) + && <nf_condition>" "@ <cvt_mnemonic> - sar{<imodesuffix>}\t{%2, %0|%0, %2} - sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + <nf_prefix>sar{<imodesuffix>}\t{%2, %0|%0, %2} + <nf_prefix>sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "*,*,apx_ndd") (set_attr "type" "imovx,ishift,ishift") (set_attr "prefix_0f" "0,*,*") @@ -17109,28 +17109,31 @@ [(set_attr "type" "rotatex") (set_attr "mode" "<MODE>")]) -(define_insn "*<insn><mode>3_1" 
+(define_insn "*<insn><mode>3_1<nf_name>" [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r") (any_rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm") - (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>,c<S>"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>,c<S>")))] + "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_ROTATEX: - return "#"; + if (TARGET_APX_NDD && <nf_applied>) + return "%{nf%} <rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"; + else + return "#"; default: if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) - && !use_ndd) + && !use_ndd && !<nf_applied>) return "<rotate>{<imodesuffix>}\t%0"; else - return use_ndd ? "<rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" - : "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"<nf_prefix><rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix><rotate>{<imodesuffix>}\t{%2, %0|%0, %2}"; } } [(set_attr "isa" "*,bmi2,apx_ndd") @@ -17164,6 +17167,20 @@ operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize); }) +(define_split + [(set (match_operand:SWI48 0 "register_operand") + (rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand") + (match_operand:QI 2 "const_int_operand")))] + "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun) + && !TARGET_APX_NDD" + [(set (match_dup 0) + (rotatert:SWI48 (match_dup 1) (match_dup 2)))] +{ + int bitsize = GET_MODE_BITSIZE (<MODE>mode); + + operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize); +}) + (define_split [(set (match_operand:SWI48 0 "register_operand") (rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand") @@ -17251,22 +17268,22 @@ [(set (match_dup 0) (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))]) -(define_insn "*<insn><mode>3_1" +(define_insn "*<insn><mode>3_1<nf_name>" [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m,r") (any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0,rm") - (match_operand:QI 2 "nonmemory_operand" "c<S>,c<S>"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "nonmemory_operand" "c<S>,c<S>")))] + "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) - && !use_ndd) + && !use_ndd && !<nf_applied>) return "<rotate>{<imodesuffix>}\t%0"; else return use_ndd - ? "<rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" - : "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}"; + ? 
"<nf_prefix><rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix><rotate>{<imodesuffix>}\t{%2, %0|%0, %2}"; } [(set_attr "isa" "*,apx_ndd") (set_attr "type" "rotate") diff --git a/gcc/testsuite/gcc.target/i386/apx-nf.c b/gcc/testsuite/gcc.target/i386/apx-nf.c index f33a994f0b7..ed859b399b8 100644 --- a/gcc/testsuite/gcc.target/i386/apx-nf.c +++ b/gcc/testsuite/gcc.target/i386/apx-nf.c @@ -2,6 +2,7 @@ /* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx,nf -march=x86-64 -O2" } */ /* { dg-final { scan-assembler-times "\{nf\} and" 1 } } */ /* { dg-final { scan-assembler-times "\{nf\} or" 1 } } */ +/* { dg-final { scan-assembler-times "\{nf\} rol" 4 } } */ struct B { unsigned bit0 : 1; unsigned bit1 : 1; }; @@ -10,3 +11,7 @@ foo (struct B *b) { b->bit0 = b->bit0 | b->bit1; } +long int f1 (int x) { return ~(1ULL << (x & 0x3f)); } +long int f2 (int x) { return ~(1ULL << x); } +long int f3 (unsigned char *x) { return ~(1ULL << (x[0] & 0x3f)); } +long int f4 (unsigned char *x) { return ~(1ULL << x[0]); } -- 2.31.1 ^ permalink raw reply [flat|nested] 9+ messages in thread
[parent not found: <20240529023704.3728291-6-lingling.kong@intel.com>]
* [PATCH v3 6/8] [APX NF] Support APX NF for shld/shrd [not found] ` <20240529023704.3728291-6-lingling.kong@intel.com> @ 2024-05-29 5:10 ` Kong, Lingling 0 siblings, 0 replies; 9+ messages in thread From: Kong, Lingling @ 2024-05-29 5:10 UTC (permalink / raw) To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak gcc/ChangeLog: * config/i386/i386.md (x86_64_shld_nf): New define_insn. (x86_64_shld_ndd_nf): Ditto. (x86_64_shld_1_nf): Ditto. (x86_64_shld_ndd_1_nf): Ditto. (*x86_64_shld_shrd_1_nozext_nf): Ditto. (x86_shld_nf): Ditto. (x86_shld_ndd_nf): Ditto. (x86_shld_1_nf): Ditto. (x86_shld_ndd_1_nf): Ditto. (*x86_shld_shrd_1_nozext_nf): Ditto. (<insn><dwi>3_doubleword_lowpart_nf): Ditto. (x86_64_shrd_nf): Ditto. (x86_64_shrd_ndd_nf): Ditto. (x86_64_shrd_1_nf): Ditto. (x86_64_shrd_ndd_1_nf): Ditto. (*x86_64_shrd_shld_1_nozext_nf): Ditto. (x86_shrd_nf): Ditto. (x86_shrd_ndd_nf): Ditto. (x86_shrd_1_nf): Ditto. (x86_shrd_ndd_1_nf): Ditto. (*x86_shrd_shld_1_nozext_nf): Ditto. --- gcc/config/i386/i386.md | 377 +++++++++++++++++++++++++++++++--------- 1 file changed, 296 insertions(+), 81 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 9d518e90d07..719cce7d3ef 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -14551,7 +14551,7 @@ DONE; }) -(define_insn "x86_64_shld" +(define_insn "x86_64_shld<nf_name>" [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") (ior:DI (ashift:DI (match_dup 0) (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc") @@ -14561,10 +14561,9 @@ (zero_extend:TI (match_operand:DI 1 "register_operand" "r")) (minus:QI (const_int 64) - (and:QI (match_dup 2) (const_int 63)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT" - "shld{q}\t{%2, %1, %0|%0, %1, %2}" + (and:QI (match_dup 2) (const_int 63)))) 0)))] + "TARGET_64BIT && <nf_condition>" + "<nf_prefix>shld{q}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "mode" "DI") @@ -14572,7 +14571,7 
@@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_64_shld_ndd" +(define_insn "x86_64_shld_ndd<nf_name>" [(set (match_operand:DI 0 "register_operand" "=r") (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm") (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc") @@ -14582,14 +14581,13 @@ (zero_extend:TI (match_operand:DI 2 "register_operand" "r")) (minus:QI (const_int 64) - (and:QI (match_dup 3) (const_int 63)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_APX_NDD" - "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (and:QI (match_dup 3) (const_int 63)))) 0)))] + "TARGET_APX_NDD && <nf_condition>" + "<nf_prefix>shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "mode" "DI")]) -(define_insn "x86_64_shld_1" +(define_insn "x86_64_shld_1<nf_name>" [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") (ior:DI (ashift:DI (match_dup 0) (match_operand:QI 2 "const_0_to_63_operand")) @@ -14597,11 +14595,11 @@ (lshiftrt:TI (zero_extend:TI (match_operand:DI 1 "register_operand" "r")) - (match_operand:QI 3 "const_0_to_255_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 3 "const_0_to_255_operand")) 0)))] "TARGET_64BIT - && INTVAL (operands[3]) == 64 - INTVAL (operands[2])" - "shld{q}\t{%2, %1, %0|%0, %1, %2}" + && INTVAL (operands[3]) == 64 - INTVAL (operands[2]) + && <nf_condition>" + "<nf_prefix>shld{q}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "mode" "DI") @@ -14610,7 +14608,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_64_shld_ndd_1" +(define_insn "x86_64_shld_ndd_1<nf_name>" [(set (match_operand:DI 0 "register_operand" "=r") (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm") (match_operand:QI 3 "const_0_to_63_operand")) @@ -14618,15 +14616,66 @@ (lshiftrt:TI (zero_extend:TI (match_operand:DI 2 "register_operand" "r")) - (match_operand:QI 4 
"const_0_to_255_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 4 "const_0_to_255_operand")) 0)))] "TARGET_APX_NDD - && INTVAL (operands[4]) == 64 - INTVAL (operands[3])" - "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + && INTVAL (operands[4]) == 64 - INTVAL (operands[3]) + && <nf_condition>" + "<nf_prefix>shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "mode" "DI") (set_attr "length_immediate" "1")]) +(define_insn_and_split "*x86_64_shld_shrd_1_nozext_nf" + [(set (match_operand:DI 0 "nonimmediate_operand") + (ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand") + (match_operand:QI 2 "const_0_to_63_operand")) + (lshiftrt:DI + (match_operand:DI 1 "nonimmediate_operand") + (match_operand:QI 3 "const_0_to_63_operand"))))] + "TARGET_64BIT && TARGET_APX_NF + && INTVAL (operands[3]) == 64 - INTVAL (operands[2]) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + if (rtx_equal_p (operands[4], operands[0])) + { + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_x86_64_shld_1_nf (operands[0], operands[1], operands[2], operands[3])); + } + else if (rtx_equal_p (operands[1], operands[0])) + { + operands[4] = force_reg (DImode, operands[4]); + emit_insn (gen_x86_64_shrd_1_nf (operands[0], operands[4], operands[3], operands[2])); + } + else if (TARGET_APX_NDD) + { + rtx tmp = gen_reg_rtx (DImode); + if (MEM_P (operands[4])) + { + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + } + else if (MEM_P (operands[1])) + emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[1], operands[4], + operands[3], operands[2])); + else + emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + else + { + operands[1] = force_reg (DImode, operands[1]); + rtx tmp = gen_reg_rtx (DImode); + emit_move_insn (tmp, operands[4]); + 
emit_insn (gen_x86_64_shld_1_nf (tmp, operands[1], operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + DONE; +}) (define_insn_and_split "*x86_64_shld_shrd_1_nozext" [(set (match_operand:DI 0 "nonimmediate_operand") @@ -14729,7 +14778,7 @@ emit_move_insn (operands[4], operands[0]); }) -(define_insn "x86_shld" +(define_insn "x86_shld<nf_name>" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") (ior:SI (ashift:SI (match_dup 0) (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic") @@ -14739,10 +14788,9 @@ (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) (minus:QI (const_int 32) - (and:QI (match_dup 2) (const_int 31)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "" - "shld{l}\t{%2, %1, %0|%0, %1, %2}" + (and:QI (match_dup 2) (const_int 31)))) 0)))] + "<nf_condition>" + "<nf_prefix>shld{l}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "mode" "SI") @@ -14751,7 +14799,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_shld_ndd" +(define_insn "x86_shld_ndd<nf_name>" [(set (match_operand:SI 0 "nonimmediate_operand" "=r") (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm") (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic") @@ -14761,15 +14809,14 @@ (zero_extend:DI (match_operand:SI 2 "register_operand" "r")) (minus:QI (const_int 32) - (and:QI (match_dup 3) (const_int 31)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_APX_NDD" - "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (and:QI (match_dup 3) (const_int 31)))) 0)))] + "TARGET_APX_NDD && <nf_condition>" + "<nf_prefix>shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "mode" "SI")]) -(define_insn "x86_shld_1" +(define_insn "x86_shld_1<nf_name>" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") (ior:SI (ashift:SI (match_dup 0) (match_operand:QI 2 "const_0_to_31_operand")) @@ -14777,10 +14824,10 @@ (lshiftrt:DI (zero_extend:DI (match_operand:SI 1 
"register_operand" "r")) - (match_operand:QI 3 "const_0_to_63_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] - "INTVAL (operands[3]) == 32 - INTVAL (operands[2])" - "shld{l}\t{%2, %1, %0|%0, %1, %2}" + (match_operand:QI 3 "const_0_to_63_operand")) 0)))] + "INTVAL (operands[3]) == 32 - INTVAL (operands[2]) + && <nf_condition>" + "<nf_prefix>shld{l}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "length_immediate" "1") @@ -14790,7 +14837,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_shld_ndd_1" +(define_insn "x86_shld_ndd_1<nf_name>" [(set (match_operand:SI 0 "register_operand" "=r") (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm") (match_operand:QI 3 "const_0_to_31_operand")) @@ -14798,15 +14845,66 @@ (lshiftrt:DI (zero_extend:DI (match_operand:SI 2 "register_operand" "r")) - (match_operand:QI 4 "const_0_to_63_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 4 "const_0_to_63_operand")) 0)))] "TARGET_APX_NDD - && INTVAL (operands[4]) == 32 - INTVAL (operands[3])" - "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + && INTVAL (operands[4]) == 32 - INTVAL (operands[3]) + && <nf_condition>" + "<nf_prefix>shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "length_immediate" "1") (set_attr "mode" "SI")]) +(define_insn_and_split "*x86_shld_shrd_1_nozext_nf" + [(set (match_operand:SI 0 "nonimmediate_operand") + (ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand") + (match_operand:QI 2 "const_0_to_31_operand")) + (lshiftrt:SI + (match_operand:SI 1 "nonimmediate_operand") + (match_operand:QI 3 "const_0_to_31_operand"))))] + "TARGET_APX_NF && + INTVAL (operands[3]) == 32 - INTVAL (operands[2]) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + if (rtx_equal_p (operands[4], operands[0])) + { + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_x86_shld_1_nf (operands[0], 
operands[1], operands[2], operands[3])); + } + else if (rtx_equal_p (operands[1], operands[0])) + { + operands[4] = force_reg (SImode, operands[4]); + emit_insn (gen_x86_shrd_1_nf (operands[0], operands[4], operands[3], operands[2])); + } + else if (TARGET_APX_NDD) + { + rtx tmp = gen_reg_rtx (SImode); + if (MEM_P (operands[4])) + { + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + } + else if (MEM_P (operands[1])) + emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[1], operands[4], + operands[3], operands[2])); + else + emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + else + { + operands[1] = force_reg (SImode, operands[1]); + rtx tmp = gen_reg_rtx (SImode); + emit_move_insn (tmp, operands[4]); + emit_insn (gen_x86_shld_1_nf (tmp, operands[1], operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + DONE; +}) (define_insn_and_split "*x86_shld_shrd_1_nozext" [(set (match_operand:SI 0 "nonimmediate_operand") @@ -15861,6 +15959,26 @@ }) ;; Split truncations of double word right shifts into x86_shrd_1. 
+(define_insn_and_split "<insn><dwi>3_doubleword_lowpart_nf" + [(set (match_operand:DWIH 0 "register_operand" "=&r") + (subreg:DWIH + (any_shiftrt:<DWI> (match_operand:<DWI> 1 "register_operand" "r") + (match_operand:QI 2 "const_int_operand")) 0))] + "TARGET_APX_NF && UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT" + "#" + "&& reload_completed" + [(set (match_dup 0) + (ior:DWIH (lshiftrt:DWIH (match_dup 0) (match_dup 2)) + (subreg:DWIH + (ashift:<DWI> (zero_extend:<DWI> (match_dup 3)) + (match_dup 4)) 0)))] +{ + split_double_mode (<DWI>mode, &operands[1], 1, &operands[1], &operands[3]); + operands[4] = GEN_INT ((<MODE_SIZE> * BITS_PER_UNIT) - INTVAL (operands[2])); + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); +}) + (define_insn_and_split "<insn><dwi>3_doubleword_lowpart" [(set (match_operand:DWIH 0 "register_operand" "=&r") (subreg:DWIH @@ -15884,7 +16002,7 @@ emit_move_insn (operands[0], operands[1]); }) -(define_insn "x86_64_shrd" +(define_insn "x86_64_shrd<nf_name>" [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") (ior:DI (lshiftrt:DI (match_dup 0) (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc") @@ -15894,10 +16012,9 @@ (zero_extend:TI (match_operand:DI 1 "register_operand" "r")) (minus:QI (const_int 64) - (and:QI (match_dup 2) (const_int 63)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT" - "shrd{q}\t{%2, %1, %0|%0, %1, %2}" + (and:QI (match_dup 2) (const_int 63)))) 0)))] + "TARGET_64BIT && <nf_condition>" + "<nf_prefix>shrd{q}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "mode" "DI") @@ -15905,7 +16022,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_64_shrd_ndd" +(define_insn "x86_64_shrd_ndd<nf_name>" [(set (match_operand:DI 0 "register_operand" "=r") (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm") (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc") @@ -15915,15 
+16032,13 @@ (zero_extend:TI (match_operand:DI 2 "register_operand" "r")) (minus:QI (const_int 64) - (and:QI (match_dup 3) (const_int 63)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_APX_NDD" - "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (and:QI (match_dup 3) (const_int 63)))) 0)))] + "TARGET_APX_NDD && <nf_condition>" + "<nf_prefix>shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "mode" "DI")]) - -(define_insn "x86_64_shrd_1" +(define_insn "x86_64_shrd_1<nf_name>" [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") (ior:DI (lshiftrt:DI (match_dup 0) (match_operand:QI 2 "const_0_to_63_operand")) @@ -15931,11 +16046,11 @@ (ashift:TI (zero_extend:TI (match_operand:DI 1 "register_operand" "r")) - (match_operand:QI 3 "const_0_to_255_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 3 "const_0_to_255_operand")) 0)))] "TARGET_64BIT - && INTVAL (operands[3]) == 64 - INTVAL (operands[2])" - "shrd{q}\t{%2, %1, %0|%0, %1, %2}" + && INTVAL (operands[3]) == 64 - INTVAL (operands[2]) + && <nf_condition>" + "<nf_prefix>shrd{q}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "length_immediate" "1") @@ -15944,7 +16059,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_64_shrd_ndd_1" +(define_insn "x86_64_shrd_ndd_1<nf_name>" [(set (match_operand:DI 0 "register_operand" "=r") (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm") (match_operand:QI 3 "const_0_to_63_operand")) @@ -15952,15 +16067,66 @@ (ashift:TI (zero_extend:TI (match_operand:DI 2 "register_operand" "r")) - (match_operand:QI 4 "const_0_to_255_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 4 "const_0_to_255_operand")) 0)))] "TARGET_APX_NDD - && INTVAL (operands[4]) == 64 - INTVAL (operands[3])" - "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + && INTVAL (operands[4]) == 64 - INTVAL (operands[3]) + && <nf_condition>" + 
"<nf_prefix>shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "length_immediate" "1") (set_attr "mode" "DI")]) +(define_insn_and_split "*x86_64_shrd_shld_1_nozext_nf" + [(set (match_operand:DI 0 "nonimmediate_operand") + (ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand") + (match_operand:QI 2 "const_0_to_63_operand")) + (ashift:DI + (match_operand:DI 1 "nonimmediate_operand") + (match_operand:QI 3 "const_0_to_63_operand"))))] + "TARGET_64BIT && TARGET_APX_NF + && INTVAL (operands[3]) == 64 - INTVAL (operands[2]) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + if (rtx_equal_p (operands[4], operands[0])) + { + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_x86_64_shrd_1_nf (operands[0], operands[1], operands[2], operands[3])); + } + else if (rtx_equal_p (operands[1], operands[0])) + { + operands[4] = force_reg (DImode, operands[4]); + emit_insn (gen_x86_64_shld_1_nf (operands[0], operands[4], operands[3], operands[2])); + } + else if (TARGET_APX_NDD) + { + rtx tmp = gen_reg_rtx (DImode); + if (MEM_P (operands[4])) + { + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + } + else if (MEM_P (operands[1])) + emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[1], operands[4], + operands[3], operands[2])); + else + emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + else + { + operands[1] = force_reg (DImode, operands[1]); + rtx tmp = gen_reg_rtx (DImode); + emit_move_insn (tmp, operands[4]); + emit_insn (gen_x86_64_shrd_1_nf (tmp, operands[1], operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + DONE; +}) (define_insn_and_split "*x86_64_shrd_shld_1_nozext" [(set (match_operand:DI 0 "nonimmediate_operand") @@ -16063,7 +16229,7 @@ emit_move_insn (operands[4], operands[0]); }) -(define_insn 
"x86_shrd" +(define_insn "x86_shrd<nf_name>" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") (ior:SI (lshiftrt:SI (match_dup 0) (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic") @@ -16073,10 +16239,9 @@ (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) (minus:QI (const_int 32) - (and:QI (match_dup 2) (const_int 31)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "" - "shrd{l}\t{%2, %1, %0|%0, %1, %2}" + (and:QI (match_dup 2) (const_int 31)))) 0)))] + "<nf_condition>" + "<nf_prefix>shrd{l}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "mode" "SI") @@ -16085,7 +16250,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_shrd_ndd" +(define_insn "x86_shrd_ndd<nf_name>" [(set (match_operand:SI 0 "register_operand" "=r") (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm") (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic") @@ -16095,14 +16260,13 @@ (zero_extend:DI (match_operand:SI 2 "register_operand" "r")) (minus:QI (const_int 32) - (and:QI (match_dup 3) (const_int 31)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_APX_NDD" - "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (and:QI (match_dup 3) (const_int 31)))) 0)))] + "TARGET_APX_NDD && <nf_condition>" + "<nf_prefix>shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "mode" "SI")]) -(define_insn "x86_shrd_1" +(define_insn "x86_shrd_1<nf_name>" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") (ior:SI (lshiftrt:SI (match_dup 0) (match_operand:QI 2 "const_0_to_31_operand")) @@ -16110,10 +16274,10 @@ (ashift:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) - (match_operand:QI 3 "const_0_to_63_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] - "INTVAL (operands[3]) == 32 - INTVAL (operands[2])" - "shrd{l}\t{%2, %1, %0|%0, %1, %2}" + (match_operand:QI 3 "const_0_to_63_operand")) 0)))] + "INTVAL (operands[3]) == 32 - INTVAL (operands[2]) + && 
<nf_condition>" + "<nf_prefix>shrd{l}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "length_immediate" "1") @@ -16123,7 +16287,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_shrd_ndd_1" +(define_insn "x86_shrd_ndd_1<nf_name>" [(set (match_operand:SI 0 "register_operand" "=r") (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm") (match_operand:QI 3 "const_0_to_31_operand")) @@ -16131,15 +16295,66 @@ (ashift:DI (zero_extend:DI (match_operand:SI 2 "register_operand" "r")) - (match_operand:QI 4 "const_0_to_63_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 4 "const_0_to_63_operand")) 0)))] "TARGET_APX_NDD - && (INTVAL (operands[4]) == 32 - INTVAL (operands[3]))" - "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + && (INTVAL (operands[4]) == 32 - INTVAL (operands[3])) + && <nf_condition>" + "<nf_prefix>shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "length_immediate" "1") (set_attr "mode" "SI")]) +(define_insn_and_split "*x86_shrd_shld_1_nozext_nf" + [(set (match_operand:SI 0 "nonimmediate_operand") + (ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand") + (match_operand:QI 2 "const_0_to_31_operand")) + (ashift:SI + (match_operand:SI 1 "nonimmediate_operand") + (match_operand:QI 3 "const_0_to_31_operand"))))] + "TARGET_APX_NF && + INTVAL (operands[3]) == 32 - INTVAL (operands[2]) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + if (rtx_equal_p (operands[4], operands[0])) + { + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_x86_shrd_1_nf (operands[0], operands[1], operands[2], operands[3])); + } + else if (rtx_equal_p (operands[1], operands[0])) + { + operands[4] = force_reg (SImode, operands[4]); + emit_insn (gen_x86_shld_1_nf (operands[0], operands[4], operands[3], operands[2])); + } + else if (TARGET_APX_NDD) + { + rtx tmp = gen_reg_rtx (SImode); + if (MEM_P 
(operands[4])) + { + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + } + else if (MEM_P (operands[1])) + emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[1], operands[4], + operands[3], operands[2])); + else + emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + else + { + operands[1] = force_reg (SImode, operands[1]); + rtx tmp = gen_reg_rtx (SImode); + emit_move_insn (tmp, operands[4]); + emit_insn (gen_x86_shrd_1_nf (tmp, operands[1], operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + DONE; +}) (define_insn_and_split "*x86_shrd_shld_1_nozext" [(set (match_operand:SI 0 "nonimmediate_operand") -- 2.31.1 ^ permalink raw reply [flat|nested] 9+ messages in thread
[parent not found: <20240529023704.3728291-7-lingling.kong@intel.com>]
* [PATCH v3 7/8] [APX NF] Support APX NF for mul/div [not found] ` <20240529023704.3728291-7-lingling.kong@intel.com> @ 2024-05-29 5:11 ` Kong, Lingling 0 siblings, 0 replies; 9+ messages in thread From: Kong, Lingling @ 2024-05-29 5:11 UTC (permalink / raw) To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak gcc/ChangeLog: * config/i386/i386.md (*mul<mode>3_1_nf): New define_insn. (*mulqi3_1_nf): Ditto. (*<u>divmod<mode>4_noext_nf): Ditto. (<u>divmodhiqi3_nf): Ditto. --- gcc/config/i386/i386.md | 47 ++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 719cce7d3ef..e688e92785e 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -9898,17 +9898,17 @@ ;; ;; On BDVER1, all HI MULs use DoublePath -(define_insn "*mul<mode>3_1" +(define_insn "*mul<mode>3_1<nf_name>" [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r") (mult:SWIM248 (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0") - (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r"))) - (clobber (reg:CC FLAGS_REG))] - "!(MEM_P (operands[1]) && MEM_P (operands[2]))" + (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r")))] + "!(MEM_P (operands[1]) && MEM_P (operands[2])) + && <nf_condition>" "@ - imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} - imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} - imul{<imodesuffix>}\t{%2, %0|%0, %2}" + <nf_prefix>imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} + <nf_prefix>imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} + <nf_prefix>imul{<imodesuffix>}\t{%2, %0|%0, %2}" [(set_attr "type" "imul") (set_attr "prefix_0f" "0,0,1") (set (attr "athlon_decode") @@ -9969,14 +9969,14 @@ ;; MUL reg8 Direct ;; MUL mem8 Direct -(define_insn "*mulqi3_1" +(define_insn "*mulqi3_1<nf_name>" [(set (match_operand:QI 0 "register_operand" "=a") (mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0") - (match_operand:QI 2 "nonimmediate_operand" "qm"))) - 
(clobber (reg:CC FLAGS_REG))] + (match_operand:QI 2 "nonimmediate_operand" "qm")))] "TARGET_QIMODE_MATH - && !(MEM_P (operands[1]) && MEM_P (operands[2]))" - "mul{b}\t%2" + && !(MEM_P (operands[1]) && MEM_P (operands[2])) + && <nf_condition>" + "<nf_prefix>mul{b}\t%2" [(set_attr "type" "imul") (set_attr "length_immediate" "0") (set (attr "athlon_decode") @@ -11119,6 +11119,19 @@ [(set_attr "type" "multi") (set_attr "mode" "SI")]) +(define_insn "*<u>divmod<mode>4_noext_nf" + [(set (match_operand:SWIM248 0 "register_operand" "=a") + (any_div:SWIM248 + (match_operand:SWIM248 2 "register_operand" "0") + (match_operand:SWIM248 3 "nonimmediate_operand" "rm"))) + (set (match_operand:SWIM248 1 "register_operand" "=d") + (<paired_mod>:SWIM248 (match_dup 2) (match_dup 3))) + (use (match_operand:SWIM248 4 "register_operand" "1"))] + "TARGET_APX_NF" + "%{nf%} <sgnprefix>div{<imodesuffix>}\t%3" + [(set_attr "type" "idiv") + (set_attr "mode" "<MODE>")]) + (define_insn "*<u>divmod<mode>4_noext" [(set (match_operand:SWIM248 0 "register_operand" "=a") (any_div:SWIM248 @@ -11266,7 +11279,7 @@ ;; Change div/mod to HImode and extend the second argument to HImode ;; so that mode of div/mod matches with mode of arguments. Otherwise ;; combine may fail. -(define_insn "<u>divmodhiqi3" +(define_insn "<u>divmodhiqi3<nf_name>" [(set (match_operand:HI 0 "register_operand" "=a") (ior:HI (ashift:HI @@ -11278,10 +11291,10 @@ (const_int 8)) (zero_extend:HI (truncate:QI - (div:HI (match_dup 1) (any_extend:HI (match_dup 2))))))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_QIMODE_MATH" - "<sgnprefix>div{b}\t%2" + (div:HI (match_dup 1) (any_extend:HI (match_dup 2)))))))] + "TARGET_QIMODE_MATH + && <nf_condition>" + "<nf_prefix><sgnprefix>div{b}\t%2" [(set_attr "type" "idiv") (set_attr "mode" "QI")]) -- 2.31.1 ^ permalink raw reply [flat|nested] 9+ messages in thread
[parent not found: <20240529023704.3728291-8-lingling.kong@intel.com>]
* [PATCH v3 8/8] [APX NF] Support APX NF for lzcnt/tzcnt/popcnt [not found] ` <20240529023704.3728291-8-lingling.kong@intel.com> @ 2024-05-29 5:11 ` Kong, Lingling 0 siblings, 0 replies; 9+ messages in thread From: Kong, Lingling @ 2024-05-29 5:11 UTC (permalink / raw) To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak gcc/ChangeLog: * config/i386/i386.md (clz<mode>2_lzcnt_nf): New define_insn. (*clz<mode>2_lzcnt_falsedep_nf): Ditto. (<lt_zcnt>_<mode>_nf): Ditto. (*<lt_zcnt>_<mode>_falsedep_nf): Ditto. (<lt_zcnt>_hi_nf): Ditto. (popcount<mode>2_nf): Ditto. (*popcount<mode>2_falsedep_nf): Ditto. (popcounthi2_nf): Ditto. --- gcc/config/i386/i386.md | 124 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 113 insertions(+), 11 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e688e92785e..b0eb497cd23 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -20269,6 +20269,24 @@ operands[3] = gen_reg_rtx (<MODE>mode); }) +(define_insn_and_split "clz<mode>2_lzcnt_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (clz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm")))] + "TARGET_APX_NF && TARGET_LZCNT" + "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" + "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed + && optimize_function_for_speed_p (cfun) + && !reg_mentioned_p (operands[0], operands[1])" + [(parallel + [(set (match_dup 0) + (clz:SWI48 (match_dup 1))) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])] + "ix86_expand_clear (operands[0]);" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + (define_insn_and_split "clz<mode>2_lzcnt" [(set (match_operand:SWI48 0 "register_operand" "=r") (clz:SWI48 @@ -20292,6 +20310,18 @@ ; False dependency happens when destination is only updated by tzcnt, ; lzcnt or popcnt. There is no false dependency when destination is ; also used in source. 
+(define_insn "*clz<mode>2_lzcnt_falsedep_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (clz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP)] + "TARGET_APX_NF && TARGET_LZCNT" + "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + (define_insn "*clz<mode>2_lzcnt_falsedep" [(set (match_operand:SWI48 0 "register_operand" "=r") (clz:SWI48 @@ -20398,6 +20428,25 @@ ;; Version of lzcnt/tzcnt that is expanded from intrinsics. This version ;; provides operand size as output when source operand is zero. +(define_insn_and_split "<lt_zcnt>_<mode>_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 + [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))] + "TARGET_APX_NF" + "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}" + "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed + && optimize_function_for_speed_p (cfun) + && !reg_mentioned_p (operands[0], operands[1])" + [(parallel + [(set (match_dup 0) + (unspec:SWI48 [(match_dup 1)] LT_ZCNT)) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])] + "ix86_expand_clear (operands[0]);" + [(set_attr "type" "<lt_zcnt_type>") + (set_attr "prefix_0f" "1") + (set_attr "prefix_rep" "1") + (set_attr "mode" "<MODE>")]) + (define_insn_and_split "<lt_zcnt>_<mode>" [(set (match_operand:SWI48 0 "register_operand" "=r") (unspec:SWI48 @@ -20422,6 +20471,19 @@ ; False dependency happens when destination is only updated by tzcnt, ; lzcnt or popcnt. There is no false dependency when destination is ; also used in source. 
+(define_insn "*<lt_zcnt>_<mode>_falsedep_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 + [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT)) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP)] + "TARGET_APX_NF" + "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "<lt_zcnt_type>") + (set_attr "prefix_0f" "1") + (set_attr "prefix_rep" "1") + (set_attr "mode" "<MODE>")]) + (define_insn "*<lt_zcnt>_<mode>_falsedep" [(set (match_operand:SWI48 0 "register_operand" "=r") (unspec:SWI48 @@ -20436,13 +20498,12 @@ (set_attr "prefix_rep" "1") (set_attr "mode" "<MODE>")]) -(define_insn "<lt_zcnt>_hi" +(define_insn "<lt_zcnt>_hi<nf_name>" [(set (match_operand:HI 0 "register_operand" "=r") (unspec:HI - [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT)) - (clobber (reg:CC FLAGS_REG))] - "" - "<lt_zcnt>{w}\t{%1, %0|%0, %1}" + [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))] + "<nf_condition>" + "<nf_prefix><lt_zcnt>{w}\t{%1, %0|%0, %1}" [(set_attr "type" "<lt_zcnt_type>") (set_attr "prefix_0f" "1") (set_attr "prefix_rep" "1") @@ -20860,6 +20921,30 @@ [(set_attr "type" "bitmanip") (set_attr "mode" "<MODE>")]) +(define_insn_and_split "popcount<mode>2_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (popcount:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm")))] + "TARGET_APX_NF && TARGET_POPCNT" +{ +#if TARGET_MACHO + return "%{nf%} popcnt\t{%1, %0|%0, %1}"; +#else + return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}"; +#endif +} + "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed + && optimize_function_for_speed_p (cfun) + && !reg_mentioned_p (operands[0], operands[1])" + [(parallel + [(set (match_dup 0) + (popcount:SWI48 (match_dup 1))) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])] + "ix86_expand_clear (operands[0]);" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + 
(define_insn_and_split "popcount<mode>2" [(set (match_operand:SWI48 0 "register_operand" "=r") (popcount:SWI48 @@ -20889,6 +20974,24 @@ ; False dependency happens when destination is only updated by tzcnt, ; lzcnt or popcnt. There is no false dependency when destination is ; also used in source. +(define_insn "*popcount<mode>2_falsedep_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (popcount:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP)] + "TARGET_APX_NF && TARGET_POPCNT" +{ +#if TARGET_MACHO + return "%{nf%} popcnt\t{%1, %0|%0, %1}"; +#else + return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}"; +#endif +} + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + (define_insn "*popcount<mode>2_falsedep" [(set (match_operand:SWI48 0 "register_operand" "=r") (popcount:SWI48 @@ -21046,17 +21149,16 @@ DONE; }) -(define_insn "popcounthi2" +(define_insn "popcounthi2<nf_name>" [(set (match_operand:HI 0 "register_operand" "=r") (popcount:HI - (match_operand:HI 1 "nonimmediate_operand" "rm"))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_POPCNT" + (match_operand:HI 1 "nonimmediate_operand" "rm")))] + "TARGET_POPCNT && <nf_condition>" { #if TARGET_MACHO - return "popcnt\t{%1, %0|%0, %1}"; + return "<nf_prefix>popcnt\t{%1, %0|%0, %1}"; #else - return "popcnt{w}\t{%1, %0|%0, %1}"; + return "<nf_prefix>popcnt{w}\t{%1, %0|%0, %1}"; #endif } [(set_attr "prefix_rep" "1") -- 2.31.1 ^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2024-06-03 1:09 UTC | newest] Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- [not found] <20240529023704.3728291-1-lingling.kong@intel.com> 2024-05-29 5:10 ` [PATCH v3 1/8] [APX NF]: Support APX NF add Kong, Lingling 2024-06-03 1:08 ` Hongtao Liu [not found] ` <20240529023704.3728291-2-lingling.kong@intel.com> 2024-05-29 5:10 ` [PATCH v3 2/8] [APX NF] Support APX NF for {sub/and/or/xor/neg} Kong, Lingling [not found] ` <20240529023704.3728291-3-lingling.kong@intel.com> 2024-05-29 5:10 ` [PATCH v3 3/8] [APX NF] Support APX NF for left shift insns Kong, Lingling [not found] ` <20240529023704.3728291-4-lingling.kong@intel.com> 2024-05-29 5:10 ` [PATCH v3 4/8] [APX NF] Support APX NF for right " Kong, Lingling [not found] ` <20240529023704.3728291-5-lingling.kong@intel.com> 2024-05-29 5:10 ` [PATCH v3 5/8] [APX NF] Support APX NF for rotate insns Kong, Lingling [not found] ` <20240529023704.3728291-6-lingling.kong@intel.com> 2024-05-29 5:10 ` [PATCH v3 6/8] [APX NF] Support APX NF for shld/shrd Kong, Lingling [not found] ` <20240529023704.3728291-7-lingling.kong@intel.com> 2024-05-29 5:11 ` [PATCH v3 7/8] [APX NF] Support APX NF for mul/div Kong, Lingling [not found] ` <20240529023704.3728291-8-lingling.kong@intel.com> 2024-05-29 5:11 ` [PATCH v3 8/8] [APX NF] Support APX NF for lzcnt/tzcnt/popcnt Kong, Lingling
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).