* [PATCH v2 3/8] [APX NF] Support APX NF for left shift insns [not found] ` <20240522073710.2039035-3-lingling.kong@intel.com> @ 2024-05-22 8:41 ` Kong, Lingling 0 siblings, 0 replies; 6+ messages in thread From: Kong, Lingling @ 2024-05-22 8:41 UTC (permalink / raw) To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak gcc/ChangeLog: * config/i386/i386.md (*ashl<mode>3_1_nf): New. (*ashlhi3_1_nf): Ditto. (*ashlqi3_1_nf): Ditto. * config/i386/sse.md: New define_split. --- gcc/config/i386/i386.md | 80 +++++++++++++++++++++++++++-------------- gcc/config/i386/sse.md | 13 +++++++ 2 files changed, 67 insertions(+), 26 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 099d7f35c8f..271d449d7c4 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -15012,12 +15012,12 @@ [(set_attr "type" "ishiftx") (set_attr "mode" "<MODE>")]) -(define_insn "*ashl<mode>3_1" +(define_insn "*ashl<mode>3_1<nf_name>" [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r") (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k,rm") - (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>,c<S>"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>,c<S>")))] + "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) @@ -15030,7 +15030,7 @@ case TYPE_ALU: gcc_assert (operands[2] == const1_rtx); gcc_assert (rtx_equal_p (operands[0], operands[1])); - return "add{<imodesuffix>}\t%0, %0"; + return "<nf_prefix>add{<imodesuffix>}\t%0, %0"; default: if (operands[2] == const1_rtx @@ -15038,11 +15038,11 @@ /* For NDD form instructions related to TARGET_SHIFT1, the $1 immediate do not need to be omitted as assembler will map it to use shorter encoding. */ - && !use_ndd) + && !use_ndd && !<nf_applied>) return "sal{<imodesuffix>}\t%0"; else - return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" - : "sal{<imodesuffix>}\t{%2, %0|%0, %2}"; + return use_ndd ? "<nf_prefix>sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix>sal{<imodesuffix>}\t{%2, %0|%0, %2}"; } } [(set_attr "isa" "*,*,bmi2,avx512bw,apx_ndd") @@ -15073,6 +15073,17 @@ (set_attr "mode" "<MODE>")]) ;; Convert shift to the shiftx pattern to avoid flags dependency. +;; For NF/NDD doesn't support shift count as r, it just support c<S>, +;; but it has no flag. +(define_split + [(set (match_operand:SWI48 0 "register_operand") + (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand") + (match_operand:QI 2 "register_operand")))] + "TARGET_BMI2 && reload_completed" + [(set (match_dup 0) + (ashift:SWI48 (match_dup 1) (match_dup 2)))] + "operands[2] = gen_lowpart (<MODE>mode, operands[2]);") + (define_split [(set (match_operand:SWI48 0 "register_operand") (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand") @@ -15159,12 +15170,12 @@ (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))] "operands[2] = gen_lowpart (SImode, operands[2]);") -(define_insn "*ashlhi3_1" +(define_insn "*ashlhi3_1<nf_name>" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r") (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm") - (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFT, HImode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI")))] + "ix86_binary_operator_ok (ASHIFT, HImode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) @@ -15175,16 +15186,16 @@ case TYPE_ALU: gcc_assert (operands[2] == const1_rtx); - return "add{w}\t%0, %0"; + return "<nf_prefix>add{w}\t%0, %0"; default: if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) - && !use_ndd) + && !use_ndd && !<nf_applied>) return "sal{w}\t%0"; else - return use_ndd ? "sal{w}\t{%2, %1, %0|%0, %1, %2}" - : "sal{w}\t{%2, %0|%0, %2}"; + return use_ndd ? "<nf_prefix>sal{w}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix>sal{w}\t{%2, %0|%0, %2}"; } } [(set_attr "isa" "*,*,avx512f,apx_ndd") @@ -15212,12 +15223,12 @@ (const_string "*"))) (set_attr "mode" "HI,SI,HI,HI")]) -(define_insn "*ashlqi3_1" +(define_insn "*ashlqi3_1<nf_name>" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r") (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm") - (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFT, QImode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI")))] + "ix86_binary_operator_ok (ASHIFT, QImode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) @@ -15229,14 +15240,14 @@ case TYPE_ALU: gcc_assert (operands[2] == const1_rtx); if (REG_P (operands[1]) && !ANY_QI_REGNO_P (REGNO (operands[1]))) - return "add{l}\t%k0, %k0"; + return "<nf_prefix>add{l}\t%k0, %k0"; else - return "add{b}\t%0, %0"; + return "<nf_prefix>add{b}\t%0, %0"; default: if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) - && !use_ndd) + && !use_ndd && !<nf_applied>) { if (get_attr_mode (insn) == MODE_SI) return "sal{l}\t%k0"; @@ -15246,10 +15257,10 @@ else { if (get_attr_mode (insn) == MODE_SI) - return "sal{l}\t{%2, %k0|%k0, %2}"; + return "<nf_prefix>sal{l}\t{%2, %k0|%k0, %2}"; else - return use_ndd ? "sal{b}\t{%2, %1, %0|%0, %1, %2}" - : "sal{b}\t{%2, %0|%0, %2}"; + return use_ndd ? "<nf_prefix>sal{b}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix>sal{b}\t{%2, %0|%0, %2}"; } } } @@ -15334,6 +15345,23 @@ (set_attr "mode" "<MODE>")]) ;; Convert ashift to the lea pattern to avoid flags dependency. +(define_split + [(set (match_operand:SWI 0 "general_reg_operand") + (ashift:SWI (match_operand:SWI 1 "index_reg_operand") + (match_operand 2 "const_0_to_3_operand")))] + "reload_completed + && REGNO (operands[0]) != REGNO (operands[1])" + [(set (match_dup 0) + (mult:<LEAMODE> (match_dup 1) (match_dup 2)))] { + if (<MODE>mode != <LEAMODE>mode) + { + operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]); + operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]); + } + operands[2] = GEN_INT (1 << INTVAL (operands[2])); +}) + (define_split [(set (match_operand:SWI 0 "general_reg_operand") (ashift:SWI (match_operand:SWI 1 "index_reg_operand") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 72d4556f47d..498ca5e4d1b 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2169,6 +2169,19 @@ (set_attr "prefix" "vex") (set_attr "mode" "<MODE>")]) +(define_split + [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand") + (any_lshift:SWI1248_AVX512BW + (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand") + (match_operand 2 "const_int_operand")))] + "TARGET_AVX512F && reload_completed" + [(parallel + [(set (match_dup 0) + (any_lshift:SWI1248_AVX512BW + (match_dup 1) + (match_dup 2))) + (unspec [(const_int 0)] UNSPEC_MASKOP)])]) + (define_split [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand") (any_lshift:SWI1248_AVX512BW -- 2.31.1 ^ permalink raw reply [flat|nested] 6+ messages in thread
[parent not found: <20240522073710.2039035-4-lingling.kong@intel.com>]
* [PATCH v2 4/8] [APX NF] Support APX NF for right shift insns [not found] ` <20240522073710.2039035-4-lingling.kong@intel.com> @ 2024-05-22 8:41 ` Kong, Lingling 0 siblings, 0 replies; 6+ messages in thread From: Kong, Lingling @ 2024-05-22 8:41 UTC (permalink / raw) To: gcc-patches; +Cc: Liu, Hongtao, Uros Bizjak, Kong, Lingling gcc/ChangeLog: * config/i386/i386.md (*ashr<mode>3_1_nf): New. (*lshr<mode>3_1_nf): Ditto. (*lshrqi3_1_nf): Ditto. (*lshrhi3_1_nf): Ditto. --- gcc/config/i386/i386.md | 82 +++++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 36 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 271d449d7c4..7f191749342 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -16308,13 +16308,13 @@ [(set_attr "type" "ishiftx") (set_attr "mode" "<MODE>")]) -(define_insn "*ashr<mode>3_1" +(define_insn "*ashr<mode>3_1<nf_name>" [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r") (ashiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm") - (match_operand:QI 2 "nonmemory_operand" "c<S>,r,c<S>"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "nonmemory_operand" "c<S>,r,c<S>")))] + "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) @@ -16325,11 +16325,11 @@ default: if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) - && !use_ndd) + && !use_ndd && !<nf_applied>) return "sar{<imodesuffix>}\t%0"; else - return use_ndd ? "sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" - : "sar{<imodesuffix>}\t{%2, %0|%0, %2}"; + return use_ndd ? "<nf_prefix>sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix>sar{<imodesuffix>}\t{%2, %0|%0, %2}"; } } [(set_attr "isa" "*,bmi2,apx_ndd") @@ -16369,14 +16369,13 @@ } [(set_attr "isa" "*,*,*,apx_ndd")]) - -(define_insn "*lshr<mode>3_1" +(define_insn "*lshr<mode>3_1<nf_name>" [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k,r") (lshiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k,rm") - (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>,c<S>"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>,c<S>")))] + "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) @@ -16388,11 +16387,11 @@ default: if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) - && !use_ndd) + && !use_ndd && !<nf_applied>) return "shr{<imodesuffix>}\t%0"; else - return use_ndd ? "shr{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" - : "shr{<imodesuffix>}\t{%2, %0|%0, %2}"; + return use_ndd ? "<nf_prefix>shr{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix>shr{<imodesuffix>}\t{%2, %0|%0, %2}"; } } [(set_attr "isa" "*,bmi2,avx512bw,apx_ndd") @@ -16408,6 +16407,17 @@ (set_attr "mode" "<MODE>")]) ;; Convert shift to the shiftx pattern to avoid flags dependency. +;; For NF/NDD doesn't support shift count as r, it just support c<S>, +;; but it has no flag. +(define_split + [(set (match_operand:SWI48 0 "register_operand") + (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand") + (match_operand:QI 2 "register_operand")))] + "TARGET_BMI2 && reload_completed" + [(set (match_dup 0) + (any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))] + "operands[2] = gen_lowpart (<MODE>mode, operands[2]);") + (define_split [(set (match_operand:SWI48 0 "register_operand") (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand") @@ -16476,22 +16486,22 @@ (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))] "operands[2] = gen_lowpart (SImode, operands[2]);") -(define_insn "*ashr<mode>3_1" +(define_insn "*ashr<mode>3_1<nf_name>" [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, r") (ashiftrt:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0, rm") - (match_operand:QI 2 "nonmemory_operand" "c<S>, c<S>"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "nonmemory_operand" "c<S>, c<S>")))] + "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) - && !use_ndd) + && !use_ndd && !<nf_applied>) return "sar{<imodesuffix>}\t%0"; else - return use_ndd ? "sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" - : "sar{<imodesuffix>}\t{%2, %0|%0, %2}"; + return use_ndd ? "<nf_prefix>sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix>sar{<imodesuffix>}\t{%2, %0|%0, %2}"; } [(set_attr "isa" "*, apx_ndd") (set_attr "type" "ishift") @@ -16504,13 +16514,13 @@ (const_string "*"))) (set_attr "mode" "<MODE>")]) -(define_insn "*lshrqi3_1" +(define_insn "*lshrqi3_1<nf_name>" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,?k,r") (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0, k, rm") - (match_operand:QI 2 "nonmemory_operand" "cI,Wb,cI"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (LSHIFTRT, QImode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "nonmemory_operand" "cI,Wb,cI")))] + "ix86_binary_operator_ok (LSHIFTRT, QImode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) @@ -16518,11 +16528,11 @@ case TYPE_ISHIFT: if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) - && !use_ndd) + && !use_ndd && !<nf_applied>) return "shr{b}\t%0"; else - return use_ndd ? "shr{b}\t{%2, %1, %0|%0, %1, %2}" - : "shr{b}\t{%2, %0|%0, %2}"; + return use_ndd ? "<nf_prefix>shr{b}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix>shr{b}\t{%2, %0|%0, %2}"; case TYPE_MSKLOG: return "#"; default: @@ -16541,13 +16551,13 @@ (const_string "*"))) (set_attr "mode" "QI")]) -(define_insn "*lshrhi3_1" +(define_insn "*lshrhi3_1<nf_name>" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k, r") (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0, k, rm") - (match_operand:QI 2 "nonmemory_operand" "cI, Ww, cI"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (LSHIFTRT, HImode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "nonmemory_operand" "cI, Ww, cI")))] + "ix86_binary_operator_ok (LSHIFTRT, HImode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) @@ -16555,11 +16565,11 @@ case TYPE_ISHIFT: if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) - && !use_ndd) + && !use_ndd && !<nf_applied>) return "shr{w}\t%0"; else - return use_ndd ? "shr{w}\t{%2, %1, %0|%0, %1, %2}" - : "shr{w}\t{%2, %0|%0, %2}"; + return use_ndd ? "<nf_prefix>shr{w}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix>shr{w}\t{%2, %0|%0, %2}"; case TYPE_MSKLOG: return "#"; default: -- 2.31.1 ^ permalink raw reply [flat|nested] 6+ messages in thread
[parent not found: <20240522073710.2039035-5-lingling.kong@intel.com>]
* [PATCH v2 5/8] [APX NF] Support APX NF for rotate insns [not found] ` <20240522073710.2039035-5-lingling.kong@intel.com> @ 2024-05-22 8:42 ` Kong, Lingling 0 siblings, 0 replies; 6+ messages in thread From: Kong, Lingling @ 2024-05-22 8:42 UTC (permalink / raw) To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak gcc/ChangeLog: * config/i386/i386.md (ashr<mode>3_cvt_nf): New define_insn. (*<insn><mode>3_1_nf): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-nf.c: Add NF test for rotate insns. --- gcc/config/i386/i386.md | 53 ++++++++++++++++---------- gcc/testsuite/gcc.target/i386/apx-nf.c | 5 +++ 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 7f191749342..731eb12d13a 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -16230,19 +16230,19 @@ (define_mode_attr cvt_mnemonic [(SI "{cltd|cdq}") (DI "{cqto|cqo}")]) -(define_insn "ashr<mode>3_cvt" +(define_insn "ashr<mode>3_cvt<nf_name>" [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm,r") (ashiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "*a,0,rm") - (match_operand:QI 2 "const_int_operand"))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 2 "const_int_operand")))] "INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)-1 && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun)) - && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)" + && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD) + && <nf_condition>" "@ <cvt_mnemonic> - sar{<imodesuffix>}\t{%2, %0|%0, %2} - sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + <nf_prefix>sar{<imodesuffix>}\t{%2, %0|%0, %2} + <nf_prefix>sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "*,*,apx_ndd") (set_attr "type" "imovx,ishift,ishift") (set_attr "prefix_0f" "0,*,*") @@ -17094,13 +17094,13 @@ [(set_attr "type" "rotatex") (set_attr "mode" "<MODE>")]) -(define_insn "*<insn><mode>3_1" +(define_insn "*<insn><mode>3_1<nf_name>" [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r") (any_rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm") - (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>,c<S>"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>,c<S>")))] + "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) @@ -17111,11 +17111,11 @@ default: if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) - && !use_ndd) + && !use_ndd && !<nf_applied>) return "<rotate>{<imodesuffix>}\t%0"; else - return use_ndd ? "<rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" - : "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}"; + return use_ndd ? "<nf_prefix><rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix><rotate>{<imodesuffix>}\t{%2, %0|%0, %2}"; } } [(set_attr "isa" "*,bmi2,apx_ndd") @@ -17135,6 +17135,19 @@ (set_attr "mode" "<MODE>")]) ;; Convert rotate to the rotatex pattern to avoid flags dependency. +(define_split + [(set (match_operand:SWI48 0 "register_operand") + (rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand") + (match_operand:QI 2 "const_int_operand")))] + "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)" + [(set (match_dup 0) + (rotatert:SWI48 (match_dup 1) (match_dup 2)))] { + int bitsize = GET_MODE_BITSIZE (<MODE>mode); + + operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize); +}) + (define_split [(set (match_operand:SWI48 0 "register_operand") (rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand") @@ -17236,22 +17249,22 @@ [(set (match_dup 0) (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))]) -(define_insn "*<insn><mode>3_1" +(define_insn "*<insn><mode>3_1<nf_name>" [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m,r") (any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0,rm") - (match_operand:QI 2 "nonmemory_operand" "c<S>,c<S>"))) - (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)" + (match_operand:QI 2 "nonmemory_operand" "c<S>,c<S>")))] + "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD) + && <nf_condition>" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) - && !use_ndd) + && !use_ndd && !<nf_applied>) return "<rotate>{<imodesuffix>}\t%0"; else return use_ndd - ? "<rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" - : "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}"; + ? "<nf_prefix><rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + : "<nf_prefix><rotate>{<imodesuffix>}\t{%2, %0|%0, %2}"; } [(set_attr "isa" "*,apx_ndd") (set_attr "type" "rotate") diff --git a/gcc/testsuite/gcc.target/i386/apx-nf.c b/gcc/testsuite/gcc.target/i386/apx-nf.c index 608dbf8f5f7..6e59803be64 100644 --- a/gcc/testsuite/gcc.target/i386/apx-nf.c +++ b/gcc/testsuite/gcc.target/i386/apx-nf.c @@ -3,6 +3,7 @@ /* { dg-final { scan-assembler-times "\{nf\} add" 4 } } */ /* { dg-final { scan-assembler-times "\{nf\} and" 1 } } */ /* { dg-final { scan-assembler-times "\{nf\} or" 1 } } */ +/* { dg-final { scan-assembler-times "\{nf\} rol" 4 } } */ #include "apx-ndd.c" @@ -13,3 +14,7 @@ foo (struct B *b) { b->bit0 = b->bit0 | b->bit1; } +long int f1 (int x) { return ~(1ULL << (x & 0x3f)); } long int f2 (int +x) { return ~(1ULL << x); } long int f3 (unsigned char *x) { return +~(1ULL << (x[0] & 0x3f)); } long int f4 (unsigned char *x) { return +~(1ULL << x[0]); } -- 2.31.1 ^ permalink raw reply [flat|nested] 6+ messages in thread
[parent not found: <20240522073710.2039035-6-lingling.kong@intel.com>]
* [PATCH v2 6/8] [APX NF] Support APX NF for shld/shrd [not found] ` <20240522073710.2039035-6-lingling.kong@intel.com> @ 2024-05-22 8:43 ` Kong, Lingling 0 siblings, 0 replies; 6+ messages in thread From: Kong, Lingling @ 2024-05-22 8:43 UTC (permalink / raw) To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak gcc/ChangeLog: * config/i386/i386.md (x86_64_shld_nf): New define_insn. (x86_64_shld_ndd_nf): Ditto. (x86_64_shld_1_nf): Ditto. (x86_64_shld_ndd_1_nf): Ditto. (*x86_64_shld_shrd_1_nozext_nf): Ditto. (x86_shld_nf): Ditto. (x86_shld_ndd_nf): Ditto. (x86_shld_1_nf): Ditto. (x86_shld_ndd_1_nf): Ditto. (*x86_shld_shrd_1_nozext_nf): Ditto. (<insn><dwi>3_doubleword_lowpart_nf): Ditto. (x86_64_shrd_nf): Ditto. (x86_64_shrd_ndd_nf): Ditto. (x86_64_shrd_1_nf): Ditto. (x86_64_shrd_ndd_1_nf): Ditto. (*x86_64_shrd_shld_1_nozext_nf): Ditto. (x86_shrd_nf): Ditto. (x86_shrd_ndd_nf): Ditto. (x86_shrd_1_nf): Ditto. (x86_shrd_ndd_1_nf): Ditto. (*x86_shrd_shld_1_nozext_nf): Ditto. --- gcc/config/i386/i386.md | 377 +++++++++++++++++++++++++++++++--------- 1 file changed, 296 insertions(+), 81 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 731eb12d13a..4d684e8d919 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -14552,7 +14552,7 @@ DONE; }) -(define_insn "x86_64_shld" +(define_insn "x86_64_shld<nf_name>" [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") (ior:DI (ashift:DI (match_dup 0) (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc") @@ -14562,10 +14562,9 @@ (zero_extend:TI (match_operand:DI 1 "register_operand" "r")) (minus:QI (const_int 64) - (and:QI (match_dup 2) (const_int 63)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT" - "shld{q}\t{%2, %1, %0|%0, %1, %2}" + (and:QI (match_dup 2) (const_int 63)))) 0)))] + "TARGET_64BIT && <nf_condition>" + "<nf_prefix>shld{q}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "mode" "DI") @@ -14573,7 +14572,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_64_shld_ndd" +(define_insn "x86_64_shld_ndd<nf_name>" [(set (match_operand:DI 0 "register_operand" "=r") (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm") (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc") @@ -14583,14 +14582,13 @@ (zero_extend:TI (match_operand:DI 2 "register_operand" "r")) (minus:QI (const_int 64) - (and:QI (match_dup 3) (const_int 63)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_APX_NDD" - "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (and:QI (match_dup 3) (const_int 63)))) 0)))] + "TARGET_APX_NDD && <nf_condition>" + "<nf_prefix>shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "mode" "DI")]) -(define_insn "x86_64_shld_1" +(define_insn "x86_64_shld_1<nf_name>" [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") (ior:DI (ashift:DI (match_dup 0) (match_operand:QI 2 "const_0_to_63_operand")) @@ -14598,11 +14596,11 @@ (lshiftrt:TI (zero_extend:TI (match_operand:DI 1 "register_operand" "r")) - (match_operand:QI 3 "const_0_to_255_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 3 "const_0_to_255_operand")) 0)))] "TARGET_64BIT - && INTVAL (operands[3]) == 64 - INTVAL (operands[2])" - "shld{q}\t{%2, %1, %0|%0, %1, %2}" + && INTVAL (operands[3]) == 64 - INTVAL (operands[2]) + && <nf_condition>" + "<nf_prefix>shld{q}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "mode" "DI") @@ -14611,7 +14609,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_64_shld_ndd_1" +(define_insn "x86_64_shld_ndd_1<nf_name>" [(set (match_operand:DI 0 "register_operand" "=r") (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm") (match_operand:QI 3 "const_0_to_63_operand")) @@ -14619,15 +14617,66 @@ (lshiftrt:TI (zero_extend:TI (match_operand:DI 2 "register_operand" "r")) - (match_operand:QI 4 "const_0_to_255_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 4 "const_0_to_255_operand")) 0)))] "TARGET_APX_NDD - && INTVAL (operands[4]) == 64 - INTVAL (operands[3])" - "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + && INTVAL (operands[4]) == 64 - INTVAL (operands[3]) + && <nf_condition>" + "<nf_prefix>shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "mode" "DI") (set_attr "length_immediate" "1")]) +(define_insn_and_split "*x86_64_shld_shrd_1_nozext_nf" + [(set (match_operand:DI 0 "nonimmediate_operand") + (ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand") + (match_operand:QI 2 "const_0_to_63_operand")) + (lshiftrt:DI + (match_operand:DI 1 "nonimmediate_operand") + (match_operand:QI 3 "const_0_to_63_operand"))))] + "TARGET_64BIT && TARGET_APX_NF + && INTVAL (operands[3]) == 64 - INTVAL (operands[2]) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + if (rtx_equal_p (operands[4], operands[0])) + { + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_x86_64_shld_1_nf (operands[0], operands[1], operands[2], operands[3])); + } + else if (rtx_equal_p (operands[1], operands[0])) + { + operands[4] = force_reg (DImode, operands[4]); + emit_insn (gen_x86_64_shrd_1_nf (operands[0], operands[4], operands[3], operands[2])); + } + else if (TARGET_APX_NDD) + { + rtx tmp = gen_reg_rtx (DImode); + if (MEM_P (operands[4])) + { + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + } + else if (MEM_P (operands[1])) + emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[1], operands[4], + operands[3], operands[2])); + else + emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + else + { + operands[1] = force_reg (DImode, operands[1]); + rtx tmp = gen_reg_rtx (DImode); + emit_move_insn (tmp, operands[4]); + emit_insn (gen_x86_64_shld_1_nf (tmp, operands[1], operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + DONE; +}) (define_insn_and_split "*x86_64_shld_shrd_1_nozext" [(set (match_operand:DI 0 "nonimmediate_operand") @@ -14730,7 +14779,7 @@ emit_move_insn (operands[4], operands[0]); }) -(define_insn "x86_shld" +(define_insn "x86_shld<nf_name>" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") (ior:SI (ashift:SI (match_dup 0) (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic") @@ -14740,10 +14789,9 @@ (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) (minus:QI (const_int 32) - (and:QI (match_dup 2) (const_int 31)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "" - "shld{l}\t{%2, %1, %0|%0, %1, %2}" + (and:QI (match_dup 2) (const_int 31)))) 0)))] + "<nf_condition>" + "<nf_prefix>shld{l}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "mode" "SI") @@ -14752,7 +14800,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_shld_ndd" +(define_insn "x86_shld_ndd<nf_name>" [(set (match_operand:SI 0 "nonimmediate_operand" "=r") (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm") (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic") @@ -14762,15 +14810,14 @@ (zero_extend:DI (match_operand:SI 2 "register_operand" "r")) (minus:QI (const_int 32) - (and:QI (match_dup 3) (const_int 31)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_APX_NDD" - "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (and:QI (match_dup 3) (const_int 31)))) 0)))] + "TARGET_APX_NDD && <nf_condition>" + "<nf_prefix>shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "mode" "SI")]) -(define_insn "x86_shld_1" +(define_insn "x86_shld_1<nf_name>" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") (ior:SI (ashift:SI (match_dup 0) (match_operand:QI 2 "const_0_to_31_operand")) @@ -14778,10 +14825,10 @@ (lshiftrt:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) - (match_operand:QI 3 "const_0_to_63_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] - "INTVAL (operands[3]) == 32 - INTVAL (operands[2])" - "shld{l}\t{%2, %1, %0|%0, %1, %2}" + (match_operand:QI 3 "const_0_to_63_operand")) 0)))] + "INTVAL (operands[3]) == 32 - INTVAL (operands[2]) + && <nf_condition>" + "<nf_prefix>shld{l}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "length_immediate" "1") @@ -14791,7 +14838,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_shld_ndd_1" +(define_insn "x86_shld_ndd_1<nf_name>" [(set (match_operand:SI 0 "register_operand" "=r") (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm") (match_operand:QI 3 "const_0_to_31_operand")) @@ -14799,15 +14846,66 @@ (lshiftrt:DI (zero_extend:DI (match_operand:SI 2 "register_operand" "r")) - (match_operand:QI 4 "const_0_to_63_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 4 "const_0_to_63_operand")) 0)))] "TARGET_APX_NDD - && INTVAL (operands[4]) == 32 - INTVAL (operands[3])" - "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + && INTVAL (operands[4]) == 32 - INTVAL (operands[3]) + && <nf_condition>" + "<nf_prefix>shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "length_immediate" "1") (set_attr "mode" "SI")]) +(define_insn_and_split "*x86_shld_shrd_1_nozext_nf" + [(set (match_operand:SI 0 "nonimmediate_operand") + (ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand") + (match_operand:QI 2 "const_0_to_31_operand")) + (lshiftrt:SI + (match_operand:SI 1 "nonimmediate_operand") + (match_operand:QI 3 "const_0_to_31_operand"))))] + "TARGET_APX_NF && + INTVAL (operands[3]) == 32 - INTVAL (operands[2]) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + if (rtx_equal_p (operands[4], operands[0])) + { + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_x86_shld_1_nf (operands[0], operands[1], operands[2], operands[3])); + } + else if (rtx_equal_p (operands[1], operands[0])) + { + operands[4] = force_reg (SImode, operands[4]); + emit_insn (gen_x86_shrd_1_nf (operands[0], operands[4], operands[3], operands[2])); + } + else if (TARGET_APX_NDD) + { + rtx tmp = gen_reg_rtx (SImode); + if (MEM_P (operands[4])) + { + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + } + else if (MEM_P (operands[1])) + emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[1], operands[4], + operands[3], operands[2])); + else + emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + else + { + operands[1] = force_reg (SImode, operands[1]); + rtx tmp = gen_reg_rtx (SImode); + emit_move_insn (tmp, operands[4]); + emit_insn (gen_x86_shld_1_nf (tmp, operands[1], operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + DONE; +}) (define_insn_and_split "*x86_shld_shrd_1_nozext" [(set (match_operand:SI 0 "nonimmediate_operand") @@ -15846,6 +15944,26 @@ }) ;; Split truncations of double word right shifts into x86_shrd_1. +(define_insn_and_split "<insn><dwi>3_doubleword_lowpart_nf" + [(set (match_operand:DWIH 0 "register_operand" "=&r") + (subreg:DWIH + (any_shiftrt:<DWI> (match_operand:<DWI> 1 "register_operand" "r") + (match_operand:QI 2 "const_int_operand")) 0))] + "TARGET_APX_NF && UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT" + "#" + "&& reload_completed" + [(set (match_dup 0) + (ior:DWIH (lshiftrt:DWIH (match_dup 0) (match_dup 2)) + (subreg:DWIH + (ashift:<DWI> (zero_extend:<DWI> (match_dup 3)) + (match_dup 4)) 0)))] +{ + split_double_mode (<DWI>mode, &operands[1], 1, &operands[1], +&operands[3]); + operands[4] = GEN_INT ((<MODE_SIZE> * BITS_PER_UNIT) - INTVAL +(operands[2])); + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); +}) + (define_insn_and_split "<insn><dwi>3_doubleword_lowpart" [(set (match_operand:DWIH 0 "register_operand" "=&r") (subreg:DWIH @@ -15869,7 +15987,7 @@ emit_move_insn (operands[0], operands[1]); }) -(define_insn "x86_64_shrd" +(define_insn "x86_64_shrd<nf_name>" [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") (ior:DI (lshiftrt:DI (match_dup 0) (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc") @@ -15879,10 +15997,9 @@ (zero_extend:TI (match_operand:DI 1 "register_operand" "r")) (minus:QI (const_int 64) - (and:QI (match_dup 2) (const_int 63)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT" - "shrd{q}\t{%2, %1, %0|%0, %1, %2}" + (and:QI (match_dup 2) (const_int 63)))) 0)))] + "TARGET_64BIT && <nf_condition>" + "<nf_prefix>shrd{q}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "mode" "DI") @@ -15890,7 +16007,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_64_shrd_ndd" +(define_insn "x86_64_shrd_ndd<nf_name>" [(set (match_operand:DI 0 "register_operand" "=r") (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm") (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc") @@ -15900,15 +16017,13 @@ (zero_extend:TI (match_operand:DI 2 "register_operand" "r")) (minus:QI (const_int 64) - (and:QI (match_dup 3) (const_int 63)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_APX_NDD" - "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (and:QI (match_dup 3) (const_int 63)))) 0)))] + "TARGET_APX_NDD && <nf_condition>" + "<nf_prefix>shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "mode" "DI")]) - -(define_insn "x86_64_shrd_1" +(define_insn "x86_64_shrd_1<nf_name>" [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") (ior:DI (lshiftrt:DI (match_dup 0) (match_operand:QI 2 "const_0_to_63_operand")) @@ -15916,11 +16031,11 @@ (ashift:TI (zero_extend:TI (match_operand:DI 1 "register_operand" "r")) - (match_operand:QI 3 "const_0_to_255_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 3 "const_0_to_255_operand")) 0)))] "TARGET_64BIT - && INTVAL (operands[3]) == 64 - INTVAL (operands[2])" - "shrd{q}\t{%2, %1, %0|%0, %1, %2}" + && INTVAL (operands[3]) == 64 - INTVAL (operands[2]) + && <nf_condition>" + "<nf_prefix>shrd{q}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "length_immediate" "1") @@ -15929,7 +16044,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_64_shrd_ndd_1" +(define_insn "x86_64_shrd_ndd_1<nf_name>" [(set (match_operand:DI 0 "register_operand" "=r") (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm") (match_operand:QI 3 "const_0_to_63_operand")) @@ -15937,15 +16052,66 @@ (ashift:TI (zero_extend:TI (match_operand:DI 2 "register_operand" "r")) - (match_operand:QI 4 "const_0_to_255_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 4 "const_0_to_255_operand")) 0)))] "TARGET_APX_NDD - && INTVAL (operands[4]) == 64 - INTVAL (operands[3])" - "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + && INTVAL (operands[4]) == 64 - INTVAL (operands[3]) + && <nf_condition>" + "<nf_prefix>shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "length_immediate" "1") (set_attr "mode" "DI")]) +(define_insn_and_split "*x86_64_shrd_shld_1_nozext_nf" + [(set (match_operand:DI 0 "nonimmediate_operand") + (ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand") + (match_operand:QI 2 "const_0_to_63_operand")) + (ashift:DI + (match_operand:DI 1 "nonimmediate_operand") + (match_operand:QI 3 "const_0_to_63_operand"))))] + "TARGET_64BIT && TARGET_APX_NF + && INTVAL (operands[3]) == 64 - INTVAL (operands[2]) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + if (rtx_equal_p (operands[4], operands[0])) + { + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_x86_64_shrd_1_nf (operands[0], operands[1], operands[2], operands[3])); + } + else if (rtx_equal_p (operands[1], operands[0])) + { + operands[4] = force_reg (DImode, operands[4]); + emit_insn (gen_x86_64_shld_1_nf (operands[0], operands[4], operands[3], operands[2])); + } + else if (TARGET_APX_NDD) + { + rtx tmp = gen_reg_rtx (DImode); + if (MEM_P (operands[4])) + { + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + } + else if (MEM_P (operands[1])) + emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[1], operands[4], + operands[3], operands[2])); + else + emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + else + { + operands[1] = force_reg (DImode, operands[1]); + rtx tmp = gen_reg_rtx (DImode); + emit_move_insn (tmp, operands[4]); + emit_insn (gen_x86_64_shrd_1_nf (tmp, operands[1], operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + DONE; +}) (define_insn_and_split "*x86_64_shrd_shld_1_nozext" [(set (match_operand:DI 0 "nonimmediate_operand") @@ -16048,7 +16214,7 @@ emit_move_insn (operands[4], operands[0]); }) -(define_insn "x86_shrd" +(define_insn "x86_shrd<nf_name>" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") (ior:SI (lshiftrt:SI (match_dup 0) (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic") @@ -16058,10 +16224,9 @@ (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) (minus:QI (const_int 32) - (and:QI (match_dup 2) (const_int 31)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "" - "shrd{l}\t{%2, %1, %0|%0, %1, %2}" + (and:QI (match_dup 2) (const_int 31)))) 0)))] + "<nf_condition>" + "<nf_prefix>shrd{l}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "mode" "SI") @@ -16070,7 +16235,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_shrd_ndd" +(define_insn "x86_shrd_ndd<nf_name>" [(set (match_operand:SI 0 "register_operand" "=r") (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm") (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic") @@ -16080,14 +16245,13 @@ (zero_extend:DI (match_operand:SI 2 "register_operand" "r")) (minus:QI (const_int 32) - (and:QI (match_dup 3) (const_int 31)))) 0))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_APX_NDD" - "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + (and:QI (match_dup 3) (const_int 31)))) 0)))] + "TARGET_APX_NDD && <nf_condition>" + "<nf_prefix>shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "mode" "SI")]) -(define_insn "x86_shrd_1" +(define_insn "x86_shrd_1<nf_name>" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") (ior:SI (lshiftrt:SI (match_dup 0) (match_operand:QI 2 "const_0_to_31_operand")) @@ -16095,10 +16259,10 @@ (ashift:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) - (match_operand:QI 3 "const_0_to_63_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] - "INTVAL (operands[3]) == 32 - INTVAL (operands[2])" - "shrd{l}\t{%2, %1, %0|%0, %1, %2}" + (match_operand:QI 3 "const_0_to_63_operand")) 0)))] + "INTVAL (operands[3]) == 32 - INTVAL (operands[2]) + && <nf_condition>" + "<nf_prefix>shrd{l}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "length_immediate" "1") @@ -16108,7 +16272,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "x86_shrd_ndd_1" +(define_insn "x86_shrd_ndd_1<nf_name>" [(set (match_operand:SI 0 "register_operand" "=r") (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm") (match_operand:QI 3 "const_0_to_31_operand")) @@ -16116,15 +16280,66 @@ (ashift:DI (zero_extend:DI (match_operand:SI 2 "register_operand" "r")) - (match_operand:QI 4 "const_0_to_63_operand")) 0))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 4 "const_0_to_63_operand")) 0)))] "TARGET_APX_NDD - && (INTVAL (operands[4]) == 32 - INTVAL (operands[3]))" - "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + && (INTVAL (operands[4]) == 32 - INTVAL (operands[3])) + && <nf_condition>" + "<nf_prefix>shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "length_immediate" "1") (set_attr "mode" "SI")]) +(define_insn_and_split "*x86_shrd_shld_1_nozext_nf" + [(set (match_operand:SI 0 "nonimmediate_operand") + (ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand") + (match_operand:QI 2 "const_0_to_31_operand")) + (ashift:SI + (match_operand:SI 1 "nonimmediate_operand") + (match_operand:QI 3 "const_0_to_31_operand"))))] + "TARGET_APX_NF && + INTVAL (operands[3]) == 32 - INTVAL (operands[2]) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + if (rtx_equal_p (operands[4], operands[0])) + { + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_x86_shrd_1_nf (operands[0], operands[1], operands[2], operands[3])); + } + else if (rtx_equal_p (operands[1], operands[0])) + { + operands[4] = force_reg (SImode, operands[4]); + emit_insn (gen_x86_shld_1_nf (operands[0], operands[4], operands[3], operands[2])); + } + else if (TARGET_APX_NDD) + { + rtx tmp = gen_reg_rtx (SImode); + if (MEM_P (operands[4])) + { + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + } + else if (MEM_P (operands[1])) + emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[1], operands[4], + operands[3], operands[2])); + else + emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[4], operands[1], + operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + else + { + operands[1] = force_reg (SImode, operands[1]); + rtx tmp = gen_reg_rtx (SImode); + emit_move_insn (tmp, operands[4]); + emit_insn (gen_x86_shrd_1_nf (tmp, operands[1], operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + DONE; +}) (define_insn_and_split "*x86_shrd_shld_1_nozext" [(set (match_operand:SI 0 "nonimmediate_operand") -- 2.31.1 ^ permalink raw reply [flat|nested] 6+ messages in thread
[parent not found: <20240522073710.2039035-7-lingling.kong@intel.com>]
* [PATCH v2 7/8] [APX NF] Support APX NF for mul/div [not found] ` <20240522073710.2039035-7-lingling.kong@intel.com> @ 2024-05-22 8:43 ` Kong, Lingling 0 siblings, 0 replies; 6+ messages in thread From: Kong, Lingling @ 2024-05-22 8:43 UTC (permalink / raw) To: gcc-patches; +Cc: Kong, Lingling, Liu, Hongtao, Uros Bizjak gcc/ChangeLog: * config/i386/i386.md (*mul<mode>3_1_nf): New define_insn. (*mulqi3_1_nf): Ditto. (*<u>divmod<mode>4_noext_nf): Ditto. (<u>divmodhiqi3_nf): Ditto. --- gcc/config/i386/i386.md | 47 ++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 4d684e8d919..087761e5b3a 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -9896,17 +9896,17 @@ ;; ;; On BDVER1, all HI MULs use DoublePath -(define_insn "*mul<mode>3_1" +(define_insn "*mul<mode>3_1<nf_name>" [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r") (mult:SWIM248 (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0") - (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r"))) - (clobber (reg:CC FLAGS_REG))] - "!(MEM_P (operands[1]) && MEM_P (operands[2]))" + (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r")))] + "!(MEM_P (operands[1]) && MEM_P (operands[2])) + && <nf_condition>" "@ - imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} - imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} - imul{<imodesuffix>}\t{%2, %0|%0, %2}" + <nf_prefix>imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} + <nf_prefix>imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} + <nf_prefix>imul{<imodesuffix>}\t{%2, %0|%0, %2}" [(set_attr "type" "imul") (set_attr "prefix_0f" "0,0,1") (set (attr "athlon_decode") @@ -9967,14 +9967,14 @@ ;; MUL reg8 Direct ;; MUL mem8 Direct -(define_insn "*mulqi3_1" +(define_insn "*mulqi3_1<nf_name>" [(set (match_operand:QI 0 "register_operand" "=a") (mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0") - (match_operand:QI 2 "nonimmediate_operand" "qm"))) - (clobber (reg:CC FLAGS_REG))] + (match_operand:QI 2 "nonimmediate_operand" "qm")))] "TARGET_QIMODE_MATH - && !(MEM_P (operands[1]) && MEM_P (operands[2]))" - "mul{b}\t%2" + && !(MEM_P (operands[1]) && MEM_P (operands[2])) + && <nf_condition>" + "<nf_prefix>mul{b}\t%2" [(set_attr "type" "imul") (set_attr "length_immediate" "0") (set (attr "athlon_decode") @@ -11117,6 +11117,19 @@ [(set_attr "type" "multi") (set_attr "mode" "SI")]) +(define_insn "*<u>divmod<mode>4_noext_nf" + [(set (match_operand:SWIM248 0 "register_operand" "=a") + (any_div:SWIM248 + (match_operand:SWIM248 2 "register_operand" "0") + (match_operand:SWIM248 3 "nonimmediate_operand" "rm"))) + (set (match_operand:SWIM248 1 "register_operand" "=d") + (<paired_mod>:SWIM248 (match_dup 2) (match_dup 3))) + (use (match_operand:SWIM248 4 "register_operand" "1"))] + "TARGET_APX_NF" + "%{nf%} <sgnprefix>div{<imodesuffix>}\t%3" + [(set_attr "type" "idiv") + (set_attr "mode" "<MODE>")]) + (define_insn "*<u>divmod<mode>4_noext" [(set (match_operand:SWIM248 0 "register_operand" "=a") (any_div:SWIM248 @@ -11264,7 +11277,7 @@ ;; Change div/mod to HImode and extend the second argument to HImode ;; so that mode of div/mod matches with mode of arguments. Otherwise ;; combine may fail. -(define_insn "<u>divmodhiqi3" +(define_insn "<u>divmodhiqi3<nf_name>" [(set (match_operand:HI 0 "register_operand" "=a") (ior:HI (ashift:HI @@ -11276,10 +11289,10 @@ (const_int 8)) (zero_extend:HI (truncate:QI - (div:HI (match_dup 1) (any_extend:HI (match_dup 2))))))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_QIMODE_MATH" - "<sgnprefix>div{b}\t%2" + (div:HI (match_dup 1) (any_extend:HI (match_dup 2)))))))] + "TARGET_QIMODE_MATH + && <nf_condition>" + "<nf_prefix><sgnprefix>div{b}\t%2" [(set_attr "type" "idiv") (set_attr "mode" "QI")]) -- 2.31.1 ^ permalink raw reply [flat|nested] 6+ messages in thread
[parent not found: <20240522073710.2039035-8-lingling.kong@intel.com>]
* [PATCH v2 8/8] [APX NF] Support APX NF for lzcnt/tzcnt/popcnt [not found] ` <20240522073710.2039035-8-lingling.kong@intel.com> @ 2024-05-22 8:44 ` Kong, Lingling 0 siblings, 0 replies; 6+ messages in thread From: Kong, Lingling @ 2024-05-22 8:44 UTC (permalink / raw) To: gcc-patches; +Cc: Liu, Hongtao, Kong, Lingling, Uros Bizjak gcc/ChangeLog: * config/i386/i386.md (clz<mode>2_lzcnt_nf): New define_insn. (*clz<mode>2_lzcnt_falsedep_nf): Ditto. (<lt_zcnt>_<mode>_nf): Ditto. (*<lt_zcnt>_<mode>_falsedep_nf): Ditto. (<lt_zcnt>_hi_nf): Ditto. (popcount<mode>2_nf): Ditto. (*popcount<mode>2_falsedep_nf): Ditto. (popcounthi2_nf): Ditto. --- gcc/config/i386/i386.md | 124 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 113 insertions(+), 11 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 087761e5b3a..c9a3a99ca70 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -20250,6 +20250,24 @@ operands[3] = gen_reg_rtx (<MODE>mode); }) +(define_insn_and_split "clz<mode>2_lzcnt_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (clz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm")))] + "TARGET_APX_NF && TARGET_LZCNT" + "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" + "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed + && optimize_function_for_speed_p (cfun) + && !reg_mentioned_p (operands[0], operands[1])" + [(parallel + [(set (match_dup 0) + (clz:SWI48 (match_dup 1))) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])] + "ix86_expand_clear (operands[0]);" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + (define_insn_and_split "clz<mode>2_lzcnt" [(set (match_operand:SWI48 0 "register_operand" "=r") (clz:SWI48 @@ -20273,6 +20291,18 @@ ; False dependency happens when destination is only updated by tzcnt, ; lzcnt or popcnt. There is no false dependency when destination is ; also used in source. +(define_insn "*clz<mode>2_lzcnt_falsedep_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (clz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP)] + "TARGET_APX_NF && TARGET_LZCNT" + "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + (define_insn "*clz<mode>2_lzcnt_falsedep" [(set (match_operand:SWI48 0 "register_operand" "=r") (clz:SWI48 @@ -20379,6 +20409,25 @@ ;; Version of lzcnt/tzcnt that is expanded from intrinsics. This version ;; provides operand size as output when source operand is zero. +(define_insn_and_split "<lt_zcnt>_<mode>_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 + [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))] + "TARGET_APX_NF" + "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}" + "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed + && optimize_function_for_speed_p (cfun) + && !reg_mentioned_p (operands[0], operands[1])" + [(parallel + [(set (match_dup 0) + (unspec:SWI48 [(match_dup 1)] LT_ZCNT)) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])] + "ix86_expand_clear (operands[0]);" + [(set_attr "type" "<lt_zcnt_type>") + (set_attr "prefix_0f" "1") + (set_attr "prefix_rep" "1") + (set_attr "mode" "<MODE>")]) + (define_insn_and_split "<lt_zcnt>_<mode>" [(set (match_operand:SWI48 0 "register_operand" "=r") (unspec:SWI48 @@ -20403,6 +20452,19 @@ ; False dependency happens when destination is only updated by tzcnt, ; lzcnt or popcnt. There is no false dependency when destination is ; also used in source. +(define_insn "*<lt_zcnt>_<mode>_falsedep_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 + [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT)) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP)] + "TARGET_APX_NF" + "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "<lt_zcnt_type>") + (set_attr "prefix_0f" "1") + (set_attr "prefix_rep" "1") + (set_attr "mode" "<MODE>")]) + (define_insn "*<lt_zcnt>_<mode>_falsedep" [(set (match_operand:SWI48 0 "register_operand" "=r") (unspec:SWI48 @@ -20417,13 +20479,12 @@ (set_attr "prefix_rep" "1") (set_attr "mode" "<MODE>")]) -(define_insn "<lt_zcnt>_hi" +(define_insn "<lt_zcnt>_hi<nf_name>" [(set (match_operand:HI 0 "register_operand" "=r") (unspec:HI - [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT)) - (clobber (reg:CC FLAGS_REG))] - "" - "<lt_zcnt>{w}\t{%1, %0|%0, %1}" + [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))] + "<nf_condition>" + "<nf_prefix><lt_zcnt>{w}\t{%1, %0|%0, %1}" [(set_attr "type" "<lt_zcnt_type>") (set_attr "prefix_0f" "1") (set_attr "prefix_rep" "1") @@ -20841,6 +20902,30 @@ [(set_attr "type" "bitmanip") (set_attr "mode" "<MODE>")]) +(define_insn_and_split "popcount<mode>2_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (popcount:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm")))] + "TARGET_APX_NF && TARGET_POPCNT" +{ +#if TARGET_MACHO + return "%{nf%} popcnt\t{%1, %0|%0, %1}"; #else + return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}"; #endif } + "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed + && optimize_function_for_speed_p (cfun) + && !reg_mentioned_p (operands[0], operands[1])" + [(parallel + [(set (match_dup 0) + (popcount:SWI48 (match_dup 1))) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])] + "ix86_expand_clear (operands[0]);" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + (define_insn_and_split "popcount<mode>2" [(set (match_operand:SWI48 0 "register_operand" "=r") (popcount:SWI48 @@ -20870,6 +20955,24 @@ ; False dependency happens when destination is only updated by tzcnt, ; lzcnt or popcnt. There is no false dependency when destination is ; also used in source. +(define_insn "*popcount<mode>2_falsedep_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (popcount:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP)] + "TARGET_APX_NF && TARGET_POPCNT" +{ +#if TARGET_MACHO + return "%{nf%} popcnt\t{%1, %0|%0, %1}"; #else + return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}"; #endif } + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + (define_insn "*popcount<mode>2_falsedep" [(set (match_operand:SWI48 0 "register_operand" "=r") (popcount:SWI48 @@ -21027,17 +21130,16 @@ DONE; }) -(define_insn "popcounthi2" +(define_insn "popcounthi2<nf_name>" [(set (match_operand:HI 0 "register_operand" "=r") (popcount:HI - (match_operand:HI 1 "nonimmediate_operand" "rm"))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_POPCNT" + (match_operand:HI 1 "nonimmediate_operand" "rm")))] + "TARGET_POPCNT && <nf_condition>" { #if TARGET_MACHO - return "popcnt\t{%1, %0|%0, %1}"; + return "<nf_prefix>popcnt\t{%1, %0|%0, %1}"; #else - return "popcnt{w}\t{%1, %0|%0, %1}"; + return "<nf_prefix>popcnt{w}\t{%1, %0|%0, %1}"; #endif } [(set_attr "prefix_rep" "1") -- 2.31.1 ^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2024-05-22 8:44 UTC | newest] Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- [not found] <20240522073710.2039035-1-lingling.kong@intel.com> [not found] ` <20240522073710.2039035-3-lingling.kong@intel.com> 2024-05-22 8:41 ` [PATCH v2 3/8] [APX NF] Support APX NF for left shift insns Kong, Lingling [not found] ` <20240522073710.2039035-4-lingling.kong@intel.com> 2024-05-22 8:41 ` [PATCH v2 4/8] [APX NF] Support APX NF for right " Kong, Lingling [not found] ` <20240522073710.2039035-5-lingling.kong@intel.com> 2024-05-22 8:42 ` [PATCH v2 5/8] [APX NF] Support APX NF for rotate insns Kong, Lingling [not found] ` <20240522073710.2039035-6-lingling.kong@intel.com> 2024-05-22 8:43 ` [PATCH v2 6/8] [APX NF] Support APX NF for shld/shrd Kong, Lingling [not found] ` <20240522073710.2039035-7-lingling.kong@intel.com> 2024-05-22 8:43 ` [PATCH v2 7/8] [APX NF] Support APX NF for mul/div Kong, Lingling [not found] ` <20240522073710.2039035-8-lingling.kong@intel.com> 2024-05-22 8:44 ` [PATCH v2 8/8] [APX NF] Support APX NF for lzcnt/tzcnt/popcnt Kong, Lingling
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).