From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1363) id 7694C385B83D; Mon, 29 Nov 2021 21:17:37 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 7694C385B83D MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset="utf-8" From: Uros Bizjak To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-5584] i386: Fix and improve movhi_internal and movhf_internal some more. X-Act-Checkin: gcc X-Git-Author: Uros Bizjak X-Git-Refname: refs/heads/master X-Git-Oldrev: f81c5a86dc03352682ac8ae3daa6ec507528a265 X-Git-Newrev: ca5667e867252db3c8642ee90f55427149cd92b6 Message-Id: <20211129211737.7694C385B83D@sourceware.org> Date: Mon, 29 Nov 2021 21:17:37 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 29 Nov 2021 21:17:37 -0000 https://gcc.gnu.org/g:ca5667e867252db3c8642ee90f55427149cd92b6 commit r12-5584-gca5667e867252db3c8642ee90f55427149cd92b6 Author: Uros Bizjak Date: Mon Nov 29 22:16:12 2021 +0100 i386: Fix and improve movhi_internal and movhf_internal some more. An (*v,C) alternative can be added to movhi_internal to directly load HImode constant 0 to xmm register. Also, V4SFmode moves can be used for xmm->xmm moves instead of TImode moves when optimizing for size. Fix invalid %vpinsrw insn template, which needs to duplicate %xmm register for AVX targets. Optimize GPR moves in movhf_internal in the same way as in movhi_internal. Fix pinsrw and pextrw templates for AVX targets. Use sselog1 instead of sselog type. Also, handle TARGET_SSE_PARTIAL_REG_DEPENDENCY and TARGET_SSE_SPLIT_REGS targets. 2021-11-29 Uroš Bizjak gcc/ChangeLog: PR target/102811 * config/i386/i386.md (*movhi_internal): Introduce (*v,C) alternative. Do not allocate non-GPR registers. Optimize xmm->xmm moves when optimizing for size. Fix vpinsrw insn template. (*movhf_internal): Fix pinsrw and pextrw insn templates for AVX targets. Use sselog1 type instead of sselog. Optimize GPR moves. Optimize xmm->xmm moves for TARGET_SSE_PARTIAL_REG_DEPENDENCY and TARGET_SSE_SPLIT_REGS targets. Diff: --- gcc/config/i386/i386.md | 211 +++++++++++++++++++++++++++++------------------- 1 file changed, 128 insertions(+), 83 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index a384dae23e2..c88374c9d2b 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2495,11 +2495,12 @@ (symbol_ref "true")))]) (define_insn "*movhi_internal" - [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,*k,*k ,*r,*m,*k,?r,?v,*v,*v,*m") - (match_operand:HI 1 "general_operand" "r ,rn,rm,rn,*r,*km,*k,*k,CBC,v, r, v, m, v"))] + [(set (match_operand:HI 0 "nonimmediate_operand" + "=r,r,r,m ,*k,*k ,r ,m ,*k ,?r,?*v,*v,*v,*v,m") + (match_operand:HI 1 "general_operand" + "r ,n,m,rn,r ,*km,*k,*k,CBC,*v,r ,C ,*v,m ,*v"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && ix86_hardreg_mov_ok (operands[0], operands[1])" - { switch (get_attr_type (insn)) { @@ -2526,10 +2527,13 @@ return ix86_output_ssemov (insn, operands); case TYPE_SSELOG1: + if (satisfies_constraint_C (operands[1])) + return standard_sse_constant_opcode (insn, operands); + if (SSE_REG_P (operands[0])) return MEM_P (operands[1]) - ? "%vpinsrw\t{$0, %1, %0|%0, %1, 0}" - : "%vpinsrw\t{$0, %k1, %0|%0, %k1, 0}"; + ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}" + : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}"; else return MEM_P (operands[0]) ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}" @@ -2550,23 +2554,25 @@ } } [(set (attr "isa") - (cond [(eq_attr "alternative" "9,10,11,12") + (cond [(eq_attr "alternative" "9,10,11,12,13") (const_string "sse2") - (eq_attr "alternative" "13") + (eq_attr "alternative" "14") (const_string "sse4") ] (const_string "*"))) (set (attr "type") - (cond [(eq_attr "alternative" "9,10,12,13") + (cond [(eq_attr "alternative" "4,5,6,7") + (const_string "mskmov") + (eq_attr "alternative" "8") + (const_string "msklog") + (eq_attr "alternative" "9,10,13,14") (if_then_else (match_test "TARGET_AVX512FP16") (const_string "ssemov") (const_string "sselog1")) - (eq_attr "alternative" "4,5,6,7") - (const_string "mskmov") (eq_attr "alternative" "11") + (const_string "sselog1") + (eq_attr "alternative" "12") (const_string "ssemov") - (eq_attr "alternative" "8") - (const_string "msklog") (match_test "optimize_function_for_size_p (cfun)") (const_string "imov") (and (eq_attr "alternative" "0") @@ -2581,33 +2587,54 @@ (const_string "imovx") ] (const_string "imov"))) - (set (attr "prefix") - (cond [(eq_attr "alternative" "9,10,11,12,13") - (const_string "maybe_evex") - (eq_attr "alternative" "4,5,6,7,8") - (const_string "vex") - ] - (const_string "orig"))) - (set (attr "mode") - (cond [(eq_attr "type" "imovx") - (const_string "SI") - (eq_attr "alternative" "9,10,12,13") - (if_then_else (match_test "TARGET_AVX512FP16") - (const_string "HI") - (const_string "TI")) - (eq_attr "alternative" "11") - (if_then_else (match_test "TARGET_AVX512FP16") - (const_string "HF") - (const_string "SF")) - (and (eq_attr "alternative" "1,2") - (match_operand:HI 1 "aligned_operand")) - (const_string "SI") - (and (eq_attr "alternative" "0") - (ior (not (match_test "TARGET_PARTIAL_REG_STALL")) - (not (match_test "TARGET_HIMODE_MATH")))) - (const_string "SI") + (set (attr "prefix") + (cond [(eq_attr "alternative" "4,5,6,7,8") + (const_string "vex") + (eq_attr "alternative" "9,10,11,12,13,14") + (const_string "maybe_evex") + ] + (const_string "orig"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "9,10,13,14") + (if_then_else (match_test "TARGET_AVX512FP16") + (const_string "HI") + (const_string "TI")) + (eq_attr "alternative" "11") + (cond [(match_test "TARGET_AVX") + (const_string "TI") + (ior (not (match_test "TARGET_SSE2")) + (match_test "optimize_function_for_size_p (cfun)")) + (const_string "V4SF") + ] + (const_string "TI")) + (eq_attr "alternative" "12") + (cond [(match_test "TARGET_AVX512FP16") + (const_string "HI") + (match_test "TARGET_AVX") + (const_string "TI") + (ior (not (match_test "TARGET_SSE2")) + (match_test "optimize_function_for_size_p (cfun)")) + (const_string "V4SF") + ] + (const_string "TI")) + (eq_attr "type" "imovx") + (const_string "SI") + (and (eq_attr "alternative" "1,2") + (match_operand:HI 1 "aligned_operand")) + (const_string "SI") + (and (eq_attr "alternative" "0") + (ior (not (match_test "TARGET_PARTIAL_REG_STALL")) + (not (match_test "TARGET_HIMODE_MATH")))) + (const_string "SI") ] - (const_string "HI")))]) + (const_string "HI"))) + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "9") + (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC") + (eq_attr "alternative" "10") + (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC") + ] + (symbol_ref "true")))]) ;; Situation is quite tricky about when to choose full sized (SImode) move ;; over QImode moves. For Q_REG -> Q_REG move we use full size only for @@ -3774,92 +3801,110 @@ (define_insn "*movhf_internal" [(set (match_operand:HF 0 "nonimmediate_operand" - "=?r,?m,v,v,?r,m,?v,v") + "=?r,?r,?r,?m,v,v,?r,m,?v,v") (match_operand:HF 1 "general_operand" - "rmF,rF,C,v, v,v, r,m"))] + "r ,F ,m ,rF,C,v, v,v,r ,m"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (lra_in_progress || reload_completed || !CONST_DOUBLE_P (operands[1]) - || (TARGET_SSE && TARGET_SSE_MATH + || (TARGET_SSE2 && standard_sse_constant_p (operands[1], HFmode) == 1) || memory_operand (operands[0], HFmode))" { switch (get_attr_type (insn)) { - case TYPE_IMOV: - return "mov{w}\t{%1, %0|%0, %1}"; - - case TYPE_SSELOG1: - return standard_sse_constant_opcode (insn, operands); + case TYPE_IMOVX: + /* movzwl is faster than movw on p2 due to partial word stalls, + though not as fast as an aligned movl. */ + return "movz{wl|x}\t{%1, %k0|%k0, %1}"; case TYPE_SSEMOV: return ix86_output_ssemov (insn, operands); - case TYPE_SSELOG: + case TYPE_SSELOG1: + if (satisfies_constraint_C (operands[1])) + return standard_sse_constant_opcode (insn, operands); + if (SSE_REG_P (operands[0])) return MEM_P (operands[1]) - ? "pinsrw\t{$0, %1, %0|%0, %1, 0}" - : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}"; + ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}" + : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}"; else return MEM_P (operands[0]) - ? "pextrw\t{$0, %1, %0|%0, %1, 0}" - : "pextrw\t{$0, %1, %k0|%k0, %1, 0}"; + ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}" + : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}"; default: - gcc_unreachable (); + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%k1, %k0|%k0, %k1}"; + else + return "mov{w}\t{%1, %0|%0, %1}"; } } [(set (attr "isa") - (cond [(eq_attr "alternative" "2,3,4,6,7") + (cond [(eq_attr "alternative" "4,5,6,8,9") (const_string "sse2") - (eq_attr "alternative" "5") + (eq_attr "alternative" "7") (const_string "sse4") ] (const_string "*"))) (set (attr "type") - (cond [(eq_attr "alternative" "0,1") - (const_string "imov") - (eq_attr "alternative" "2") + (cond [(eq_attr "alternative" "4") (const_string "sselog1") - (eq_attr "alternative" "4,5,6,7") + (eq_attr "alternative" "5") + (const_string "ssemov") + (eq_attr "alternative" "6,7,8,9") (if_then_else (match_test ("TARGET_AVX512FP16")) (const_string "ssemov") - (const_string "sselog")) - ] - (const_string "ssemov"))) - (set (attr "memory") - (cond [(eq_attr "alternative" "4,6") - (const_string "none") - (eq_attr "alternative" "5") - (const_string "store") - (eq_attr "alternative" "7") - (const_string "load") - ] - (const_string "*"))) + (const_string "sselog1")) + (match_test "optimize_function_for_size_p (cfun)") + (const_string "imov") + (and (eq_attr "alternative" "0") + (ior (not (match_test "TARGET_PARTIAL_REG_STALL")) + (not (match_test "TARGET_HIMODE_MATH")))) + (const_string "imov") + (and (eq_attr "alternative" "1,2") + (match_operand:HI 1 "aligned_operand")) + (const_string "imov") + (and (match_test "TARGET_MOVX") + (eq_attr "alternative" "0,2")) + (const_string "imovx") + ] + (const_string "imov"))) (set (attr "prefix") - (cond [(eq_attr "alternative" "0,1") - (const_string "orig") + (cond [(eq_attr "alternative" "4,5,6,7,8,9") + (const_string "maybe_vex") ] - (const_string "maybe_vex"))) + (const_string "orig"))) (set (attr "mode") - (cond [(eq_attr "alternative" "0,1") - (const_string "HI") - (eq_attr "alternative" "2") + (cond [(eq_attr "alternative" "4") (const_string "V4SF") - (eq_attr "alternative" "4,5,6,7") + (eq_attr "alternative" "6,7,8,9") (if_then_else (match_test "TARGET_AVX512FP16") (const_string "HI") (const_string "TI")) - (eq_attr "alternative" "3") - (if_then_else - (match_test "TARGET_AVX512FP16") - (const_string "HF") - (const_string "SF")) + (eq_attr "alternative" "5") + (cond [(match_test "TARGET_AVX512FP16") + (const_string "HF") + (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (match_test "TARGET_SSE_SPLIT_REGS")) + (const_string "V4SF") + ] + (const_string "SF")) + (eq_attr "type" "imovx") + (const_string "SI") + (and (eq_attr "alternative" "1,2") + (match_operand:HI 1 "aligned_operand")) + (const_string "SI") + (and (eq_attr "alternative" "0") + (ior (not (match_test "TARGET_PARTIAL_REG_STALL")) + (not (match_test "TARGET_HIMODE_MATH")))) + (const_string "SI") ] - (const_string "*")))]) + (const_string "HI")))]) (define_split [(set (match_operand 0 "any_fp_register_operand")