diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 99022990377..ce7102af44f 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -6596,7 +6596,9 @@ (define_insn_and_split "*add_1_slp" return "add{}\t{%2, %0|%0, %2}"; } } - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]) + || rtx_equal_p (operands[0], operands[2]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) @@ -7001,38 +7003,58 @@ (define_expand "addqi_ext_1" (match_operand:QI 2 "const_int_operand")) 0)) (clobber (reg:CC FLAGS_REG))])]) -(define_insn "*addqi_ext_1" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*addqi_ext_1" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 (plus:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") + [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0) - (match_operand:QI 2 "general_operand" "QnBn")) 0)) + (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0)) (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1])" + "" { + if (which_alternative) + return "#"; + switch (get_attr_type (insn)) { case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{b}\t%h0"; else - { + { gcc_assert (operands[2] == constm1_rtx); - return "dec{b}\t%h0"; - } + return "dec{b}\t%h0"; + } default: return "add{b}\t{%2, %h0|%h0, %2}"; } } + "reload_completed + && !rtx_equal_p (operands[0], operands[1])" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (plus:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set_attr "addr" "gpr8") (set (attr "type") (if_then_else (match_operand:QI 2 "incdec_operand") @@ -7040,28 +7062,49 @@ (define_insn "*addqi_ext_1" (const_string "alu"))) (set_attr "mode" "QI")]) -(define_insn "*addqi_ext_2" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*qi_ext_2" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 - (plus:QI + (plusminus:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "%0") + [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0) (subreg:QI (match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") + [(match_operand 2 "int248_register_operand" "Q,Q") (const_int 8) (const_int 8)]) 0)) 0)) - (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1]) - || rtx_equal_p (operands[0], operands[2])" - "add{b}\t{%h2, %h0|%h0, %h2}" + (clobber (reg:CC FLAGS_REG))] + "" + "@ + {b}\t{%h2, %h0|%h0, %h2} + #" + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]) + || ( == PLUS && rtx_equal_p (operands[0], operands[2])))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (plusminus:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (subreg:QI + (match_op_dup 4 + [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set_attr "type" "alu") (set_attr "mode" "QI")]) @@ -7570,7 +7613,8 @@ (define_insn_and_split "*sub_1_slp" "@ sub{}\t{%2, %0|%0, %2} #" - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) @@ -7627,28 +7671,44 @@ (define_insn "*subqi_ext_0" (set_attr "type" "alu") (set_attr "mode" "QI")]) -(define_insn "*subqi_ext_2" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*subqi_ext_1" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 (minus:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") + [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0) - (subreg:QI - (match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0)) 0)) - (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1])" - "sub{b}\t{%h2, %h0|%h0, %h2}" - [(set_attr "type" "alu") + (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0)) + (clobber (reg:CC FLAGS_REG))] + "" + "@ + sub{b}\t{%2, %h0|%h0, %2} + #" + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (minus:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" + [(set_attr "addr" "gpr8") + (set_attr "type" "alu") (set_attr "mode" "QI")]) ;; Subtract with jump on overflow. @@ -11338,20 +11398,22 @@ (define_insn "*andqi_1" (symbol_ref "true")))]) ;; Alternative 1 is needed to work around LRA limitation, see PR82524. -(define_insn_and_split "*and_1_slp" +(define_insn_and_split "*_1_slp" [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+,&")) - (and:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!") - (match_operand:SWI12 2 "general_operand" "mn,mn"))) + (any_logic:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!") + (match_operand:SWI12 2 "general_operand" "mn,mn"))) (clobber (reg:CC FLAGS_REG))] "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" "@ - and{}\t{%2, %0|%0, %2} + {}\t{%2, %0|%0, %2} #" - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]) + || rtx_equal_p (operands[0], operands[2]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) - (and:SWI12 (match_dup 0) (match_dup 2))) + (any_logic:SWI12 (match_dup 0) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] "" [(set_attr "type" "alu") @@ -11528,9 +11590,9 @@ (define_insn "*and_2" [(set_attr "type" "alu") (set_attr "mode" "")]) 
-(define_insn "*andqi_ext_0" +(define_insn "*qi_ext_0" [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn") - (and:QI + (any_logic:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" [(match_operand 2 "int248_register_operand" "Q") @@ -11539,7 +11601,7 @@ (define_insn "*andqi_ext_0" (match_operand:QI 1 "nonimmediate_operand" "0"))) (clobber (reg:CC FLAGS_REG))] "" - "and{b}\t{%h2, %0|%0, %h2}" + "{b}\t{%h2, %0|%0, %h2}" [(set_attr "addr" "gpr8") (set_attr "type" "alu") (set_attr "mode" "QI")]) @@ -11558,86 +11620,180 @@ (define_expand "andqi_ext_1" (match_operand:QI 2 "const_int_operand")) 0)) (clobber (reg:CC FLAGS_REG))])]) -(define_insn "*andqi_ext_1" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*qi_ext_1" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 - (and:QI + (any_logic:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") + [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0) - (match_operand:QI 2 "general_operand" "QnBn")) 0)) + (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0)) (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1])" - "and{b}\t{%2, %h0|%h0, %2}" + "" + "@ + {b}\t{%2, %h0|%h0, %2} + #" + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (any_logic:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set_attr "addr" "gpr8") (set_attr "type" "alu") (set_attr "mode" "QI")]) -;; Generated by peephole translating test to and. This shows up -;; often in fp comparisons. -(define_insn "*andqi_ext_1_cc" - [(set (reg FLAGS_REG) - (compare - (and:QI - (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") - (const_int 8) - (const_int 8)]) 0) - (match_operand:QI 2 "general_operand" "QnBn")) - (const_int 0))) +;; Alternative 1 is needed to work around LRA limitation, see PR82524. The result SET reads operand 1 here and operand 0 after the split. +(define_insn_and_split "*qi_ext_1_cc" + [(set (match_operand 4 "flags_reg_operand") + (match_operator 5 "compare_operator" + [(any_logic:QI + (subreg:QI + (match_operator:SWI248 3 "extract_operator" + [(match_operand 1 "int248_register_operand" "0,!Q") + (const_int 8) + (const_int 8)]) 0) + (match_operand:QI 2 "general_operand" "QnBn,QnBn")) + (const_int 0)])) (set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 - (and:QI + (any_logic:QI (subreg:QI (match_op_dup 3 - [(match_dup 1) - (const_int 8) - (const_int 8)]) 0) + [(match_dup 1) (const_int 8) (const_int 8)]) 0) (match_dup 2)) 0))] - "ix86_match_ccmode (insn, CCNOmode) - /* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - && rtx_equal_p (operands[0], operands[1])" - "and{b}\t{%2, %h0|%h0, %2}" + "ix86_match_ccmode (insn, CCNOmode)" + "@ + {b}\t{%2, %h0|%h0, %2} + #" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (match_dup 4) + (match_op_dup 5 + [(any_logic:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) + (const_int 0)])) + (set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (any_logic:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) 0))])] + "" [(set_attr "addr" "gpr8") (set_attr "type" "alu") (set_attr "mode" "QI")]) -(define_insn "*andqi_ext_2" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*qi_ext_2" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 - (and:QI + (any_logic:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "%0") + [(match_operand 1 "int248_register_operand" "%0,!Q") (const_int 8) (const_int 8)]) 0) (subreg:QI (match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") + [(match_operand 2 "int248_register_operand" "Q,Q") (const_int 8) (const_int 8)]) 0)) 0)) (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1]) - || rtx_equal_p (operands[0], operands[2])" - "and{b}\t{%h2, %h0|%h0, %h2}" + "" + "@ + {b}\t{%h2, %h0|%h0, %h2} + #" + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]) + || rtx_equal_p (operands[0], operands[2]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (any_logic:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (subreg:QI + (match_op_dup 4 + [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set_attr "type" "alu") (set_attr "mode" "QI")]) -;; *andqi_ext_3 is defined via *qi_ext_3 below. +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*qi_ext_3" + [(set (zero_extract:SWI248 + (match_operand 0 "int248_register_operand" "+Q,&Q") + (const_int 8) + (const_int 8)) + (match_operator:SWI248 3 "extract_operator" + [(any_logic + (match_operand 1 "int248_register_operand" "%0,!Q") + (match_operand 2 "int248_register_operand" "Q,Q")) + (const_int 8) + (const_int 8)])) + (clobber (reg:CC FLAGS_REG))] + "GET_MODE (operands[1]) == GET_MODE (operands[2])" + "@ + {b}\t{%h2, %h0|%h0, %h2} + #" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]) + || rtx_equal_p (operands[0], operands[2]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (match_op_dup 3 + [(any_logic (match_dup 4) (match_dup 2)) + (const_int 8) (const_int 8)])) + (clobber (reg:CC FLAGS_REG))])] + "operands[4] = gen_lowpart (GET_MODE (operands[1]), operands[0]);" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) ;; Convert 
wide AND instructions with immediate operand to shorter QImode ;; equivalents when possible. @@ -12166,26 +12322,6 @@ (define_insn_and_split "*notxorqi_1" (symbol_ref "!TARGET_PARTIAL_REG_STALL")] (symbol_ref "true")))]) -;; Alternative 1 is needed to work around LRA limitation, see PR82524. -(define_insn_and_split "*_1_slp" - [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+,&")) - (any_or:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!") - (match_operand:SWI12 2 "general_operand" "mn,mn"))) - (clobber (reg:CC FLAGS_REG))] - "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" - "@ - {}\t{%2, %0|%0, %2} - #" - "&& reload_completed" - [(set (strict_low_part (match_dup 0)) (match_dup 1)) - (parallel - [(set (strict_low_part (match_dup 0)) - (any_or:SWI12 (match_dup 0) (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] - "" - [(set_attr "type" "alu") - (set_attr "mode" "")]) - ;; convert (sign_extend:WIDE (any_logic:NARROW (memory, immediate))) ;; to (any_logic:WIDE (sign_extend (memory)), (sign_extend (immediate))). ;; This eliminates sign extension after logic operation. 
@@ -12276,90 +12412,6 @@ (define_insn "*_3" [(set_attr "type" "alu") (set_attr "mode" "")]) -(define_insn "*qi_ext_0" - [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn") - (any_or:QI - (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0) - (match_operand:QI 1 "nonimmediate_operand" "0"))) - (clobber (reg:CC FLAGS_REG))] - "" - "{b}\t{%h2, %0|%0, %h2}" - [(set_attr "addr" "gpr8") - (set_attr "type" "alu") - (set_attr "mode" "QI")]) - -(define_insn "*qi_ext_1" - [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") - (const_int 8) - (const_int 8)) - (subreg:SWI248 - (any_or:QI - (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") - (const_int 8) - (const_int 8)]) 0) - (match_operand:QI 2 "general_operand" "QnBn")) 0)) - (clobber (reg:CC FLAGS_REG))] - "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) - /* FIXME: without this LRA can't reload this pattern, see PR82524. */ - && rtx_equal_p (operands[0], operands[1])" - "{b}\t{%2, %h0|%h0, %2}" - [(set_attr "addr" "gpr8") - (set_attr "type" "alu") - (set_attr "mode" "QI")]) - -(define_insn "*qi_ext_2" - [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") - (const_int 8) - (const_int 8)) - (subreg:SWI248 - (any_or:QI - (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "%0") - (const_int 8) - (const_int 8)]) 0) - (subreg:QI - (match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0)) 0)) - (clobber (reg:CC FLAGS_REG))] - "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) - /* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - && (rtx_equal_p (operands[0], operands[1]) - || rtx_equal_p (operands[0], operands[2]))" - "{b}\t{%h2, %h0|%h0, %h2}" - [(set_attr "type" "alu") - (set_attr "mode" "QI")]) - -(define_insn "*qi_ext_3" - [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") - (const_int 8) - (const_int 8)) - (zero_extract:SWI248 - (any_logic:SWI248 - (match_operand 1 "int248_register_operand" "%0") - (match_operand 2 "int248_register_operand" "Q")) - (const_int 8) - (const_int 8))) - (clobber (reg:CC FLAGS_REG))] - "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) - /* FIXME: without this LRA can't reload this pattern, see PR82524. */ - && (rtx_equal_p (operands[0], operands[1]) - || rtx_equal_p (operands[0], operands[2]))" - "{b}\t{%h2, %h0|%h0, %h2}" - [(set_attr "type" "alu") - (set_attr "mode" "QI")]) - ;; Convert wide OR instructions with immediate operand to shorter QImode ;; equivalents when possible. ;; Don't do the splitting with memory operands, since it introduces risk @@ -12443,37 +12495,6 @@ (define_expand "xorqi_ext_1_cc" (const_int 8)) 0) (match_dup 2)) 0))])]) -(define_insn "*xorqi_ext_1_cc" - [(set (reg FLAGS_REG) - (compare - (xor:QI - (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") - (const_int 8) - (const_int 8)]) 0) - (match_operand:QI 2 "general_operand" "QnBn")) - (const_int 0))) - (set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") - (const_int 8) - (const_int 8)) - (subreg:SWI248 - (xor:QI - (subreg:QI - (match_op_dup 3 - [(match_dup 1) - (const_int 8) - (const_int 8)]) 0) - (match_dup 2)) 0))] - "ix86_match_ccmode (insn, CCNOmode) - /* FIXME: without this LRA can't reload this pattern, see PR82524. */ - && rtx_equal_p (operands[0], operands[1])" - "xor{b}\t{%2, %h0|%h0, %2}" - [(set_attr "addr" "gpr8") - (set_attr "type" "alu") - (set_attr "mode" "QI")]) - ;; Peephole2 rega = 0; rega op= regb into rega = regb. 
(define_peephole2 [(parallel [(set (match_operand:SWI 0 "general_reg_operand") @@ -12813,7 +12834,8 @@ (define_insn_and_split "*neg_1_slp" "@ neg{}\t%0 #" - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) @@ -12881,22 +12903,40 @@ (define_expand "x86_neg_ccc" (set (match_operand:SWI48 0 "register_operand") (neg:SWI48 (match_dup 1)))])]) -(define_insn "*negqi_ext_2" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*negqi_ext_1" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 (neg:QI (subreg:QI (match_operator:SWI248 2 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") + [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0)) 0)) (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1])" - "neg{b}\t%h0" + "" + "@ + neg{b}\t%h0 + #" + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (neg:QI + (subreg:QI + (match_op_dup 2 + [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set_attr "type" "negnot") (set_attr "mode" "QI")]) @@ -13420,7 +13460,8 @@ (define_insn_and_split "*one_cmpl_1_slp" "@ not{}\t%0 #" - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (set (strict_low_part (match_dup 0)) (not:SWI12 (match_dup 0)))] @@ -13479,6 +13520,40 @@ (define_split (const_int 0)])) (set (match_dup 1) (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])]) + +;; Alternative 1 is needed to work around LRA limitation, see PR82524. 
+(define_insn_and_split "*one_cmplqi_ext_1" + [(set (zero_extract:SWI248 + (match_operand 0 "int248_register_operand" "+Q,&Q") + (const_int 8) + (const_int 8)) + (subreg:SWI248 + (not:QI + (subreg:QI + (match_operator:SWI248 2 "extract_operator" + [(match_operand 1 "int248_register_operand" "0,!Q") + (const_int 8) + (const_int 8)]) 0)) 0))] + "" + "@ + not{b}\t%h0 + #" + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (not:QI + (subreg:QI + (match_op_dup 2 + [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0))] + "" + [(set_attr "type" "negnot") + (set_attr "mode" "QI")]) ;; Shift instructions @@ -14254,7 +14329,8 @@ (define_insn_and_split "*ashl3_1_slp" return "sal{}\t{%2, %0|%0, %2}"; } } - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) @@ -14458,23 +14534,26 @@ (define_insn "*ashl3_cconly" (const_string "*"))) (set_attr "mode" "")]) -(define_insn "*ashlqi_ext_2" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*ashlqi_ext_1" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 (ashift:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") + [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0) - (match_operand:QI 2 "nonmemory_operand" "cI")) 0)) - (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1])" + (match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0)) + (clobber (reg:CC FLAGS_REG))] + "" { + if (which_alternative) + return "#"; + switch (get_attr_type (insn)) { case TYPE_ALU: @@ -14489,6 +14568,22 @@ (define_insn "*ashlqi_ext_2" return "sal{b}\t{%2, %h0|%h0, %2}"; } } + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 (match_dup 1) (const_int 8) (const_int 8))) ;; Copy bits 8..15 of operand 1, not its low part. + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (ashift:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set (attr "type") (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 2 "const1_operand")) @@ -15247,7 +15342,8 @@ (define_insn_and_split "*3_1_slp" else return "{}\t{%2, %0|%0, %2}"; } - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) @@ -15361,29 +15457,48 @@ (define_insn "*3_cconly" (const_string "*"))) (set_attr "mode" "")]) -(define_insn "*qi_ext_2" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*qi_ext_1" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 (any_shiftrt:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") + [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0) - (match_operand:QI 2 "nonmemory_operand" "cI")) 0)) - (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1])" + (match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0)) + (clobber (reg:CC FLAGS_REG))] + "" { + if (which_alternative) + return "#"; + if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) return "{b}\t%h0"; else return "{b}\t{%2, %h0|%h0, %2}"; } + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 (match_dup 1) (const_int 8) (const_int 8))) ;; Copy bits 8..15 of operand 1, not its low part. + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (any_shiftrt:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set_attr "type" "ishift") (set (attr "length_immediate") (if_then_else @@ -15875,7 +15990,8 @@ (define_insn_and_split "*3_1_slp" else return "{}\t{%2, %0|%0, %2}"; } - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) diff --git a/gcc/testsuite/gcc.target/i386/pr78904-1.c b/gcc/testsuite/gcc.target/i386/pr78904-1.c index d27d7fd651d..ed5403f8067 100644 --- a/gcc/testsuite/gcc.target/i386/pr78904-1.c +++ b/gcc/testsuite/gcc.target/i386/pr78904-1.c @@ -46,3 +46,12 @@ struct S1 test_add (struct S1 a, struct S1 b) } /* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S1 test_sub (struct S1 a, struct S1 b) +{ + a.val -= b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr78904-1a.c b/gcc/testsuite/gcc.target/i386/pr78904-1a.c index 7746477d745..aa9273eeb64 100644 --- a/gcc/testsuite/gcc.target/i386/pr78904-1a.c +++ b/gcc/testsuite/gcc.target/i386/pr78904-1a.c @@ -45,3 +45,12 @@ struct S1 test_add (struct S1 a, struct S1 b) } /* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S1 test_sub (struct S1 a, 
struct S1 b) +{ + a.val -= b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr78904-1b.c b/gcc/testsuite/gcc.target/i386/pr78904-1b.c index 20b677252ab..0687c95e912 100644 --- a/gcc/testsuite/gcc.target/i386/pr78904-1b.c +++ b/gcc/testsuite/gcc.target/i386/pr78904-1b.c @@ -47,3 +47,12 @@ struct S1 test_add (struct S1 a, struct S1 b) } /* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S1 test_sub (struct S1 a, struct S1 b) +{ + a.val -= b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr78904-2.c b/gcc/testsuite/gcc.target/i386/pr78904-2.c index 0cc4aaa91ea..3e9389ec20d 100644 --- a/gcc/testsuite/gcc.target/i386/pr78904-2.c +++ b/gcc/testsuite/gcc.target/i386/pr78904-2.c @@ -47,3 +47,12 @@ struct S1 test_add (struct S1 a) } /* { dg-final { scan-assembler "\[ \t\]addb\[ \t\]+t\[^\n\r]*, %.h" } } */ + +struct S1 test_sub (struct S1 a) +{ + a.val -= t.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb\[ \t\]+t\[^\n\r]*, %.h" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr78904-2a.c b/gcc/testsuite/gcc.target/i386/pr78904-2a.c index 41eaa259158..f0c5979c821 100644 --- a/gcc/testsuite/gcc.target/i386/pr78904-2a.c +++ b/gcc/testsuite/gcc.target/i386/pr78904-2a.c @@ -46,3 +46,12 @@ struct S1 test_add (struct S1 a) } /* { dg-final { scan-assembler "\[ \t\]addb\[ \t\]+t\[^\n\r]*, %.h" } } */ + +struct S1 test_sub (struct S1 a) +{ + a.val -= t.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb\[ \t\]+t\[^\n\r]*, %.h" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr78904-2b.c b/gcc/testsuite/gcc.target/i386/pr78904-2b.c index 23e975ac93e..e6154e6d918 100644 --- a/gcc/testsuite/gcc.target/i386/pr78904-2b.c +++ b/gcc/testsuite/gcc.target/i386/pr78904-2b.c @@ -48,3 +48,12 @@ struct S1 test_add (struct S1 a) } /* { dg-final { scan-assembler "\[ \t\]addb\[ \t\]+t\[^\n\r]*, %.h" } } 
*/ + +struct S1 test_sub (struct S1 a) +{ + a.val -= t.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb\[ \t\]+t\[^\n\r]*, %.h" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr78952-4.c b/gcc/testsuite/gcc.target/i386/pr78952-4.c index c7bd63c9543..d9979672945 100644 --- a/gcc/testsuite/gcc.target/i386/pr78952-4.c +++ b/gcc/testsuite/gcc.target/i386/pr78952-4.c @@ -46,3 +46,12 @@ struct S1 test_add (struct S1 a, struct S1 b) } /* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S1 test_sub (struct S1 a, struct S1 b) +{ + a.val -= b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82524-1.c b/gcc/testsuite/gcc.target/i386/pr82524-1.c new file mode 100644 index 00000000000..6539630900a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82524-1.c @@ -0,0 +1,63 @@ +/* PR target/82524 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-additional-options "-mregparm=3" { target ia32 } } */ +/* { dg-final { scan-assembler-not "mov\[sz\]bl" } } */ + +struct S +{ + char pad1; + char val; + char pad2; + char pad3; +}; + +struct S +test_plus (struct S a, struct S b, struct S c) +{ + a.val = b.val + c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S +test_minus (struct S a, struct S b, struct S c) +{ + a.val = b.val - c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ + +struct S +test_and (struct S a, struct S b, struct S c) +{ + a.val = b.val & c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]andb" } } */ + +struct S +test_or (struct S a, struct S b, struct S c) +{ + a.val = b.val | c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]orb" } } */ + +struct S +test_xor (struct S a, struct S b, struct S c) +{ + a.val = b.val ^ c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]xorb" } } */ diff --git 
a/gcc/testsuite/gcc.target/i386/pr82524-2.c b/gcc/testsuite/gcc.target/i386/pr82524-2.c new file mode 100644 index 00000000000..766dd1aae1e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82524-2.c @@ -0,0 +1,63 @@ +/* PR target/82524 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-additional-options "-mregparm=3" { target ia32 } } */ +/* { dg-final { scan-assembler-not "mov\[sz\]bl" } } */ + +struct S +{ + char pad1; + char val; + char pad2; + char pad3; +}; + +struct S +test_plus (struct S a, struct S b, char *c) +{ + a.val = b.val + *c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S +test_minus (struct S a, struct S b, char *c) +{ + a.val = b.val - *c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ + +struct S +test_and (struct S a, struct S b, char *c) +{ + a.val = b.val & *c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]andb" } } */ + +struct S +test_or (struct S a, struct S b, char *c) +{ + a.val = b.val | *c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]orb" } } */ + +struct S +test_xor (struct S a, struct S b, char *c) +{ + a.val = b.val ^ *c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]xorb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82524-3.c b/gcc/testsuite/gcc.target/i386/pr82524-3.c new file mode 100644 index 00000000000..7a66712193e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82524-3.c @@ -0,0 +1,42 @@ +/* PR target/82524 */ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-final { scan-assembler-not "movzbl" } } */ + +struct S +{ + unsigned char pad1; + unsigned char val; + unsigned short pad2; + unsigned int pad3; +}; + +struct S +test_and (struct S a, struct S b, struct S c) +{ + a.val = b.val & c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]andb" } } */ + +struct S +test_or (struct S a, struct S b, struct S c) +{ + a.val = b.val | c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]orb" } } */ + +struct S +test_xor (struct S a, struct S b, struct S c) +{ + a.val = b.val ^ c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]xorb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82524.c b/gcc/testsuite/gcc.target/i386/pr82524.c new file mode 100644 index 00000000000..058f0a2d14d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82524.c @@ -0,0 +1,83 @@ +/* PR target/82524 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-additional-options "-mregparm=3" { target ia32 } } */ +/* { dg-final { scan-assembler-not "mov\[sz\]bl" } } */ + +struct S +{ + char pad1; + char val; + char pad2; + char pad3; +}; + +struct S +test_plus (struct S a, struct S b, char c) +{ + a.val = b.val + c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S +test_minus (struct S a, struct S b, char c) +{ + a.val = b.val - c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ + +struct S +test_neg (struct S a, struct S b) +{ + a.val = -b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]negb" } } */ + +struct S +test_and (struct S a, struct S b, char c) +{ + a.val = b.val & c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]andb" } } */ + +struct S +test_or (struct S a, struct S b, char c) +{ + a.val = b.val | c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]orb" } } */ + +struct S +test_xor (struct S a, struct S b, char c) +{ + a.val = 
b.val ^ c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]xorb" } } */ + +struct S +test_not (struct S a, struct S b) +{ + a.val = ~b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]notb" } } */