PR rtl-optimization/40900 * doc/md.texi (Standard Names): Document call_value_sext, call_value_zext, call_value_pop_sext and call_value_pop_zext. * optabs.h (enum optab_index): Add OTI_call_value_sext, OTI_call_value_zext, OTI_call_value_pop_sext, OTI_call_value_pop_zext. (call_value_sext_optab, call_value_zext_optab, call_value_pop_sext_optab, call_value_pop_zext_optab): New macros. * genopinit.c (optabs): Initialize the new optabs. * final.c (call_from_call_insn): Handle ZERO_EXTEND and SIGN_EXTEND. * calls.c: Include "recog.h". (emit_call_1): Look for and use extending call patterns if the return value is promoted. * combine.c (record_dead_and_set_regs): Use record_dead_and_set_regs_1 normally for call insns. * config/arm/arm.md (define_code_iterator cext): New. (define_code_attr optab): New. (define_mode_iterator CEXT): New. (call_value_<optab><mode>, call_value_<optab><mode>_internal, call_value_<optab><mode>_reg_armv5, call_value_<optab><mode>_reg_arm, call_value_<optab><mode>_mem, call_value_<optab><mode>_reg_thumb1_v5, call_value_<optab><mode>_reg_thumb1, call_value_<optab><mode>_symbol, call_value_<optab><mode>_insn): New patterns. * config/arm/thumb2.md (call_value_<optab><mode>_reg_thumb2): New pattern. PR rtl-optimization/40900 * gcc.target/arm/pr40900.c: New test. Index: doc/md.texi =================================================================== --- doc/md.texi (revision 158639) +++ doc/md.texi (working copy) @@ -4775,6 +4775,17 @@ For machines where @code{RETURN_POPS_ARG patterns increases the number of functions for which the frame pointer can be eliminated, if desired. +@cindex @code{call_value_sext@var{mode}} instruction pattern +@cindex @code{call_value_zext@var{mode}} instruction pattern +@cindex @code{call_value_pop_sext@var{mode}} instruction pattern +@cindex @code{call_value_pop_zext@var{mode}} instruction pattern + +These patterns correspond to @samp{call_value} and @samp{call_value_pop}, +but they describe a call which returns a sign-extended or zero-extended +result. This may enable some optimization opportunities. 
Defining them +is optional; if they don't exist, @samp{call_value} or @samp{call_value_pop} +will be used in their place. + @cindex @code{untyped_call} instruction pattern @item @samp{untyped_call} Subroutine call instruction returning a value of any type. Operand 0 is Index: optabs.h =================================================================== --- optabs.h (revision 158643) +++ optabs.h (working copy) @@ -369,6 +369,12 @@ enum optab_index /* Perform a raise to the power of integer. */ OTI_powi, + /* Perform a call where we know how the return value is extended. */ + OTI_call_value_sext, + OTI_call_value_zext, + OTI_call_value_pop_sext, + OTI_call_value_pop_zext, + OTI_MAX }; @@ -546,6 +552,11 @@ extern struct optab_d optab_table[OTI_MA #define powi_optab (&optab_table[OTI_powi]) +#define call_value_sext_optab (&optab_table[OTI_call_value_sext]) +#define call_value_zext_optab (&optab_table[OTI_call_value_zext]) +#define call_value_pop_sext_optab (&optab_table[OTI_call_value_pop_sext]) +#define call_value_pop_zext_optab (&optab_table[OTI_call_value_pop_zext]) + /* Conversion optabs have their own table and indexes. 
*/ enum convert_optab_index { Index: genopinit.c =================================================================== --- genopinit.c (revision 158639) +++ genopinit.c (working copy) @@ -272,7 +272,11 @@ static const char * const optabs[] = "optab_handler (vec_pack_ssat_optab, $A)->insn_code = CODE_FOR_$(vec_pack_ssat_$a$)", "optab_handler (vec_pack_usat_optab, $A)->insn_code = CODE_FOR_$(vec_pack_usat_$a$)", "optab_handler (vec_pack_sfix_trunc_optab, $A)->insn_code = CODE_FOR_$(vec_pack_sfix_trunc_$a$)", - "optab_handler (vec_pack_ufix_trunc_optab, $A)->insn_code = CODE_FOR_$(vec_pack_ufix_trunc_$a$)" + "optab_handler (vec_pack_ufix_trunc_optab, $A)->insn_code = CODE_FOR_$(vec_pack_ufix_trunc_$a$)", + "optab_handler (call_value_sext_optab, $A)->insn_code = CODE_FOR_$(call_value_sext$I$a$)", + "optab_handler (call_value_zext_optab, $A)->insn_code = CODE_FOR_$(call_value_zext$I$a$)", + "optab_handler (call_value_pop_sext_optab, $A)->insn_code = CODE_FOR_$(call_value_pop_sext$I$a$)", + "optab_handler (call_value_pop_zext_optab, $A)->insn_code = CODE_FOR_$(call_value_pop_zext$I$a$)" }; static void gen_insn (rtx); Index: final.c =================================================================== --- final.c (revision 158639) +++ final.c (working copy) @@ -1791,6 +1791,10 @@ call_from_call_insn (rtx insn) case PARALLEL: x = XVECEXP (x, 0, 0); break; + case ZERO_EXTEND: + case SIGN_EXTEND: + x = XEXP (x, 0); + break; case SET: x = XEXP (x, 1); break; Index: calls.c =================================================================== --- calls.c (revision 158639) +++ calls.c (working copy) @@ -29,6 +29,7 @@ along with GCC; see the file COPYING3. 
#include "flags.h" #include "expr.h" #include "optabs.h" +#include "recog.h" #include "libfuncs.h" #include "function.h" #include "regs.h" @@ -256,6 +257,8 @@ emit_call_1 (rtx funexp, tree fntree ATT rtx call_insn; int already_popped = 0; HOST_WIDE_INT n_popped = RETURN_POPS_ARGS (fndecl, funtype, stack_size); + enum insn_code pop_icode = CODE_FOR_nothing; + enum insn_code icode = CODE_FOR_nothing; #ifdef CALL_POPS_ARGS n_popped += CALL_POPS_ARGS (* args_so_far); @@ -267,6 +270,31 @@ emit_call_1 (rtx funexp, tree fntree ATT if (GET_CODE (funexp) != SYMBOL_REF) funexp = memory_address (FUNCTION_MODE, funexp); + + if (valreg) + { + enum machine_mode retval_mode, promoted_mode; + int retval_unsignedp; + + retval_unsignedp = TYPE_UNSIGNED (TREE_TYPE (funtype)); + retval_mode = TYPE_MODE (TREE_TYPE (funtype)); + promoted_mode + = promote_function_mode (TREE_TYPE (funtype), retval_mode, + &retval_unsignedp, funtype, 1); + + if (promoted_mode != retval_mode) + { + optab call_optab = (retval_unsignedp + ? call_value_pop_zext_optab + : call_value_pop_sext_optab); + pop_icode = optab_handler (call_optab, retval_mode)->insn_code; + call_optab = (retval_unsignedp + ? call_value_zext_optab + : call_value_sext_optab); + icode = optab_handler (call_optab, retval_mode)->insn_code; + } + } + #if defined (HAVE_sibcall_pop) && defined (HAVE_sibcall_value_pop) if ((ecf_flags & ECF_SIBCALL) && HAVE_sibcall_pop && HAVE_sibcall_value_pop @@ -311,7 +339,11 @@ emit_call_1 (rtx funexp, tree fntree ATT /* If this subroutine pops its own args, record that in the call insn if possible, for the sake of frame pointer elimination. 
*/ - if (valreg) + if (valreg && pop_icode != CODE_FOR_nothing) + pat = GEN_FCN (pop_icode) (valreg, + gen_rtx_MEM (FUNCTION_MODE, funexp), + rounded_stack_size_rtx, next_arg_reg, n_pop); + else if (valreg) pat = GEN_CALL_VALUE_POP (valreg, gen_rtx_MEM (FUNCTION_MODE, funexp), rounded_stack_size_rtx, next_arg_reg, n_pop); @@ -345,7 +377,12 @@ emit_call_1 (rtx funexp, tree fntree ATT #if defined (HAVE_call) && defined (HAVE_call_value) if (HAVE_call && HAVE_call_value) { - if (valreg) + if (valreg && icode != CODE_FOR_nothing) + emit_call_insn (GEN_FCN (icode) (valreg, + gen_rtx_MEM (FUNCTION_MODE, funexp), + rounded_stack_size_rtx, next_arg_reg, + NULL_RTX)); + else if (valreg) emit_call_insn (GEN_CALL_VALUE (valreg, gen_rtx_MEM (FUNCTION_MODE, funexp), rounded_stack_size_rtx, next_arg_reg, Index: combine.c =================================================================== --- combine.c (revision 158639) +++ combine.c (working copy) @@ -11889,15 +11889,8 @@ record_dead_and_set_regs (rtx insn) } last_call_luid = mem_last_set = DF_INSN_LUID (insn); - - /* We can't combine into a call pattern. Remember, though, that - the return value register is set at this LUID. We could - still replace a register with the return value from the - wrong subroutine call! 
*/ - note_stores (PATTERN (insn), record_dead_and_set_regs_1, NULL_RTX); } - else - note_stores (PATTERN (insn), record_dead_and_set_regs_1, insn); + note_stores (PATTERN (insn), record_dead_and_set_regs_1, insn); } /* If a SUBREG has the promoted bit set, it is in fact a property of the Index: Makefile.in =================================================================== --- Makefile.in (revision 158639) +++ Makefile.in (working copy) @@ -2807,7 +2807,7 @@ builtins.o : builtins.c $(CONFIG_H) $(SY libfuncs.h $(REAL_H) langhooks.h $(BASIC_BLOCK_H) tree-mudflap.h \ $(BUILTINS_DEF) $(MACHMODE_H) $(DIAGNOSTIC_H) $(TREE_FLOW_H) value-prof.h calls.o : calls.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ - $(TREE_H) $(FLAGS_H) $(EXPR_H) $(OPTABS_H) langhooks.h $(TARGET_H) \ + $(TREE_H) $(FLAGS_H) $(EXPR_H) $(OPTABS_H) $(RECOG_H) langhooks.h $(TARGET_H) \ libfuncs.h $(REGS_H) $(TOPLEV_H) output.h $(FUNCTION_H) $(TIMEVAR_H) $(TM_P_H) \ $(CGRAPH_H) $(EXCEPT_H) sbitmap.h $(DBGCNT_H) $(TREE_FLOW_H) expmed.o : expmed.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) \ Index: config/arm/arm.md =================================================================== --- config/arm/arm.md (revision 158639) +++ config/arm/arm.md (working copy) @@ -8561,6 +8561,51 @@ (define_expand "call_value" }" ) +(define_code_iterator cext [sign_extend zero_extend]) +(define_code_attr optab [(sign_extend "sext") + (zero_extend "zext")]) + +(define_mode_iterator CEXT [QI HI]) + +(define_expand "call_value_" + [(parallel [(set (match_operand:SI 0 "" "") + (cext:SI + (call:CEXT (match_operand 1 "memory_operand" "") + (match_operand 2 "general_operand" "")))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))])] + "TARGET_EITHER" + " + { + rtx pat, callee; + + /* In an untyped call, we can get NULL for operand 2. 
*/ + if (operands[3] == 0) + operands[3] = const0_rtx; + + /* Decide if we should generate indirect calls by loading the + 32-bit address of the callee into a register before performing the + branch and link. */ + callee = XEXP (operands[1], 0); + if (GET_CODE (callee) == SYMBOL_REF + ? arm_is_long_call_p (SYMBOL_REF_DECL (callee)) + : !REG_P (callee)) + XEXP (operands[1], 0) = force_reg (Pmode, callee); + + pat = gen_call_value__internal (operands[0], operands[1], + operands[2], operands[3]); + arm_emit_call_insn (pat, XEXP (operands[1], 0)); + DONE; + }" +) + +(define_expand "call_value__internal" + [(parallel [(set (match_operand 0 "" "") + (cext:SI (call:CEXT (match_operand 1 "memory_operand" "") + (match_operand 2 "general_operand" "")))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))])]) + (define_expand "call_value_internal" [(parallel [(set (match_operand 0 "" "") (call (match_operand 1 "memory_operand" "") @@ -8642,6 +8687,80 @@ (define_insn "*call_value_reg_thumb1" [(set_attr "type" "call")] ) +(define_insn "*call_value__reg_armv5" + [(set (match_operand:SI 0 "" "") + (cext:SI (call:CEXT (mem:SI (match_operand:SI 1 "s_register_operand" "r")) + (match_operand 2 "" "")))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM && arm_arch5" + "blx%?\\t%1" + [(set_attr "type" "call")] +) + +(define_insn "*call_value__reg_arm" + [(set (match_operand:SI 0 "" "") + (cext:SI (call:CEXT (mem:SI (match_operand:SI 1 "s_register_operand" "r")) + (match_operand 2 "" "")))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM && !arm_arch5" + "* + return output_call (&operands[1]); + " + [(set_attr "length" "12") + (set_attr "type" "call")] +) + +;; Note: see *call_mem + +(define_insn "*call_value__mem" + [(set (match_operand:SI 0 "" "") + (cext:SI (call:CEXT (mem:SI (match_operand:SI 1 "call_memory_operand" "m")) + (match_operand 2 "" "")))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] 
+ "TARGET_ARM && !arm_arch5 && (!CONSTANT_ADDRESS_P (XEXP (operands[1], 0)))" + "* + return output_call_mem (&operands[1]); + " + [(set_attr "length" "12") + (set_attr "type" "call")] +) + +(define_insn "*call_value__reg_thumb1_v5" + [(set (match_operand:SI 0 "" "") + (cext:SI (call:CEXT (mem:SI (match_operand:SI 1 "register_operand" "l*r")) + (match_operand 2 "" "")))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB1 && arm_arch5" + "blx\\t%1" + [(set_attr "length" "2") + (set_attr "type" "call")] +) + +(define_insn "*call_value__reg_thumb1" + [(set (match_operand:SI 0 "" "") + (cext:SI (call:CEXT (mem:SI (match_operand:SI 1 "register_operand" "l*r")) + (match_operand 2 "" "")))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB1 && !arm_arch5" + "* + { + if (!TARGET_CALLER_INTERWORKING) + return thumb_call_via_reg (operands[1]); + else if (operands[2] == const0_rtx) + return \"bl\\t%__interwork_call_via_%1\"; + else if (frame_pointer_needed) + return \"bl\\t%__interwork_r7_call_via_%1\"; + else + return \"bl\\t%__interwork_r11_call_via_%1\"; + }" + [(set_attr "type" "call")] +) + ;; Allow calls to SYMBOL_REFs specially as they are not valid general addresses ;; The 'a' causes the operand to be treated as an address, i.e. no '#' output. @@ -8676,6 +8795,22 @@ (define_insn "*call_value_symbol" [(set_attr "type" "call")] ) +(define_insn "*call_value__symbol" + [(set (match_operand:SI 0 "" "") + (cext:SI (call:CEXT (mem:SI (match_operand:SI 1 "" "")) + (match_operand:SI 2 "" "")))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM + && (GET_CODE (operands[1]) == SYMBOL_REF) + && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[1]))" + "* + { + return NEED_PLT_RELOC ? 
\"bl%?\\t%a1(PLT)\" : \"bl%?\\t%a1\"; + }" + [(set_attr "type" "call")] +) + (define_insn "*call_insn" [(call (mem:SI (match_operand:SI 0 "" "")) (match_operand:SI 1 "" "")) @@ -8703,6 +8838,20 @@ (define_insn "*call_value_insn" (set_attr "type" "call")] ) +(define_insn "*call_value__insn" + [(set (match_operand:SI 0 "" "") + (cext:SI (call:CEXT (mem:SI (match_operand 1 "" "")) + (match_operand 2 "" "")))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB + && GET_CODE (operands[1]) == SYMBOL_REF + && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[1]))" + "bl\\t%a1" + [(set_attr "length" "4") + (set_attr "type" "call")] +) + ;; We may also be able to do sibcalls for Thumb, but it's much harder... (define_expand "sibcall" [(parallel [(call (match_operand 0 "memory_operand" "") Index: config/arm/thumb2.md =================================================================== --- config/arm/thumb2.md (revision 158639) +++ config/arm/thumb2.md (working copy) @@ -448,6 +448,17 @@ (define_insn "*call_value_reg_thumb2" [(set_attr "type" "call")] ) +(define_insn "*call_value__reg_thumb2" + [(set (match_operand:SI 0 "" "") + (cext:SI (call:CEXT (mem:SI (match_operand:SI 1 "register_operand" "l*r")) + (match_operand 2 "" "")))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB2" + "blx\\t%1" + [(set_attr "type" "call")] +) + (define_insn "*thumb2_indirect_jump" [(set (pc) (match_operand:SI 0 "register_operand" "l*r"))] Index: testsuite/gcc.target/arm/pr40900.c =================================================================== --- testsuite/gcc.target/arm/pr40900.c (revision 0) +++ testsuite/gcc.target/arm/pr40900.c (revision 0) @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-optimize-sibling-calls" } */ + +extern short shortv2(); +short shortv1() +{ + return shortv2(); +} + +/* { dg-final { scan-assembler-not "lsl" } } */ +/* { dg-final { scan-assembler-not "asr" } } */ +/* { dg-final { 
scan-assembler-not "sxth" } } */