From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 53181 invoked by alias); 1 Aug 2016 01:57:17 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 53016 invoked by uid 89); 1 Aug 2016 01:57:17 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-1.9 required=5.0 tests=AWL,BAYES_00,KAM_LAZY_DOMAIN_SECURITY,RP_MATCHES_RCVD autolearn=ham version=3.3.2 spammy=IN_RANGE, in_range, Pmode, pmode X-HELO: gcc1-power7.osuosl.org Received: from gcc1-power7.osuosl.org (HELO gcc1-power7.osuosl.org) (140.211.15.137) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Mon, 01 Aug 2016 01:56:57 +0000 Received: by gcc1-power7.osuosl.org (Postfix, from userid 10019) id DA1A31C0927; Mon, 1 Aug 2016 01:43:15 +0000 (UTC) From: Segher Boessenkool To: gcc-patches@gcc.gnu.org Cc: bschmidt@redhat.com, Segher Boessenkool Subject: [PATCH 9/9] rs6000: Separate shrink-wrapping Date: Mon, 01 Aug 2016 01:57:00 -0000 Message-Id: <02df4c5a5c2bbf3139fa0be1c522f027879cd67e.1470015604.git.segher@kernel.crashing.org> In-Reply-To: References: In-Reply-To: References: X-IsSubscribed: yes X-SW-Source: 2016-08/txt/msg00006.txt.bz2 This implements the hooks for separate shrink-wrapping for rs6000. It handles GPRs and LR. The GPRs get a component number corresponding to their register number; LR gets component number 0. This improves specint by 1.8%, specfp by 0.5%, separate benchmarks much more. It improves the hot path in various interpreters (some improve by double digits), and e.g. in glibc's malloc. 2016-06-07 Segher Boessenkool * config/rs6000/rs6000.c (machine_function): Add new fields gpr_is_wrapped_separately and lr_is_wrapped_separately. (TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS, TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB, TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS, TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS, TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS, TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS): Define. (rs6000_get_separate_components): New function. (rs6000_components_for_bb): New function. (rs6000_disqualify_components): New function. (rs6000_emit_prologue_components): New function. (rs6000_emit_epilogue_components): New function. (rs6000_set_handled_components): New function. (rs6000_emit_prologue): Don't emit LR save if lr_is_wrapped_separately. Don't emit GPR saves if gpr_is_wrapped_separately for that register. (restore_saved_lr): Don't restore LR if lr_is_wrapped_separately. (rs6000_emit_epilogue): Don't emit GPR restores if gpr_is_wrapped_separately for that register. Don't make a REG_CFA_RESTORE note for registers we did not restore, either. --- gcc/config/rs6000/rs6000.c | 257 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 242 insertions(+), 15 deletions(-) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 46b46d7..723fea5 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -152,6 +152,10 @@ typedef struct GTY(()) machine_function bool split_stack_argp_used; /* Flag if r2 setup is needed with ELFv2 ABI. */ bool r2_setup_needed; + /* The components already handled by separate shrink-wrapping, which should + not be considered by the prologue and epilogue. */ + bool gpr_is_wrapped_separately[32]; + bool lr_is_wrapped_separately; } machine_function; /* Support targetm.vectorize.builtin_mask_for_load. */ @@ -1513,6 +1517,19 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_SET_UP_BY_PROLOGUE #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue +#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS +#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components +#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB +#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb +#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS +#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components +#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS +#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components +#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS +#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components +#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS +#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components + #undef TARGET_EXTRA_LIVE_ON_ENTRY #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry @@ -26444,6 +26461,201 @@ rs6000_global_entry_point_needed_p (void) return cfun->machine->r2_setup_needed; } +/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */ +static sbitmap +rs6000_get_separate_components (void) +{ + rs6000_stack_t *info = rs6000_stack_info (); + + if (!(info->savres_strategy & SAVE_INLINE_GPRS) + || !(info->savres_strategy & REST_INLINE_GPRS) + || WORLD_SAVE_P (info)) + return NULL; + + sbitmap components = sbitmap_alloc (32); + bitmap_clear (components); + + /* The GPRs we need saved to the frame. */ + int reg_size = TARGET_32BIT ? 4 : 8; + int offset = info->gp_save_offset; + if (info->push_p) + offset += info->total_size; + + for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++) + { + if (IN_RANGE (offset, -0x8000, 0x7fff) + && rs6000_reg_live_or_pic_offset_p (regno)) + bitmap_set_bit (components, regno); + + offset += reg_size; + } + + /* Don't mess with the hard frame pointer. */ + if (frame_pointer_needed) + bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM); + + /* Don't mess with the fixed TOC register. */ + if ((TARGET_TOC && TARGET_MINIMAL_TOC) + || (flag_pic == 1 && DEFAULT_ABI == ABI_V4) + || (flag_pic && DEFAULT_ABI == ABI_DARWIN)) + bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM); + + /* Optimize LR save and restore if we can. This is component 0. */ + if (info->lr_save_p + && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))) + { + offset = info->lr_save_offset; + if (info->push_p) + offset += info->total_size; + if (IN_RANGE (offset, -0x8000, 0x7fff)) + bitmap_set_bit (components, 0); + } + + return components; +} + +/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */ +static sbitmap +rs6000_components_for_bb (basic_block bb) +{ + rs6000_stack_t *info = rs6000_stack_info (); + + bitmap in = DF_LIVE_IN (bb); + bitmap gen = &DF_LIVE_BB_INFO (bb)->gen; + bitmap kill = &DF_LIVE_BB_INFO (bb)->kill; + + sbitmap components = sbitmap_alloc (32); + bitmap_clear (components); + + /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */ + for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++) + if (bitmap_bit_p (in, regno) + || bitmap_bit_p (gen, regno) + || bitmap_bit_p (kill, regno)) + bitmap_set_bit (components, regno); + + /* LR needs to be saved around a bb if it is killed in that bb. */ + if (bitmap_bit_p (kill, LR_REGNO)) + bitmap_set_bit (components, 0); + + return components; +} + +/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */ +static void +rs6000_disqualify_components (sbitmap components, edge e, + sbitmap edge_components, bool /*is_prologue*/) +{ + /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be + live where we want to place that code. */ + if (bitmap_bit_p (edge_components, 0) + && bitmap_bit_p (DF_LIVE_IN (e->dest), 0)) + { + if (dump_file) + fprintf (dump_file, "Disqualifying LR because GPR0 is live " + "on entry to bb %d\n", e->dest->index); + bitmap_clear_bit (components, 0); + } +} + +/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */ +static void +rs6000_emit_prologue_components (sbitmap components) +{ + rs6000_stack_t *info = rs6000_stack_info (); + rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); + int reg_size = TARGET_32BIT ? 4 : 8; + + /* Prologue for LR. */ + if (bitmap_bit_p (components, 0)) + { + rtx reg = gen_rtx_REG (Pmode, 0); + emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO)); + RTX_FRAME_RELATED_P (get_last_insn ()) = 1; + + int offset = info->lr_save_offset; + if (info->push_p) + offset += info->total_size; + + emit_insn (gen_frame_store (reg, sp_reg_rtx, offset)); + RTX_FRAME_RELATED_P (get_last_insn ()) = 1; + } + + /* Prologue for the GPRs. */ + int offset = info->gp_save_offset; + if (info->push_p) + offset += info->total_size; + + for (int i = info->first_gp_reg_save; i < 32; i++) + { + if (bitmap_bit_p (components, i)) + { + rtx reg = gen_rtx_REG (Pmode, i); + emit_insn (gen_frame_store (reg, sp_reg_rtx, offset)); + RTX_FRAME_RELATED_P (get_last_insn ()) = 1; + } + + offset += reg_size; + } +} + +/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */ +static void +rs6000_emit_epilogue_components (sbitmap components) +{ + rs6000_stack_t *info = rs6000_stack_info (); + rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); + int reg_size = TARGET_32BIT ? 4 : 8; + + /* Epilogue for the GPRs. */ + int offset = info->gp_save_offset; + if (info->push_p) + offset += info->total_size; + + for (int i = info->first_gp_reg_save; i < 32; i++) + { + if (bitmap_bit_p (components, i)) + { + rtx reg = gen_rtx_REG (Pmode, i); + emit_insn (gen_frame_load (reg, sp_reg_rtx, offset)); + RTX_FRAME_RELATED_P (get_last_insn ()) = 1; + add_reg_note (get_last_insn (), REG_CFA_RESTORE, reg); + } + + offset += reg_size; + } + + /* Epilogue for LR. */ + if (bitmap_bit_p (components, 0)) + { + int offset = info->lr_save_offset; + if (info->push_p) + offset += info->total_size; + + rtx reg = gen_rtx_REG (Pmode, 0); + emit_insn (gen_frame_load (reg, sp_reg_rtx, offset)); + + rtx lr = gen_rtx_REG (Pmode, LR_REGNO); + emit_move_insn (lr, reg); + RTX_FRAME_RELATED_P (get_last_insn ()) = 1; + add_reg_note (get_last_insn (), REG_CFA_RESTORE, lr); + } +} + +/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */ +static void +rs6000_set_handled_components (sbitmap components) +{ + rs6000_stack_t *info = rs6000_stack_info (); + + for (int i = info->first_gp_reg_save; i < 32; i++) + if (bitmap_bit_p (components, i)) + cfun->machine->gpr_is_wrapped_separately[i] = true; + + if (bitmap_bit_p (components, 0)) + cfun->machine->lr_is_wrapped_separately = true; +} + /* Emit function prologue as insns. */ void @@ -26701,7 +26913,8 @@ rs6000_emit_prologue (void) } /* If we use the link register, get it into r0. */ - if (!WORLD_SAVE_P (info) && info->lr_save_p) + if (!WORLD_SAVE_P (info) && info->lr_save_p + && !cfun->machine->lr_is_wrapped_separately) { rtx addr, reg, mem; @@ -26929,13 +27142,16 @@ rs6000_emit_prologue (void) } else if (!WORLD_SAVE_P (info)) { - int i; - for (i = 0; i < 32 - info->first_gp_reg_save; i++) - if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i)) - emit_frame_save (frame_reg_rtx, reg_mode, - info->first_gp_reg_save + i, - info->gp_save_offset + frame_off + reg_size * i, - sp_off - frame_off); + int offset = info->gp_save_offset + frame_off; + for (int i = info->first_gp_reg_save; i < 32; i++) + { + if (rs6000_reg_live_or_pic_offset_p (i) + && !cfun->machine->gpr_is_wrapped_separately[i]) + emit_frame_save (frame_reg_rtx, reg_mode, i, offset, + sp_off - frame_off); + + offset += reg_size; + } } if (crtl->calls_eh_return) @@ -27740,6 +27956,9 @@ load_lr_save (int regno, rtx frame_reg_rtx, int offset) static void restore_saved_lr (int regno, bool exit_func) { + if (cfun->machine->lr_is_wrapped_separately) + return; + rtx reg = gen_rtx_REG (Pmode, regno); rtx lr = gen_rtx_REG (Pmode, LR_REGNO); rtx_insn *insn = emit_move_insn (lr, reg); @@ -28497,12 +28716,18 @@ rs6000_emit_epilogue (int sibcall) } else { - for (i = 0; i < 32 - info->first_gp_reg_save; i++) - if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i)) - emit_insn (gen_frame_load - (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i), - frame_reg_rtx, - info->gp_save_offset + frame_off + reg_size * i)); + int offset = info->gp_save_offset + frame_off; + for (i = info->first_gp_reg_save; i < 32; i++) + { + if (rs6000_reg_live_or_pic_offset_p (i) + && !cfun->machine->gpr_is_wrapped_separately[i]) + { + rtx reg = gen_rtx_REG (reg_mode, i); + emit_insn (gen_frame_load (reg, frame_reg_rtx, offset)); + } + + offset += reg_size; + } } if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap) @@ -28541,8 +28766,10 @@ rs6000_emit_epilogue (int sibcall) || using_load_multiple || rs6000_reg_live_or_pic_offset_p (i)) { - rtx reg = gen_rtx_REG (reg_mode, i); + if (cfun->machine->gpr_is_wrapped_separately[i]) + continue; + rtx reg = gen_rtx_REG (reg_mode, i); cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); } } -- 1.9.3