Hi, this patch puts static constructors, functions called only from the static constructors, and the function main() into the .text.startup subsection. Similarly, it puts the static destructors and functions called only from the static destructors into the .text.exit subsection. Together with a linker script update (attached on the binutils side), this causes significant improvements in startup time (40% of Mozilla's binary startup is paging in hundreds of static constructors evenly spread across the binary). I am not sure how to update gold - I basically copied the existing code in binutils for the .text.unlikely group in the GNU LD linker script, but I think gold is making independent decisions somewhere. Bootstrapped/regtested x86_64 linux. I am not sure if the darwin.h update is correct; I am just guessing based on the unlikely section definition. OK for the target bits? Honza * doc/tm.texi.in (STARTUP_EXECUTED_TEXT_SECTION_NAME, EXIT_EXECUTED_TEXT_SECTION_NAME): Document. * config/i386/darwin.h (STARTUP_TEXT_SECTION_NAME, EXIT_TEXT_SECTION_NAME): Define. * config/rs6000/darwin.h (STARTUP_TEXT_SECTION_NAME, EXIT_TEXT_SECTION_NAME): Define. * config/ia64/hpux.h (STARTUP_TEXT_SECTION_NAME, EXIT_TEXT_SECTION_NAME): Define. * cgraph.c (dump_cgraph_node): Dump SAME_COMDAT_GROUP, ONLY_CALLED_AT_STARTUP and ONLY_CALLED_AT_EXIT. (cgraph_propagate_frequency): Compute only_called_at_startup and only_called_at_exit. * cgraph.h (struct cgraph_node): Add only_called_at_startup and only_called_at_exit fields. * defaults.h (STARTUP_TEXT_SECTION_NAME, EXIT_TEXT_SECTION_NAME): New. * lto-cgraph.c (lto_output_node): Add only_called_at_startup and only_called_at_exit. (input_overwrite_node): Likewise. * predict.c (compute_function_frequency): Set only_called_at_startup and only_called_at_exit. (choose_function_section): Set function sections. 
Index: doc/tm.texi =================================================================== *** doc/tm.texi (revision 165478) --- doc/tm.texi (working copy) *************** If defined, a C string constant for the *** 6873,6878 **** --- 6873,6888 ---- executed functions in the program. @end defmac + @defmac STARTUP_EXECUTED_TEXT_SECTION_NAME + If defined, a C string constant for the name of the section containing + functions executed at startup of the program. + @end defmac + + @defmac EXIT_EXECUTED_TEXT_SECTION_NAME + If defined, a C string constant for the name of the section containing + functions executed only when the program is doing exit. + @end defmac + @defmac DATA_SECTION_ASM_OP A C expression whose value is a string, including spacing, containing the assembler operation to identify the following data as writable initialized Index: doc/tm.texi.in =================================================================== *** doc/tm.texi.in (revision 165478) --- doc/tm.texi.in (working copy) *************** If defined, a C string constant for the *** 6869,6874 **** --- 6869,6884 ---- executed functions in the program. @end defmac + @defmac STARTUP_EXECUTED_TEXT_SECTION_NAME + If defined, a C string constant for the name of the section containing + functions executed at startup of the program. + @end defmac + + @defmac EXIT_EXECUTED_TEXT_SECTION_NAME + If defined, a C string constant for the name of the section containing + functions executed only when the program is doing exit. 
+ @end defmac + @defmac DATA_SECTION_ASM_OP A C expression whose value is a string, including spacing, containing the assembler operation to identify the following data as writable initialized Index: cgraph.c =================================================================== *** cgraph.c (revision 165478) --- cgraph.c (working copy) *************** dump_cgraph_node (FILE *f, struct cgraph *** 1807,1812 **** --- 1807,1816 ---- fprintf (f, " (inline copy in %s/%i)", cgraph_node_name (node->global.inlined_to), node->global.inlined_to->uid); + if (node->same_comdat_group) + fprintf (f, " (same comdat group as %s/%i)", + cgraph_node_name (node->same_comdat_group), + node->same_comdat_group->uid); if (node->clone_of) fprintf (f, " (clone of %s/%i)", cgraph_node_name (node->clone_of), *************** dump_cgraph_node (FILE *f, struct cgraph *** 1867,1872 **** --- 1871,1880 ---- fprintf (f, " redefined_extern_inline"); if (TREE_ASM_WRITTEN (node->decl)) fprintf (f, " asm_written"); + if (node->only_called_at_startup) + fprintf (f, " only_called_at_startup"); + if (node->only_called_at_exit) + fprintf (f, " only_called_at_exit"); fprintf (f, "\n called by: "); for (edge = node->callers; edge; edge = edge->next_caller) *************** bool *** 2618,2637 **** cgraph_propagate_frequency (struct cgraph_node *node) { bool maybe_unlikely_executed = true, maybe_executed_once = true; struct cgraph_edge *edge; if (!node->local.local) return false; gcc_assert (node->analyzed); - if (node->frequency == NODE_FREQUENCY_HOT) - return false; - if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) - return false; if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Processing frequency %s\n", cgraph_node_name (node)); for (edge = node->callers; ! 
edge && (maybe_unlikely_executed || maybe_executed_once); edge = edge->next_caller) { if (!edge->frequency) continue; switch (edge->caller->frequency) --- 2626,2657 ---- cgraph_propagate_frequency (struct cgraph_node *node) { bool maybe_unlikely_executed = true, maybe_executed_once = true; + bool only_called_at_startup = true; + bool only_called_at_exit = true; + bool changed = false; struct cgraph_edge *edge; + if (!node->local.local) return false; gcc_assert (node->analyzed); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Processing frequency %s\n", cgraph_node_name (node)); + for (edge = node->callers; ! edge && (maybe_unlikely_executed || maybe_executed_once ! || only_called_at_startup || only_called_at_exit); edge = edge->next_caller) { + if (edge->caller != node) + { + only_called_at_startup &= edge->caller->only_called_at_startup; + /* It makes snese to put main() together with the static constructors. + It will be executed for sure, but rest of functions called from + main are definitly not at startup only. */ + if (MAIN_NAME_P (DECL_NAME (edge->caller->decl))) + only_called_at_startup = 0; + only_called_at_exit &= edge->caller->only_called_at_exit; + } if (!edge->frequency) continue; switch (edge->caller->frequency) *************** cgraph_propagate_frequency (struct cgrap *** 2640,2646 **** break; case NODE_FREQUENCY_EXECUTED_ONCE: if (dump_file && (dump_flags & TDF_DETAILS)) ! fprintf (dump_file, " Called by %s that is executed once\n", cgraph_node_name (node)); maybe_unlikely_executed = false; if (edge->loop_nest) { --- 2660,2667 ---- break; case NODE_FREQUENCY_EXECUTED_ONCE: if (dump_file && (dump_flags & TDF_DETAILS)) ! fprintf (dump_file, " Called by %s that is executed once\n", ! 
cgraph_node_name (node)); maybe_unlikely_executed = false; if (edge->loop_nest) { *************** cgraph_propagate_frequency (struct cgrap *** 2652,2678 **** case NODE_FREQUENCY_HOT: case NODE_FREQUENCY_NORMAL: if (dump_file && (dump_flags & TDF_DETAILS)) ! fprintf (dump_file, " Called by %s that is normal or hot\n", cgraph_node_name (node)); maybe_unlikely_executed = false; maybe_executed_once = false; break; } } ! if (maybe_unlikely_executed) ! { ! node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED; if (dump_file) ! fprintf (dump_file, "Node %s promoted to unlikely executed.\n", cgraph_node_name (node)); ! return true; ! } ! if (maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE) ! { ! node->frequency = NODE_FREQUENCY_EXECUTED_ONCE; if (dump_file) ! fprintf (dump_file, "Node %s promoted to executed once.\n", cgraph_node_name (node)); ! return true; ! } ! return false; } /* Return true when NODE can not return or throw and thus --- 2673,2724 ---- case NODE_FREQUENCY_HOT: case NODE_FREQUENCY_NORMAL: if (dump_file && (dump_flags & TDF_DETAILS)) ! fprintf (dump_file, " Called by %s that is normal or hot\n", ! cgraph_node_name (node)); maybe_unlikely_executed = false; maybe_executed_once = false; break; } } ! if ((only_called_at_startup && !only_called_at_exit) ! && !node->only_called_at_startup) ! { ! node->only_called_at_startup = true; if (dump_file) ! fprintf (dump_file, "Node %s promoted to only called at startup.\n", ! cgraph_node_name (node)); ! changed = true; ! } ! if ((only_called_at_exit && !only_called_at_startup) ! && !node->only_called_at_exit) ! { ! node->only_called_at_exit = true; if (dump_file) ! fprintf (dump_file, "Node %s promoted to only called at exit.\n", ! cgraph_node_name (node)); ! changed = true; ! } ! /* These come either from profile or user hints; never update them. */ ! if (node->frequency == NODE_FREQUENCY_HOT ! || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) ! return changed; ! 
if (maybe_unlikely_executed) ! { ! node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED; ! if (dump_file) ! fprintf (dump_file, "Node %s promoted to unlikely executed.\n", ! cgraph_node_name (node)); ! changed = true; ! } ! if (maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE) ! { ! node->frequency = NODE_FREQUENCY_EXECUTED_ONCE; ! if (dump_file) ! fprintf (dump_file, "Node %s promoted to executed once.\n", ! cgraph_node_name (node)); ! changed = true; ! } ! return changed; } /* Return true when NODE can not return or throw and thus Index: cgraph.h =================================================================== *** cgraph.h (revision 165478) --- cgraph.h (working copy) *************** struct GTY((chain_next ("%h.next"), chai *** 301,306 **** --- 301,310 ---- /* How commonly executed the node is. Initialized during branch probabilities pass. */ ENUM_BITFIELD (node_frequency) frequency : 2; + /* True when function can only be called at startup (from static ctor). */ + unsigned only_called_at_startup : 1; + /* True when function can only be called at startup (from static dtor). 
*/ + unsigned only_called_at_exit : 1; }; typedef struct cgraph_node *cgraph_node_ptr; Index: defaults.h =================================================================== *** defaults.h (revision 165478) --- defaults.h (working copy) *************** see the files COPYING3 and COPYING.RUNTI *** 939,944 **** --- 939,952 ---- #define TARGET_DEC_EVAL_METHOD 2 #endif + #ifndef STARTUP_TEXT_SECTION_NAME + #define STARTUP_TEXT_SECTION_NAME ".text.startup" + #endif + + #ifndef EXIT_TEXT_SECTION_NAME + #define EXIT_TEXT_SECTION_NAME ".text.exit" + #endif + #ifndef HOT_TEXT_SECTION_NAME #define HOT_TEXT_SECTION_NAME ".text.hot" #endif Index: lto-cgraph.c =================================================================== *** lto-cgraph.c (revision 165478) --- lto-cgraph.c (working copy) *************** lto_output_node (struct lto_simple_outpu *** 518,523 **** --- 518,525 ---- bp_pack_value (&bp, node->alias, 1); bp_pack_value (&bp, node->finalized_by_frontend, 1); bp_pack_value (&bp, node->frequency, 2); + bp_pack_value (&bp, node->only_called_at_startup, 1); + bp_pack_value (&bp, node->only_called_at_exit, 1); lto_output_bitpack (&bp); lto_output_uleb128_stream (ob->main_stream, node->resolution); *************** input_overwrite_node (struct lto_file_de *** 978,983 **** --- 980,987 ---- node->alias = bp_unpack_value (bp, 1); node->finalized_by_frontend = bp_unpack_value (bp, 1); node->frequency = (enum node_frequency)bp_unpack_value (bp, 2); + node->only_called_at_startup = bp_unpack_value (bp, 1); + node->only_called_at_exit = bp_unpack_value (bp, 1); node->resolution = resolution; } Index: predict.c =================================================================== *** predict.c (revision 165478) --- predict.c (working copy) *************** compute_function_frequency (void) *** 2187,2192 **** --- 2187,2197 ---- { basic_block bb; struct cgraph_node *node = cgraph_node (current_function_decl); + if (DECL_STATIC_CONSTRUCTOR (current_function_decl) + || MAIN_NAME_P 
(DECL_NAME (current_function_decl))) + node->only_called_at_startup = true; + if (DECL_STATIC_DESTRUCTOR (current_function_decl)) + node->only_called_at_exit = true; if (!profile_info || !flag_branch_probabilities) { *************** choose_function_section (void) *** 2233,2238 **** --- 2238,2250 ---- || DECL_ONE_ONLY (current_function_decl)) return; + if (node->only_called_at_startup) + DECL_SECTION_NAME (current_function_decl) = + build_string (strlen (STARTUP_TEXT_SECTION_NAME), STARTUP_TEXT_SECTION_NAME); + else if (node->only_called_at_exit) + DECL_SECTION_NAME (current_function_decl) = + build_string (strlen (STARTUP_TEXT_SECTION_NAME), EXIT_TEXT_SECTION_NAME); + /* If we are doing the partitioning optimization, let the optimization choose the correct section into which to put things. */ Index: config/i386/darwin.h =================================================================== *** config/i386/darwin.h (revision 165478) --- config/i386/darwin.h (working copy) *************** extern int darwin_emit_branch_islands; *** 198,203 **** --- 198,207 ---- #define HOT_TEXT_SECTION_NAME "__TEXT,__text,regular,pure_instructions" #define UNLIKELY_EXECUTED_TEXT_SECTION_NAME \ "__TEXT,__unlikely,regular,pure_instructions" + #define STARTUP_TEXT_SECTION_NAME \ + "__TEXT,__startup,regular,pure_instructions" + #define EXIT_TEXT_SECTION_NAME \ + "__TEXT,__exit,regular,pure_instructions" /* Assembler pseudos to introduce constants of various size. 
*/ Index: config/ia64/hpux.h =================================================================== *** config/ia64/hpux.h (revision 165478) --- config/ia64/hpux.h (working copy) *************** do { \ *** 226,228 **** --- 226,234 ---- #undef HOT_TEXT_SECTION_NAME #define HOT_TEXT_SECTION_NAME ".text" + + #undef STARTUP_TEXT_SECTION_NAME + #define STARTUP_TEXT_SECTION_NAME ".text" + + #undef EXIT_TEXT_SECTION_NAME + #define EXIT_TEXT_SECTION_NAME ".text" Index: config/rs6000/darwin.h =================================================================== *** config/rs6000/darwin.h (revision 165478) --- config/rs6000/darwin.h (working copy) *************** extern int darwin_emit_branch_islands; *** 188,193 **** --- 188,195 ---- #define HOT_TEXT_SECTION_NAME "__TEXT,__text,regular,pure_instructions" #define UNLIKELY_EXECUTED_TEXT_SECTION_NAME \ "__TEXT,__unlikely,regular,pure_instructions" + #define STARTUP_TEXT_SECTION_NAME "__TEXT,__startup,regular,pure_instructions" + #define EXIT_TEXT_SECTION_NAME "__TEXT,__exit,regular,pure_instructions" /* Define cutoff for using external functions to save floating point. Currently on Darwin, always use inline stores. */