* [spu-4_5-branch][Patch,committed] Various fixes to software icache
@ 2009-11-30 8:32 Revital1 Eres
0 siblings, 0 replies; only message in thread
From: Revital1 Eres @ 2009-11-30 8:32 UTC (permalink / raw)
To: gcc-patches; +Cc: Ulrich Weigand
[-- Attachment #1: Type: text/plain, Size: 271 bytes --]
Hello,
This patch contains various fixes to problems in the software i-cache
implementation
which Ulrich discovered and fixed while testing the software icache.
Tested on the SPU and committed to the branch.
Thanks,
Revital
(See attached file: fixes_30_11.txt)
[-- Attachment #2: fixes_30_11.txt --]
[-- Type: text/plain, Size: 18340 bytes --]
Index: ChangeLog.spu-4_5
===================================================================
--- ChangeLog.spu-4_5 (revision 154669)
+++ ChangeLog.spu-4_5 (working copy)
@@ -1,3 +1,31 @@
+2009-11-30 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
+ Revital Eres <eres@il.ibm.com>
+
+ * target.h (bb_partitioning_into_sections): Rename
+ estimate_section_overhead to fallthru_edge_overhead.
+ * target-def.h (TARGET_ESTIMATE_SECTION_OVERHEAD): Rename to...
+ (TARGET_FALLTHRU_EDGE_OVERHEAD): This.
+ * bb-reorder.c (start_new_section_for_loop): Add assert.
+ (get_estimate_section_overhead): Consider only basic-blocks with
+ fallthru edges.
+ (create_sections): Add cost of branch overhead when splitting
+ basic-blocks, change condition when to start a new section and
+ call get_estimate_section_overhead.
+ (instruction_size_exceeds_threshold): Call
+ get_estimate_section_overhead for each basic-block.
+ (gate_handle_partition_blocks_size): Remove DECL_ONE_ONLY
+ restriction and the call to get_estimate_section_overhead.
+ * config/spu/spu.c (spu_estimate_section_overhead): Rename to...
+ (spu_fallthru_edge_overhead): This.
+ (TARGET_ESTIMATE_SECTION_OVERHEAD): Rename to...
+ (TARGET_FALLTHRU_EDGE_OVERHEAD): This.
+ (get_stub_size): Don't add stub size for jump-tables.
+ (spu_dont_create_jumptable): Change table-size calculation.
+ (record_link_elements_liveness): Add missing REGNO.
+ (pass_partition_block): Remove TODO_dump_func from todo_flags_start.
+ * stmt.c (expand_case): Pass range instead of count to
+ dont_create_jumptable.
+
2009-11-17 Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
* bb-reorder.c (estimate_size_of_insns_in_bb): Skip debug insns.
Index: target.h
===================================================================
--- target.h (revision 154669)
+++ target.h (working copy)
@@ -805,10 +805,6 @@ struct gcc_target
/* Functions relating to basic-block partitioning into sections. */
struct bb_partitioning_into_sections
{
- /* Estimate the number of extra instructions that will be added for each
- section. Called when partitioning a function into sections. */
- unsigned HOST_WIDE_INT (* estimate_section_overhead) (void);
-
/* Return the size of instruction in bytes. Take into account the
size of extra machine dependent instructions that can be added as
a result of insn. (like branch-hints for branch instructions).
@@ -833,6 +829,11 @@ struct gcc_target
bool (* dont_create_jumptable) (unsigned int table_size);
/* Free the data structures needed for function partitioning. */
void (* fpart_finalize) (void);
+ /* Estimate the number of extra instructions (in bytes) that will
+ be added for a fallthru edge that might be converted into a branch
+ between two sections. Called when partitioning a function into
+ sections. */
+ unsigned HOST_WIDE_INT (* fallthru_edge_overhead) (void);
} bb_partitioning;
/* Create the __builtin_va_list type. */
Index: testsuite/gcc.target/spu/icache-1.c
===================================================================
--- testsuite/gcc.target/spu/icache-1.c (revision 154669)
+++ testsuite/gcc.target/spu/icache-1.c (working copy)
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -msoftware-icache -fdump-rtl-fpart" } */
+/* { dg-options "-O2 -msoftware-icache" } */
#include <stdarg.h>
@@ -51,6 +51,4 @@ main (void)
return 0;
}
-/* { dg-final { scan-rtl-dump-times "icache_ptr_handler" 1 "fpart" } } */
-/* { dg-final { cleanup-rtl-dump "fpart" } } */
-
+ /* { dg-final { scan-assembler-times "icache_ptr_handler" 1 } } */
Index: testsuite/gcc.target/spu/icache-2.c
===================================================================
--- testsuite/gcc.target/spu/icache-2.c (revision 154669)
+++ testsuite/gcc.target/spu/icache-2.c (working copy)
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -msoftware-icache -fdump-rtl-fpart" } */
+/* { dg-options "-O2 -msoftware-icache " } */
int i;
@@ -23,6 +23,5 @@ int main()
}
}
-/* { dg-final { scan-rtl-dump-times "icache_ptr_handler" 1 "fpart" } } */
-/* { dg-final { cleanup-rtl-dump "fpart" } } */
+ /* { dg-final { scan-assembler-times "icache_ptr_handler" 1 } } */
Index: testsuite/ChangeLog.spu-4_5
===================================================================
--- testsuite/ChangeLog.spu-4_5 (revision 154669)
+++ testsuite/ChangeLog.spu-4_5 (working copy)
@@ -1,3 +1,8 @@
+2009-11-30 Revital Eres <eres@il.ibm.com>
+
+ * gcc.target/spu/icache-1.c: Change dump check.
+ * gcc.target/spu/icache-2.c: Likewise.
+
2009-05-18 Revital Eres <eres@il.ibm.com>
* gcc.target/spu/icache-1.c: Change dump file check.
Index: target-def.h
===================================================================
--- target-def.h (revision 154669)
+++ target-def.h (working copy)
@@ -449,21 +449,21 @@
#define TARGET_SECTION_TYPE_FLAGS default_section_type_flags
#endif
-#define TARGET_ESTIMATE_SECTION_OVERHEAD 0
#define TARGET_ESTIMATE_INSTRUCTION_SIZE 0
#define TARGET_START_NEW_SECTION 0
#define TARGET_LEGAL_BREAKPOINT 0
#define TARGET_DONT_CREATE_JUMPTABLE 0
#define TARGET_FPART_FINALIZE 0
+#define TARGET_FALLTHRU_EDGE_OVERHEAD 0
#define TARGET_BB_PARTITIONING \
{ \
- TARGET_ESTIMATE_SECTION_OVERHEAD, \
TARGET_ESTIMATE_INSTRUCTION_SIZE, \
TARGET_START_NEW_SECTION, \
TARGET_LEGAL_BREAKPOINT, \
TARGET_DONT_CREATE_JUMPTABLE, \
- TARGET_FPART_FINALIZE \
+ TARGET_FPART_FINALIZE, \
+ TARGET_FALLTHRU_EDGE_OVERHEAD \
}
Index: bb-reorder.c
===================================================================
--- bb-reorder.c (revision 154669)
+++ bb-reorder.c (working copy)
@@ -1518,6 +1518,8 @@ start_new_section_for_loop (basic_block
if (last_section_size == 0)
return false;
+ gcc_assert (estimate_max_section_size != 0);
+
/* The loops are sorted in loops_info_list according to the loop size
(in bytes), where the loop with the smallest layout appears first.
*/
@@ -1543,6 +1545,62 @@ start_new_section_for_loop (basic_block
return false;
}
+/* The overhead (in bytes) of creating a new section, including the
+ unconditional branch instruction added between sections, should
+ be taken into account. */
+static void
+get_estimate_section_overhead (basic_block bb)
+{
+ edge e;
+ edge_iterator ei;
+
+ gcc_assert (flag_partition_functions_into_sections != 0);
+
+ estimate_section_overhead = 0;
+ estimate_max_section_size = 0;
+
+ if (uncond_jump_length == 0)
+ uncond_jump_length = get_uncond_jump_length ();
+
+ if (dump_file)
+ fprintf (dump_file, ";; In get_estimate_section_overhead\n");
+ /* If the basic-block does not have fallthru edge then no need
+ to add overhead for it. */
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ if (dump_file)
+ {
+ dump_edge_info (dump_file, e, 1);
+ fprintf (dump_file, "\n");
+ }
+ if ((e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_FALLTHRU))
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, "Found fallthru edge: ");
+ dump_edge_info (dump_file, e, 1);
+ fprintf (dump_file, "\n");
+ }
+
+ if (targetm.bb_partitioning.fallthru_edge_overhead != 0)
+ {
+ /* The machine-dependent pass could add extra instructions
+ as a result of the new branches. */
+ estimate_section_overhead =
+ targetm.bb_partitioning.fallthru_edge_overhead ();
+ return;
+ }
+ else
+ {
+ estimate_section_overhead = uncond_jump_length;
+ return;
+ }
+ }
+ }
+ if (dump_file)
+ fprintf (dump_file, ";; Fallthru edge does not exist\n");
+}
+
/* Create sections for the current function. Return the edges that
cross between sections in CROSSING_EDGES array which is of size
N_CROSSING_EDGES so they could be fixed later.
@@ -1581,15 +1639,18 @@ create_sections (void)
unsigned HOST_WIDE_INT last_section_size = 0;
int current_section_id = 0;
unsigned HOST_WIDE_INT first_partition_actual_size;
-
+ int split_section_overhead = 0;
+
+ if (targetm.bb_partitioning.fallthru_edge_overhead != 0)
+ split_section_overhead = targetm.bb_partitioning.fallthru_edge_overhead ();
+ else
+ split_section_overhead = uncond_jump_length;
+
/* Mark which section each basic block belongs in. */
if (dump_file)
fprintf (dump_file,
- "\n\n--- partition functions into sections --- (%dB)\n\n"
- ";; section overhead size %dB\n" ";; max section size %dB\n\n",
- flag_partition_functions_into_sections,
- (int) estimate_section_overhead,
- (int) estimate_max_section_size);
+ "\n\n--- partition functions into sections --- (%dB)\n\n",
+ flag_partition_functions_into_sections);
FOR_EACH_BB (bb)
{
@@ -1597,17 +1658,24 @@ create_sections (void)
bool start_new_section_for_loop_p = false;
bool start_new_section_due_to_hotness_prop_p = false;
bool start_new_sction_md_p = false;
+ unsigned HOST_WIDE_INT first_partition_size = 0;
/* Validate that fbb_data array has a corresponding element for
this bb, so we could access it directly. */
validate_fbb_data_element (bb->index);
-
+ get_estimate_section_overhead (bb);
+ /* Update the maximum section size (in bytes). */
+ estimate_max_section_size =
+ flag_partition_functions_into_sections - estimate_section_overhead;
bb_size = estimate_size_of_insns_in_bb (bb);
if (dump_file)
fprintf (dump_file,
";; Trying to add bb %d (" HOST_WIDE_INT_PRINT_DEC
- "B) to section %d", bb->index, bb_size, current_section_id);
-
+ "B) to section %d \n;; section overhead size %dB\n"
+ ";; max section size %dB\n\n", bb->index, bb_size,
+ current_section_id, (int) estimate_section_overhead,
+ (int) estimate_max_section_size);
+
if (targetm.bb_partitioning.start_new_section != 0)
start_new_sction_md_p =
targetm.bb_partitioning.start_new_section (bb->index, bb_size,
@@ -1683,8 +1751,9 @@ create_sections (void)
3) The hotness property of this basic block is different from
the previous.
4) There is a machine-specific reasons. */
- if ((last_section_size != 0
- && bb_size <= estimate_max_section_size)
+ if ((last_section_size != 0
+ && bb_size <= estimate_max_section_size)
+ || last_section_size > estimate_max_section_size / 2
|| start_new_section_for_loop_p
|| start_new_section_due_to_hotness_prop_p
|| start_new_sction_md_p)
@@ -1699,12 +1768,16 @@ create_sections (void)
continue;
}
+ first_partition_size = flag_partition_functions_into_sections
+ - last_section_size - split_section_overhead;
+ if (dump_file)
+ fprintf (dump_file, ";; Split bb with first partition size: "
+ HOST_WIDE_INT_PRINT_DEC "\n", first_partition_size);
/* Split the basic-block. Try to insert its first partition
to the last section such that the section size will not exceed
the section size threshold. */
new_bb =
- split_bb (bb,
- estimate_max_section_size - last_section_size,
+ split_bb (bb, first_partition_size,
&first_partition_actual_size);
if (new_bb != NULL)
@@ -3212,33 +3285,6 @@ struct rtl_opt_pass pass_partition_block
}
};
-/* The overhead (in bytes) of creating a new section includes adding
- unconditional branch instruction between sections should also be
- taken into account. */
-static void
-get_estimate_section_overhead (void)
-{
- gcc_assert (flag_partition_functions_into_sections != 0);
-
- estimate_section_overhead = 0;
- estimate_max_section_size = 0;
-
- if (uncond_jump_length == 0)
- uncond_jump_length = get_uncond_jump_length ();
-
- if (targetm.bb_partitioning.estimate_section_overhead != 0)
- {
- /* The machine depndent pass could add extra instructions
- as a result of the new branches. */
- estimate_section_overhead =
- targetm.bb_partitioning.estimate_section_overhead ();
- }
- /* Add the size of the new branch that will be created for each
- sections. */
- else
- estimate_section_overhead += uncond_jump_length;
-}
-
/* Return TRUE if an instruction exists such that it exceeds the threshold
of the section size. Otherwise return FALSE. */
static bool
@@ -3250,6 +3296,11 @@ instruction_size_exceeds_threshold (void
FOR_EACH_BB (bb)
{
+ /* Estimate the overhead of creating a new section in terms of
+ the new instructions that are needed to support it and must be
+ considered. */
+ get_estimate_section_overhead (bb);
+
FOR_BB_INSNS (bb, insn)
{
if (NONDEBUG_INSN_P (insn) || NOTE_P (insn))
@@ -3277,15 +3328,9 @@ static bool
gate_handle_partition_blocks_size (void)
{
if ((flag_partition_functions_into_sections == 0)
- || DECL_ONE_ONLY (current_function_decl)
|| user_defined_section_attribute)
return 0;
- /* Estimate the overhead in creating new section in term of the new
- instruction that are needed to support it and need to be
- considered. */
- get_estimate_section_overhead ();
-
/* Make sure there is no instruction with size that exceeds the
estimated section size. */
return (!instruction_size_exceeds_threshold ());
@@ -3354,7 +3399,7 @@ struct rtl_opt_pass pass_partition_block
0, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
- TODO_dump_func, /* todo_flags_start */
+ 0, /* todo_flags_start */
TODO_dump_func, /* todo_flags_finish */
}
};
Index: config/spu/spu.c
===================================================================
--- config/spu/spu.c (revision 154669)
+++ config/spu/spu.c (working copy)
@@ -245,7 +245,7 @@ static section *spu_select_section (tree
static void spu_unique_section (tree, int);
static rtx spu_expand_load (rtx, rtx, rtx, int);
static void spu_trampoline_init (rtx, tree, rtx);
-static unsigned HOST_WIDE_INT spu_estimate_section_overhead (void);
+static unsigned HOST_WIDE_INT spu_fallthru_edge_overhead (void);
static unsigned HOST_WIDE_INT spu_estimate_instruction_size (rtx);
static bool begin_critical_section (rtx, enum critical_section_type *);
static bool end_critical_section (rtx, enum critical_section_type *);
@@ -498,8 +498,8 @@ static const struct attribute_spec spu_a
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
-#undef TARGET_ESTIMATE_SECTION_OVERHEAD
-#define TARGET_ESTIMATE_SECTION_OVERHEAD spu_estimate_section_overhead
+#undef TARGET_FALLTHRU_EDGE_OVERHEAD
+#define TARGET_FALLTHRU_EDGE_OVERHEAD spu_fallthru_edge_overhead
#undef TARGET_ESTIMATE_INSTRUCTION_SIZE
#define TARGET_ESTIMATE_INSTRUCTION_SIZE spu_estimate_instruction_size
@@ -2267,9 +2267,9 @@ get_stub_size (rtx insn)
/* If the branch instruction and the branch target are in the
same basic-block they will probably be in the same section
as well. Do not add the stub size in this case. */
- if (!tablejump_p (insn, NULL, NULL)
- && JUMP_LABEL (insn)
- && (BLOCK_NUM (JUMP_LABEL (insn)) == BLOCK_NUM (insn)))
+ if (tablejump_p (insn, NULL, NULL)
+ || (JUMP_LABEL (insn)
+ && BLOCK_NUM (JUMP_LABEL (insn)) == BLOCK_NUM (insn)))
stub_size = 0;
/* For indirect branches including jump-tables (not including the
@@ -2435,14 +2435,14 @@ spu_estimate_instruction_size (rtx insn)
return size;
}
-/* Estimate the size in bytes of the extra instructions that will be
- generated for each section as a result of creating a new branch for
- that section. Called when partitioning a function into sections. */
+/* Estimate the number of extra instructions (in bytes) that will be added
+ for a fallthru edge that might be converted into a branch between
+ two sections. Called when partitioning a function into sections. */
static unsigned HOST_WIDE_INT
-spu_estimate_section_overhead (void)
+spu_fallthru_edge_overhead (void)
{
int extra_branch_insns = 0;
-
+
if (TARGET_BRANCH_HINTS && optimize != 0)
{
/* Add the nops and branch hint which are added for each branch.
@@ -3104,11 +3104,11 @@ spu_dont_create_jumptable (unsigned int
/* For the software icache scheme we should take into account the
inline check. */
if (TARGET_SOFTWARE_ICACHE)
- table_size += (12 * 4);
+ table_size += (TARGET_LARGE_MEM? 30 : 18) * 4;
if (flag_partition_functions_into_sections == 0)
return false;
-
+
if ((table_size) > (unsigned int)flag_partition_functions_into_sections)
return true;
return false;
@@ -3220,7 +3220,7 @@ record_link_elements_liveness (void)
else if (REG_P (dest)
&& REGNO (dest) == STACK_POINTER_REGNUM
&& GET_CODE (src) == PLUS
- && XEXP (src, 0) == STACK_POINTER_REGNUM
+ && REGNO (XEXP (src, 0)) == STACK_POINTER_REGNUM
&& (GET_CODE (XEXP (src, 1)) == CONST_INT)
&& (INTVAL (XEXP (src, 1)) > 0))
Index: stmt.c
===================================================================
--- stmt.c (revision 154669)
+++ stmt.c (working copy)
@@ -2392,7 +2392,8 @@ expand_case (gimple stmt)
as negative numbers. */
|| compare_tree_int (range, 0) < 0
|| ((targetm.bb_partitioning.dont_create_jumptable != 0)
- && targetm.bb_partitioning.dont_create_jumptable (count))
+ && targetm.bb_partitioning.dont_create_jumptable
+ (tree_low_cst (range, 0) + 1))
#ifndef ASM_OUTPUT_ADDR_DIFF_ELT
|| flag_pic
#endif
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2009-11-30 8:18 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-11-30 8:32 [spu-4_5-branch][Patch,committed] Various fixes to software icache Revital1 Eres
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).