Subject: [spu-4_5-branch][Patch,committed] Various fixes to software icache
From: Revital1 Eres @ 2009-11-30  8:32 UTC
  To: gcc-patches; +Cc: Ulrich Weigand

Hello,

This patch contains various fixes for problems in the software i-cache
implementation which Ulrich discovered and fixed while testing it.
Among other things, the estimate_section_overhead target hook is
renamed to fallthru_edge_overhead and the section overhead is now
computed per basic block, the SPU jump-table size estimate is
corrected, and expand_case now passes the case range rather than the
case count to dont_create_jumptable.

Tested on the SPU and committed to the branch.

Thanks,
Revital


     (See attached file: fixes_30_11.txt)

[-- Attachment #2: fixes_30_11.txt --]
[-- Type: text/plain, Size: 18340 bytes --]

Index: ChangeLog.spu-4_5
===================================================================
--- ChangeLog.spu-4_5	(revision 154669)
+++ ChangeLog.spu-4_5	(working copy)
@@ -1,3 +1,31 @@
+2009-11-30  Ulrich Weigand  <Ulrich.Weigand@de.ibm.com>
+	    Revital Eres  <eres@il.ibm.com>
+
+	* target.h (bb_partitioning_into_sections): Rename
+	estimate_section_overhead to fallthru_edge_overhead.
+	* target-def.h (TARGET_ESTIMATE_SECTION_OVERHEAD): Rename to...
+	(TARGET_FALLTHRU_EDGE_OVERHEAD): This.
+	* bb-reorder.c  (start_new_section_for_loop): Add assert.
+	(get_estimate_section_overhead): Consider only basic-blocks with
+	fallthru edges.
+	(create_sections): Add cost of branch overhead when splitting
+	basic-blocks, change the condition for starting a new section,
+	and call get_estimate_section_overhead.
+	(instruction_size_exceeds_threshold): Call
+	get_estimate_section_overhead for each basic-block.
+	(gate_handle_partition_blocks_size): Remove DECL_ONE_ONLY
+	restriction and the call to get_estimate_section_overhead.
+	* config/spu/spu.c (spu_estimate_section_overhead): Rename to...
+	(spu_fallthru_edge_overhead): This.
+	(TARGET_ESTIMATE_SECTION_OVERHEAD): Rename to...
+	(TARGET_FALLTHRU_EDGE_OVERHEAD): This.
+	(get_stub_size): Don't add stub size for jump-tables.
+	(spu_dont_create_jumptable): Change table-size calculation.
+	(record_link_elements_liveness): Add missing REGNO.
+	(pass_partition_block): Remove TODO_dump_func from todo_flags_start. 
+	* stmt.c (expand_case): Pass the case range plus one instead of
+	the case count to dont_create_jumptable.
+
 2009-11-17  Ulrich Weigand  <Ulrich.Weigand@de.ibm.com>
 
 	* bb-reorder.c (estimate_size_of_insns_in_bb): Skip debug insns.
Index: target.h
===================================================================
--- target.h	(revision 154669)
+++ target.h	(working copy)
@@ -805,10 +805,6 @@ struct gcc_target
   /* Functions relating to basic-block partitioning into sections.  */
   struct bb_partitioning_into_sections
   {
-    /* Estimate the number of extra instructions that will be added for each
-       section.  Called when partitioning a function into sections.  */
-    unsigned HOST_WIDE_INT (* estimate_section_overhead) (void);
-
     /* Return the size of instruction in bytes.  Take into account the
        size of extra machine depndent instructions that can be added as
        a result of insn. (like branch-hints for branch instructions).
@@ -833,6 +829,11 @@ struct gcc_target
     bool (* dont_create_jumptable) (unsigned int table_size);
     /* Free the data structures needed for function partitioning.  */
     void (* fpart_finalize) (void);
+    /* Estimate the size in bytes of the extra instructions that will
+       be added for a fallthru edge that might be converted into a branch
+       between two sections.  Called when partitioning a function into
+       sections.  */
+    unsigned HOST_WIDE_INT (* fallthru_edge_overhead) (void);
   } bb_partitioning;
 
   /* Create the __builtin_va_list type.  */
Index: testsuite/gcc.target/spu/icache-1.c
===================================================================
--- testsuite/gcc.target/spu/icache-1.c	(revision 154669)
+++ testsuite/gcc.target/spu/icache-1.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -msoftware-icache -fdump-rtl-fpart" } */
+/* { dg-options "-O2 -msoftware-icache" } */
 
 #include <stdarg.h>
 
@@ -51,6 +51,4 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-rtl-dump-times "icache_ptr_handler" 1 "fpart" } } */
-/* { dg-final { cleanup-rtl-dump "fpart" } } */
-
+/* { dg-final { scan-assembler-times "icache_ptr_handler" 1 } } */
Index: testsuite/gcc.target/spu/icache-2.c
===================================================================
--- testsuite/gcc.target/spu/icache-2.c	(revision 154669)
+++ testsuite/gcc.target/spu/icache-2.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -msoftware-icache -fdump-rtl-fpart" } */
+/* { dg-options "-O2 -msoftware-icache" } */
 
 int i;
 
@@ -23,6 +23,5 @@ int main()
   }
 }
 
-/* { dg-final { scan-rtl-dump-times "icache_ptr_handler" 1 "fpart" } } */
-/* { dg-final { cleanup-rtl-dump "fpart" } } */
+/* { dg-final { scan-assembler-times "icache_ptr_handler" 1 } } */
 
Index: testsuite/ChangeLog.spu-4_5
===================================================================
--- testsuite/ChangeLog.spu-4_5	(revision 154669)
+++ testsuite/ChangeLog.spu-4_5	(working copy)
@@ -1,3 +1,8 @@
+2009-11-30  Revital Eres  <eres@il.ibm.com>
+
+	* gcc.target/spu/icache-1.c: Scan assembler instead of dump file.
+	* gcc.target/spu/icache-2.c: Likewise.
+
 2009-05-18  Revital Eres  <eres@il.ibm.com>
 
 	* gcc.target/spu/icache-1.c: Change dump file check.
Index: target-def.h
===================================================================
--- target-def.h	(revision 154669)
+++ target-def.h	(working copy)
@@ -449,21 +449,21 @@
 #define TARGET_SECTION_TYPE_FLAGS default_section_type_flags
 #endif
 
-#define TARGET_ESTIMATE_SECTION_OVERHEAD 0
 #define TARGET_ESTIMATE_INSTRUCTION_SIZE 0
 #define TARGET_START_NEW_SECTION 0
 #define TARGET_LEGAL_BREAKPOINT 0
 #define TARGET_DONT_CREATE_JUMPTABLE 0
 #define TARGET_FPART_FINALIZE 0
+#define TARGET_FALLTHRU_EDGE_OVERHEAD 0
 
 #define TARGET_BB_PARTITIONING						\
   {									\
-     TARGET_ESTIMATE_SECTION_OVERHEAD,					\
      TARGET_ESTIMATE_INSTRUCTION_SIZE,					\
      TARGET_START_NEW_SECTION,						\
      TARGET_LEGAL_BREAKPOINT,						\
      TARGET_DONT_CREATE_JUMPTABLE,					\
-     TARGET_FPART_FINALIZE						\
+     TARGET_FPART_FINALIZE,						\
+     TARGET_FALLTHRU_EDGE_OVERHEAD					\
   }
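
For illustration only (not part of the patch), a port would wire the new
hook up roughly like this; the function name and the 4-byte branch size
are made-up example values, and the real SPU implementation appears in
config/spu/spu.c further down:

  /* Example: overhead, in bytes, of the unconditional branch that may
     replace a fallthru edge crossing a section boundary.  */
  static unsigned HOST_WIDE_INT
  example_fallthru_edge_overhead (void)
  {
    return 4;  /* One 4-byte branch on this assumed target.  */
  }

  #undef TARGET_FALLTHRU_EDGE_OVERHEAD
  #define TARGET_FALLTHRU_EDGE_OVERHEAD example_fallthru_edge_overhead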
 
 
Index: bb-reorder.c
===================================================================
--- bb-reorder.c	(revision 154669)
+++ bb-reorder.c	(working copy)
@@ -1518,6 +1518,8 @@ start_new_section_for_loop (basic_block 
   if (last_section_size == 0)
     return false;
 
+  gcc_assert (estimate_max_section_size != 0);
+
   /* The loops are sorted in loops_info_list according to the loop size
      (in bytes); where the loop with the samllest layout appears first.
    */
@@ -1543,6 +1545,62 @@ start_new_section_for_loop (basic_block 
   return false;
 }
 
+/* Compute the overhead (in bytes) of creating a new section; the
+   unconditional branch instruction that may be added between sections
+   must be taken into account.  */
+static void
+get_estimate_section_overhead (basic_block bb)
+{
+  edge e;
+  edge_iterator ei;
+
+  gcc_assert (flag_partition_functions_into_sections != 0);
+
+  estimate_section_overhead = 0;
+  estimate_max_section_size = 0;
+
+  if (uncond_jump_length == 0)
+    uncond_jump_length = get_uncond_jump_length ();
+
+  if (dump_file)
+    fprintf (dump_file, ";; In get_estimate_section_overhead\n");
+  /* If the basic-block does not have a fallthru edge then there is no
+     need to add overhead for it.  */
+  FOR_EACH_EDGE (e, ei, bb->succs)
+  {
+    if (dump_file)
+      {
+	dump_edge_info (dump_file, e, 1);
+	fprintf (dump_file, "\n");
+      }
+    if ((e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_FALLTHRU))
+      {
+	if (dump_file)
+	  {
+	    fprintf (dump_file, "Found fallthru edge: ");
+	    dump_edge_info (dump_file, e, 1);
+	    fprintf (dump_file, "\n");
+	  }
+
+	if (targetm.bb_partitioning.fallthru_edge_overhead != 0)
+	  {
+	    /* The machine dependent pass could add extra instructions
+	       as a result of the new branches.  */
+	    estimate_section_overhead =
+	      targetm.bb_partitioning.fallthru_edge_overhead ();
+	    return;
+	  }
+	else
+	  {
+	    estimate_section_overhead = uncond_jump_length;
+	    return;
+	  }
+      }
+  }
+  if (dump_file)
+    fprintf (dump_file, ";; Fallthru edge does not exist\n");
+}
+
 /* Create sections for the current function.  Return the edges that
    cross between sections in CROSSING_EDGES array which is of size
    N_CROSSING_EDGES so they could be fixed later.  
@@ -1581,15 +1639,18 @@ create_sections (void)
   unsigned HOST_WIDE_INT last_section_size = 0;
   int current_section_id = 0;
   unsigned HOST_WIDE_INT first_partition_actual_size;
-  
+  int split_section_overhead = 0;
+
+  if (targetm.bb_partitioning.fallthru_edge_overhead != 0)
+    split_section_overhead = targetm.bb_partitioning.fallthru_edge_overhead ();
+  else
+    split_section_overhead = uncond_jump_length;
+
   /* Mark which section each basic block belongs in.  */
   if (dump_file)
     fprintf (dump_file,
-	     "\n\n--- partition functions into sections --- (%dB)\n\n"
-	     ";; section overhead size %dB\n" ";; max section size %dB\n\n",
-	     flag_partition_functions_into_sections,
-	     (int) estimate_section_overhead,
-	     (int) estimate_max_section_size);
+	     "\n\n--- partition functions into sections --- (%dB)\n\n",
+	     flag_partition_functions_into_sections);
   
   FOR_EACH_BB (bb)
     {
@@ -1597,17 +1658,24 @@ create_sections (void)
       bool start_new_section_for_loop_p = false;
       bool start_new_section_due_to_hotness_prop_p = false;
       bool start_new_sction_md_p = false;
+      unsigned HOST_WIDE_INT first_partition_size = 0;
       
       /* Validate that fbb_data array has a corresponding element for
 	 this bb, so we could access it directly.  */
       validate_fbb_data_element (bb->index);
-      
+      get_estimate_section_overhead (bb);
+      /* Update the maximum section size (in bytes).  */
+      estimate_max_section_size =
+	flag_partition_functions_into_sections - estimate_section_overhead;
       bb_size = estimate_size_of_insns_in_bb (bb);
       if (dump_file)
 	fprintf (dump_file,
 		 ";; Trying to add bb %d (" HOST_WIDE_INT_PRINT_DEC
-		 "B) to section %d", bb->index, bb_size, current_section_id);
-     
+		 "B) to section %d \n;; section overhead size %dB\n"
+		 ";; max section size %dB\n\n", bb->index, bb_size, 
+		 current_section_id, (int) estimate_section_overhead,
+		 (int) estimate_max_section_size);
+      
       if (targetm.bb_partitioning.start_new_section != 0)
         start_new_sction_md_p =
           targetm.bb_partitioning.start_new_section (bb->index, bb_size,
@@ -1683,8 +1751,9 @@ create_sections (void)
 	     3) The hotness property of this basic block is different then
 	     the previous.  
              4) There is a machine-specific reasons.  */
-	  if ((last_section_size != 0
-	       && bb_size <= estimate_max_section_size)
+ 	  if ((last_section_size != 0
+ 	       && bb_size <= estimate_max_section_size)
+              || last_section_size > estimate_max_section_size / 2 
 	      || start_new_section_for_loop_p
 	      || start_new_section_due_to_hotness_prop_p
 	      || start_new_sction_md_p)
@@ -1699,12 +1768,16 @@ create_sections (void)
 	      continue;
 	    }
 
+	  first_partition_size = flag_partition_functions_into_sections
+	    - last_section_size - split_section_overhead;
+	  if (dump_file)
+	    fprintf (dump_file, ";; Split bb with first partition size: " 
+		     HOST_WIDE_INT_PRINT_DEC "\n", first_partition_size);
 	  /* Split the basic-block.  Try to insert it's first partition
 	     to the last section such that the section size will not exceed
 	     the section size threshold.  */
 	  new_bb =
-	    split_bb (bb,
-		      estimate_max_section_size - last_section_size,
+	    split_bb (bb, first_partition_size,
 		      &first_partition_actual_size);
 	  
 	  if (new_bb != NULL)
@@ -3212,33 +3285,6 @@ struct rtl_opt_pass pass_partition_block
  }
 };
 
-/* The overhead (in bytes) of creating a new section includes adding
-   unconditional branch instruction between sections should also be
-   taken into account.  */
-static void
-get_estimate_section_overhead (void)
-{
-  gcc_assert (flag_partition_functions_into_sections != 0);
-
-  estimate_section_overhead = 0;
-  estimate_max_section_size = 0;
-
-  if (uncond_jump_length == 0)
-    uncond_jump_length = get_uncond_jump_length ();
-
-  if (targetm.bb_partitioning.estimate_section_overhead != 0)
-    {
-      /* The machine depndent pass could add extra instructions
-         as a result of the new branches.  */
-      estimate_section_overhead =
-        targetm.bb_partitioning.estimate_section_overhead ();
-    }
-  /* Add the size of the new branch that will be created for each
-     sections.  */
-  else
-    estimate_section_overhead += uncond_jump_length;
-}
-
 /* Return TRUE if an instruction exists such that it exceeds the threshold
    of the section size.  Otherwise return FALSE.  */
 static bool
@@ -3250,6 +3296,11 @@ instruction_size_exceeds_threshold (void
 
   FOR_EACH_BB (bb)
     {
+      /* Estimate the overhead of creating a new section in terms of the
+	 new instructions that are needed to support it and must be taken
+	 into account.  */
+      get_estimate_section_overhead (bb);
+      
       FOR_BB_INSNS (bb, insn)
 	{
 	  if (NONDEBUG_INSN_P (insn) || NOTE_P (insn))
@@ -3277,15 +3328,9 @@ static bool
 gate_handle_partition_blocks_size (void)
 {
   if ((flag_partition_functions_into_sections == 0)
-      || DECL_ONE_ONLY (current_function_decl)
       || user_defined_section_attribute)
     return 0;
  
-  /* Estimate the overhead in creating new section in term of the new
-     instruction that are needed to support it and need to be
-     considered.  */
-  get_estimate_section_overhead ();
-    
   /* Make sure there is no instruction with size that exceeds the
      estimated section size.  */
   return (!instruction_size_exceeds_threshold ());
@@ -3354,7 +3399,7 @@ struct rtl_opt_pass pass_partition_block
    0,                                    /* properties_required */
    0,                                    /* properties_provided */
    0,                                    /* properties_destroyed */
-   TODO_dump_func,                       /* todo_flags_start */
+   0,                                    /* todo_flags_start */
    TODO_dump_func,                       /* todo_flags_finish */
   }
 };
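
To make the new split arithmetic in create_sections concrete, here is a
small worked example with made-up numbers (only the formulas come from
the code above):

  /* Illustrative values only:
       flag_partition_functions_into_sections (section threshold) = 2048 bytes
       last_section_size (already placed in the current section)  = 1500 bytes
       split_section_overhead (branch created by the split)       =   12 bytes

     first_partition_size = 2048 - 1500 - 12 = 536

     so at most 536 bytes of the over-sized basic block go into the
     current section and the remainder starts a new section.  Likewise,
     with estimate_section_overhead = 12, the per-block limit becomes
     estimate_max_section_size = 2048 - 12 = 2036 bytes.  */
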
Index: config/spu/spu.c
===================================================================
--- config/spu/spu.c	(revision 154669)
+++ config/spu/spu.c	(working copy)
@@ -245,7 +245,7 @@ static section *spu_select_section (tree
 static void spu_unique_section (tree, int);
 static rtx spu_expand_load (rtx, rtx, rtx, int);
 static void spu_trampoline_init (rtx, tree, rtx);
-static unsigned HOST_WIDE_INT spu_estimate_section_overhead (void);
+static unsigned HOST_WIDE_INT spu_fallthru_edge_overhead (void);
 static unsigned HOST_WIDE_INT spu_estimate_instruction_size (rtx);
 static bool begin_critical_section (rtx, enum critical_section_type *);
 static bool end_critical_section (rtx, enum critical_section_type *);
@@ -498,8 +498,8 @@ static const struct attribute_spec spu_a
 #undef TARGET_TRAMPOLINE_INIT
 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
 
-#undef TARGET_ESTIMATE_SECTION_OVERHEAD
-#define TARGET_ESTIMATE_SECTION_OVERHEAD spu_estimate_section_overhead
+#undef TARGET_FALLTHRU_EDGE_OVERHEAD
+#define TARGET_FALLTHRU_EDGE_OVERHEAD spu_fallthru_edge_overhead
 
 #undef TARGET_ESTIMATE_INSTRUCTION_SIZE
 #define TARGET_ESTIMATE_INSTRUCTION_SIZE spu_estimate_instruction_size
@@ -2267,9 +2267,9 @@ get_stub_size (rtx insn)
       /* If the branch instruction and the branch target are in the
          same basic-block they will probably be in the same section
          as well.  Do not add the stub size in this case.  */
-      if (!tablejump_p (insn, NULL, NULL)
-	  && JUMP_LABEL (insn)
-	  && (BLOCK_NUM (JUMP_LABEL (insn)) == BLOCK_NUM (insn)))
+      if (tablejump_p (insn, NULL, NULL)
+	  || (JUMP_LABEL (insn)
+	      && BLOCK_NUM (JUMP_LABEL (insn)) == BLOCK_NUM (insn)))
 	stub_size = 0;
       
       /* For indirect branches including jump-tables (not including the
@@ -2435,14 +2435,14 @@ spu_estimate_instruction_size (rtx insn)
   return size;
 }
 
-/* Estimate the size in bytes of the extra instructions that will be
-   generated for each section as a result of creating a new branch for
-   that section.  Called when partitioning a function into sections.  */
+/* Estimate the size in bytes of the extra instructions that will be added
+   for a fallthru edge that might be converted into a branch between
+   two sections.  Called when partitioning a function into sections.  */
 static unsigned HOST_WIDE_INT
-spu_estimate_section_overhead (void)
+spu_fallthru_edge_overhead (void)
 {
   int extra_branch_insns = 0;
-  
+
   if (TARGET_BRANCH_HINTS && optimize != 0)
     {
       /* Add the nops and branch hint which are added for each branch.
@@ -3104,11 +3104,11 @@ spu_dont_create_jumptable (unsigned int 
   /* For the software icache scheme we should take into account the
      inline check.  */
   if (TARGET_SOFTWARE_ICACHE)
-    table_size += (12 * 4);
+    table_size += (TARGET_LARGE_MEM ? 30 : 18) * 4;
 
   if (flag_partition_functions_into_sections == 0)
     return false;
-  
+
   if ((table_size) > (unsigned int)flag_partition_functions_into_sections)
     return true;
   return false;
@@ -3220,7 +3220,7 @@ record_link_elements_liveness (void)
 	  else if (REG_P (dest)
 		   && REGNO (dest) == STACK_POINTER_REGNUM
 		   && GET_CODE (src) == PLUS
-		   && XEXP (src, 0) == STACK_POINTER_REGNUM
+		   && REGNO (XEXP (src, 0)) == STACK_POINTER_REGNUM
 		   && (GET_CODE (XEXP (src, 1)) == CONST_INT)
 		   && (INTVAL (XEXP (src, 1)) > 0))
 
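For reference, the stack-pointer adjustment that
record_link_elements_liveness matches has roughly this RTL shape (the
constant is made up for the example):

  (set (reg $sp)
       (plus (reg $sp)
             (const_int 16)))

XEXP (src, 0) here is the inner (reg $sp) rtx rather than a register
number, so its REGNO has to be extracted before comparing it against
STACK_POINTER_REGNUM; comparing the rtx itself, as the old code did,
could never match.
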
Index: stmt.c
===================================================================
--- stmt.c	(revision 154669)
+++ stmt.c	(working copy)
@@ -2392,7 +2392,8 @@ expand_case (gimple stmt)
 		  as negative numbers.  */
 	       || compare_tree_int (range, 0) < 0
 	       || ((targetm.bb_partitioning.dont_create_jumptable != 0)
-	           && targetm.bb_partitioning.dont_create_jumptable (count))
+	           && targetm.bb_partitioning.dont_create_jumptable
+		   (tree_low_cst (range, 0) + 1))
 #ifndef ASM_OUTPUT_ADDR_DIFF_ELT
 	       || flag_pic
 #endif
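
To illustrate the stmt.c change: the number of slots in a jump table is
determined by the span of the case values, not by how many case labels
there are.  A made-up example:

  /* Three case labels (count == 3), but the values span 0..100, so a
     jump table for this switch needs range + 1 == 101 entries; passing
     the label count to dont_create_jumptable would badly underestimate
     the table size.  */
  switch (x)
    {
    case 0:   f0 (); break;
    case 5:   f5 (); break;
    case 100: f100 (); break;
    default:  break;
    }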
