public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Separate jump bypassing pass
@ 2003-01-15 19:03 Roger Sayle
  2003-01-15 21:15 ` Richard Henderson
                   ` (2 more replies)
  0 siblings, 3 replies; 19+ messages in thread
From: Roger Sayle @ 2003-01-15 19:03 UTC (permalink / raw)
  To: gcc-patches



The following patch splits our current GCSE, so that the optimizations
that significantly modify a function's control flow graph are performed
after GCC's loop optimizations.

The motivation for this patch and SPECcpu2000 timings were presented
in http://gcc.gnu.org/ml/gcc/2002-09/msg01129.html back in September.
The patch below is "New Bypass" as described in that post.  I had
originally intended to post it for review shortly afterwards, but
Mark Mitchell requested that we concentrate fixing bugs in 3.2.1
and the 3.3 branch before I got around to it.
http://gcc.gnu.org/ml/gcc/2002-09/msg01157.html

This patch provides significant performance advantages on
SPECfp2000, but primarily it allows more aggressive forms of
GCSE without adversely affecting performance by confusing GCC's
loop optimizations with irreducible CFGs or misplaced loop notes.


The following patch has been tested on i686-pc-linux-gnu by a full
bootstrap, all languages except Ada and treelang, and a complete
"make -k check" with no new regressions.

Ok for mainline?


2003-01-15  Roger Sayle  <roger@eyesopen.com>

	* gcse.c (one_cprop_pass): Change function arguments to take both
	cprop_jumps and bypass_jumps flags instead of just alter_jumps.
	(gcse_main): Update calls to one_cprop_pass, disabling bypassing.
	(bypass_jumps): New function to perform separate jump bypassing pass.
	* rtl.h (bypass_jumps): Add function prototype.
	* timevar.def (TV_BYPASS): New timing variable.
	* toplev.c (enum dump_file_index): Add new entry DFI_bypass.
	(dump_file): New entry for the bypass RTL dump file.
	(rest_of_compilation): Insert new jump bypassing optimization
	pass after loop.
	* doc/passes.texi: Document new pass.


Index: gcse.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/gcse.c,v
retrieving revision 1.225
diff -c -3 -p -r1.225 gcse.c
*** gcse.c	7 Jan 2003 22:14:42 -0000	1.225
--- gcse.c	15 Jan 2003 05:18:55 -0000
***************
*** 1,6 ****
  /* Global common subexpression elimination/Partial redundancy elimination
     and global constant/copy propagation for GNU compiler.
!    Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002
     Free Software Foundation, Inc.

  This file is part of GCC.
--- 1,6 ----
  /* Global common subexpression elimination/Partial redundancy elimination
     and global constant/copy propagation for GNU compiler.
!    Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003
     Free Software Foundation, Inc.

  This file is part of GCC.
*************** static int load_killed_in_block_p    PAR
*** 614,620 ****
  static void canon_list_insert        PARAMS ((rtx, rtx, void *));
  static int cprop_insn		PARAMS ((rtx, int));
  static int cprop		PARAMS ((int));
! static int one_cprop_pass	PARAMS ((int, int));
  static bool constprop_register	PARAMS ((rtx, rtx, rtx, int));
  static struct expr *find_bypass_set PARAMS ((int, int));
  static int bypass_block		    PARAMS ((basic_block, rtx, rtx));
--- 614,620 ----
  static void canon_list_insert        PARAMS ((rtx, rtx, void *));
  static int cprop_insn		PARAMS ((rtx, int));
  static int cprop		PARAMS ((int));
! static int one_cprop_pass	PARAMS ((int, int, int));
  static bool constprop_register	PARAMS ((rtx, rtx, rtx, int));
  static struct expr *find_bypass_set PARAMS ((int, int));
  static int bypass_block		    PARAMS ((basic_block, rtx, rtx));
*************** gcse_main (f, file)
*** 819,825 ****

        /* Don't allow constant propagation to modify jumps
  	 during this pass.  */
!       changed = one_cprop_pass (pass + 1, 0);

        if (optimize_size)
  	changed |= one_classic_gcse_pass (pass + 1);
--- 819,825 ----

        /* Don't allow constant propagation to modify jumps
  	 during this pass.  */
!       changed = one_cprop_pass (pass + 1, 0, 0);

        if (optimize_size)
  	changed |= one_classic_gcse_pass (pass + 1);
*************** gcse_main (f, file)
*** 886,892 ****
    max_gcse_regno = max_reg_num ();
    alloc_gcse_mem (f);
    /* This time, go ahead and allow cprop to alter jumps.  */
!   one_cprop_pass (pass + 1, 1);
    free_gcse_mem ();

    if (file)
--- 886,892 ----
    max_gcse_regno = max_reg_num ();
    alloc_gcse_mem (f);
    /* This time, go ahead and allow cprop to alter jumps.  */
!   one_cprop_pass (pass + 1, 1, 0);
    free_gcse_mem ();

    if (file)
*************** cprop (alter_jumps)
*** 4463,4482 ****
  }

  /* Perform one copy/constant propagation pass.
!    F is the first insn in the function.
!    PASS is the pass count.  */

  static int
! one_cprop_pass (pass, alter_jumps)
       int pass;
!      int alter_jumps;
  {
    int changed = 0;

    const_prop_count = 0;
    copy_prop_count = 0;

!   local_cprop_pass (alter_jumps);

    alloc_hash_table (max_cuid, &set_hash_table, 1);
    compute_hash_table (&set_hash_table);
--- 4463,4484 ----
  }

  /* Perform one copy/constant propagation pass.
!    PASS is the pass count.  If CPROP_JUMPS is true, perform constant
!    propagation into conditional jumps.  If BYPASS_JUMPS is true,
!    perform conditional jump bypassing optimizations.  */

  static int
! one_cprop_pass (pass, cprop_jumps, bypass_jumps)
       int pass;
!      int cprop_jumps;
!      int bypass_jumps;
  {
    int changed = 0;

    const_prop_count = 0;
    copy_prop_count = 0;

!   local_cprop_pass (cprop_jumps);

    alloc_hash_table (max_cuid, &set_hash_table, 1);
    compute_hash_table (&set_hash_table);
*************** one_cprop_pass (pass, alter_jumps)
*** 4486,4493 ****
      {
        alloc_cprop_mem (last_basic_block, set_hash_table.n_elems);
        compute_cprop_data ();
!       changed = cprop (alter_jumps);
!       if (alter_jumps)
  	changed |= bypass_conditional_jumps ();
        free_cprop_mem ();
      }
--- 4488,4495 ----
      {
        alloc_cprop_mem (last_basic_block, set_hash_table.n_elems);
        compute_cprop_data ();
!       changed = cprop (cprop_jumps);
!       if (bypass_jumps)
  	changed |= bypass_conditional_jumps ();
        free_cprop_mem ();
      }
*************** store_motion ()
*** 7346,7351 ****
--- 7348,7458 ----
    free_edge_list (edge_list);
    remove_fake_edges ();
    end_alias_analysis ();
+ }
+
+
+ /* Entry point for jump bypassing optimization pass.  */
+
+ int
+ bypass_jumps (file)
+      FILE *file;
+ {
+   int changed;
+
+   /* We do not construct an accurate cfg in functions which call
+      setjmp, so just punt to be safe.  */
+   if (current_function_calls_setjmp)
+     return 0;
+
+   /* For calling dump_foo fns from gdb.  */
+   debug_stderr = stderr;
+   gcse_file = file;
+
+   /* Identify the basic block information for this function, including
+      successors and predecessors.  */
+   max_gcse_regno = max_reg_num ();
+
+   if (file)
+     dump_flow_info (file);
+
+   /* Return if there's nothing to do.  */
+   if (n_basic_blocks <= 1)
+     return 0;
+
+   /* Trying to perform global optimizations on flow graphs which have
+      a high connectivity will take a long time and is unlikely to be
+      particularly useful.
+
+      In normal circumstances a cfg should have about twice as many edges
+      as blocks.  But we do not want to punish small functions which have
+      a couple switch statements.  So we require a relatively large number
+      of basic blocks and the ratio of edges to blocks to be high.  */
+   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
+     {
+       if (warn_disabled_optimization)
+         warning ("BYPASS disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
+                  n_basic_blocks, n_edges / n_basic_blocks);
+       return 0;
+     }
+
+   /* If allocating memory for the cprop bitmap would take up too much
+      storage it's better just to disable the optimization.  */
+   if ((n_basic_blocks
+        * SBITMAP_SET_SIZE (max_gcse_regno)
+        * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
+     {
+       if (warn_disabled_optimization)
+         warning ("GCSE disabled: %d basic blocks and %d registers",
+                  n_basic_blocks, max_gcse_regno);
+
+       return 0;
+     }
+
+   /* See what modes support reg/reg copy operations.  */
+   if (! can_copy_init_p)
+     {
+       compute_can_copy ();
+       can_copy_init_p = 1;
+     }
+
+   gcc_obstack_init (&gcse_obstack);
+   bytes_used = 0;
+
+   /* We need alias.  */
+   init_alias_analysis ();
+
+   /* Record where pseudo-registers are set.  This data is kept accurate
+      during each pass.  ??? We could also record hard-reg information here
+      [since it's unchanging], however it is currently done during hash table
+      computation.
+
+      It may be tempting to compute MEM set information here too, but MEM sets
+      will be subject to code motion one day and thus we need to compute
+      information about memory sets when we build the hash tables.  */
+
+   alloc_reg_set_mem (max_gcse_regno);
+   compute_sets (get_insns ());
+
+   max_gcse_regno = max_reg_num ();
+   alloc_gcse_mem (get_insns ());
+   changed = one_cprop_pass (1, 1, 1);
+   free_gcse_mem ();
+
+   if (file)
+     {
+       fprintf (file, "BYPASS of %s: %d basic blocks, ",
+ 	       current_function_name, n_basic_blocks);
+       fprintf (file, "%d bytes\n\n", bytes_used);
+     }
+
+   obstack_free (&gcse_obstack, NULL);
+   free_reg_set_mem ();
+
+   /* We are finished with alias.  */
+   end_alias_analysis ();
+   allocate_reg_info (max_reg_num (), FALSE, FALSE);
+
+   return changed;
  }

  #include "gt-gcse.h"
Index: rtl.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/rtl.h,v
retrieving revision 1.379
diff -c -3 -p -r1.379 rtl.h
*** rtl.h	8 Jan 2003 11:20:18 -0000	1.379
--- rtl.h	15 Jan 2003 05:18:56 -0000
***************
*** 1,6 ****
  /* Register Transfer Language (RTL) definitions for GNU C-Compiler
     Copyright (C) 1987, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
!    1999, 2000, 2001, 2002 Free Software Foundation, Inc.

  This file is part of GCC.

--- 1,6 ----
  /* Register Transfer Language (RTL) definitions for GNU C-Compiler
     Copyright (C) 1987, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
!    1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.

  This file is part of GCC.

*************** extern rtx expand_mult_highpart		PARAMS
*** 2086,2091 ****
--- 2086,2092 ----
  /* In gcse.c */
  #ifdef BUFSIZ
  extern int gcse_main			PARAMS ((rtx, FILE *));
+ extern int bypass_jumps			PARAMS ((FILE *));
  #endif

  /* In global.c */
Index: timevar.def
===================================================================
RCS file: /cvs/gcc/gcc/gcc/timevar.def,v
retrieving revision 1.14
diff -c -3 -p -r1.14 timevar.def
*** timevar.def	1 Jun 2002 21:31:35 -0000	1.14
--- timevar.def	15 Jan 2003 05:18:56 -0000
***************
*** 1,6 ****
  /* This file contains the definitions for timing variables used to
     measure run-time performance of the compiler.
!    Copyright (C) 2000 Free Software Foundation, Inc.
     Contributed by Alex Samuel <samuel@codesourcery.com>

     This file is part of GCC.
--- 1,6 ----
  /* This file contains the definitions for timing variables used to
     measure run-time performance of the compiler.
!    Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
     Contributed by Alex Samuel <samuel@codesourcery.com>

     This file is part of GCC.
*************** DEFTIMEVAR (TV_JUMP                  , "
*** 58,63 ****
--- 58,64 ----
  DEFTIMEVAR (TV_CSE                   , "CSE")
  DEFTIMEVAR (TV_GCSE                  , "global CSE")
  DEFTIMEVAR (TV_LOOP                  , "loop analysis")
+ DEFTIMEVAR (TV_BYPASS                , "bypass jumps")
  DEFTIMEVAR (TV_TRACER                , "tracer")
  DEFTIMEVAR (TV_CSE2                  , "CSE 2")
  DEFTIMEVAR (TV_BRANCH_PROB           , "branch prediction")
Index: toplev.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/toplev.c,v
retrieving revision 1.695
diff -c -3 -p -r1.695 toplev.c
*** toplev.c	11 Jan 2003 00:45:47 -0000	1.695
--- toplev.c	15 Jan 2003 05:18:58 -0000
*************** enum dump_file_index
*** 232,237 ****
--- 232,238 ----
    DFI_addressof,
    DFI_gcse,
    DFI_loop,
+   DFI_bypass,
    DFI_cfg,
    DFI_bp,
    DFI_ce1,
*************** static struct dump_file_info dump_file[D
*** 281,286 ****
--- 282,288 ----
    { "addressof", 'F', 0, 0, 0 },
    { "gcse",	'G', 1, 0, 0 },
    { "loop",	'L', 1, 0, 0 },
+   { "bypass",   'G', 1, 0, 0 }, /* Yes, duplicate enable switch.  */
    { "cfg",	'f', 1, 0, 0 },
    { "bp",	'b', 1, 0, 0 },
    { "ce1",	'C', 1, 0, 0 },
*************** rest_of_compilation (decl)
*** 2962,2967 ****
--- 2964,2995 ----
        find_basic_blocks (insns, max_reg_num (), rtl_dump_file);

        ggc_collect ();
+     }
+
+   /* Perform jump bypassing and control flow optimizations.  */
+   if (optimize > 0 && flag_gcse)
+     {
+       timevar_push (TV_BYPASS);
+       open_dump_file (DFI_bypass, decl);
+
+       cleanup_cfg (CLEANUP_EXPENSIVE);
+       tem = bypass_jumps (rtl_dump_file);
+
+       if (tem)
+         {
+           rebuild_jump_labels (insns);
+           cleanup_cfg (CLEANUP_EXPENSIVE);
+           delete_trivially_dead_insns (insns, max_reg_num ());
+         }
+
+       close_dump_file (DFI_bypass, print_rtl_with_bb, insns);
+       timevar_pop (TV_BYPASS);
+
+       ggc_collect ();
+
+ #ifdef ENABLE_CHECKING
+       verify_flow_info ();
+ #endif
      }

    /* Do control and data flow analysis; wrote some of the results to
Index: doc/passes.texi
===================================================================
RCS file: /cvs/gcc/gcc/gcc/doc/passes.texi,v
retrieving revision 1.14
diff -c -3 -p -r1.14 passes.texi
*** doc/passes.texi	10 Jan 2003 02:22:21 -0000	1.14
--- doc/passes.texi	15 Jan 2003 05:18:58 -0000
***************
*** 1,5 ****
! @c Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 2002,
! @c 1999, 2000, 2001 Free Software Foundation, Inc.
  @c This is part of the GCC manual.
  @c For copying conditions, see the file gcc.texi.

--- 1,5 ----
! @c Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
! @c 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
  @c This is part of the GCC manual.
  @c For copying conditions, see the file gcc.texi.

*************** Loop dependency analysis routines are co
*** 337,342 ****
--- 337,355 ----
  The option @option{-dL} causes a debugging dump of the RTL code after
  this pass.  This dump file's name is made by appending @samp{.loop} to
  the input file name.
+
+ @cindex jump bypassing
+ @item
+ Jump bypassing.  This pass is an aggressive form of GCSE that transforms
+ the control flow graph of a function by propagating constants into
+ conditional branch instructions.
+
+ The source file for this pass is @file{gcse.c}.
+
+ @opindex dG
+ The option @option{-dG} causes a debugging dump of the RTL code after
+ this pass.  This dump file's name is made by appending @samp{.bypass}
+ to the input file name.

  @item
  @opindex frerun-cse-after-loop

Roger
--
Roger Sayle,                         E-mail: roger@eyesopen.com
OpenEye Scientific Software,         WWW: http://www.eyesopen.com/
Suite 1107, 3600 Cerrillos Road,     Tel: (+1) 505-473-7385
Santa Fe, New Mexico, 87507.         Fax: (+1) 505-473-0833

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Separate jump bypassing pass
  2003-01-15 19:03 [PATCH] Separate jump bypassing pass Roger Sayle
@ 2003-01-15 21:15 ` Richard Henderson
  2003-01-16 22:27 ` Kazu Hirata
  2003-02-09 14:52 ` [PATCH] Separate jump bypassing pass Jan Hubicka
  2 siblings, 0 replies; 19+ messages in thread
From: Richard Henderson @ 2003-01-15 21:15 UTC (permalink / raw)
  To: Roger Sayle; +Cc: gcc-patches

On Wed, Jan 15, 2003 at 11:43:17AM -0700, Roger Sayle wrote:
> 	* gcse.c (one_cprop_pass): Change function arguments to take both
> 	cprop_jumps and bypass_jumps flags instead of just alter_jumps.
> 	(gcse_main): Update calls to one_cprop_pass, disabling bypassing.
> 	(bypass_jumps): New function to perform separate jump bypassing pass.
> 	* rtl.h (bypass_jumps): Add function prototype.
> 	* timevar.def (TV_BYPASS): New timing variable.
> 	* toplev.c (enum dump_file_index): Add new entry DFI_bypass.
> 	(dump_file): New entry for the bypass RTL dump file.
> 	(rest_of_compilation): Insert new jump bypassing optimization
> 	pass after loop.
> 	* doc/passes.texi: Document new pass.

Ok.


r~

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Separate jump bypassing pass
  2003-01-15 19:03 [PATCH] Separate jump bypassing pass Roger Sayle
  2003-01-15 21:15 ` Richard Henderson
@ 2003-01-16 22:27 ` Kazu Hirata
  2003-01-17  2:44   ` Roger Sayle
  2003-02-09 14:52 ` [PATCH] Separate jump bypassing pass Jan Hubicka
  2 siblings, 1 reply; 19+ messages in thread
From: Kazu Hirata @ 2003-01-16 22:27 UTC (permalink / raw)
  To: roger; +Cc: gcc-patches

Hi Roger,

> 2003-01-15  Roger Sayle  <roger@eyesopen.com>
> 
> 	* gcse.c (one_cprop_pass): Change function arguments to take both
> 	cprop_jumps and bypass_jumps flags instead of just alter_jumps.
> 	(gcse_main): Update calls to one_cprop_pass, disabling bypassing.
> 	(bypass_jumps): New function to perform separate jump bypassing pass.
> 	* rtl.h (bypass_jumps): Add function prototype.
> 	* timevar.def (TV_BYPASS): New timing variable.
> 	* toplev.c (enum dump_file_index): Add new entry DFI_bypass.
> 	(dump_file): New entry for the bypass RTL dump file.
> 	(rest_of_compilation): Insert new jump bypassing optimization
> 	pass after loop.
> 	* doc/passes.texi: Document new pass.

With this patch, gcc seems to cause ICE when compiling
execute/980526-3.c for H8/300.  Here is a minimized version of
980526-3.c that still fails.

/* Use h8300-hms-gcc -mh -O2 -S 980526-3.c  */

int
bar (y)
     int y;
{
  return y;
}

int
foo ()
{
  int j = 2;

  if (bar (1 % j))
    return 1;
  else
    return 0;
}

I get

980526-3.c: In function `foo':
980526-3.c:17: internal compiler error: in simplify_subreg, at simplify-rtx.c:2248

Is there anyway you could look into this?  When I replace the
definition of bar() with just a prototype for bar(), I don't get an
error any more.  I'll also try to analyze what's going on.

Thanks,

Kazu Hirata

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Separate jump bypassing pass
  2003-01-16 22:27 ` Kazu Hirata
@ 2003-01-17  2:44   ` Roger Sayle
  2003-01-17  3:30     ` Richard Henderson
  0 siblings, 1 reply; 19+ messages in thread
From: Roger Sayle @ 2003-01-17  2:44 UTC (permalink / raw)
  To: Kazu Hirata, John David Anglin; +Cc: gcc-patches, Richard Henderson


Hi Kazu and David,
On Thu, 16 Jan 2003, Kazu Hirata wrote:
> > 2003-01-15  Roger Sayle  <roger@eyesopen.com>
> >
> > 	* gcse.c (one_cprop_pass): Change function arguments to take both
> > 	cprop_jumps and bypass_jumps flags instead of just alter_jumps.
> >     ...
>
> With this patch, gcc seems to cause ICE when compiling
> execute/980526-3.c for H8/300.
> I get
>
> 980526-3.c: In function `foo':
> 980526-3.c:17: internal compiler error: in simplify_subreg,
> at simplify-rtx.c:2248


It looks like that by splitting GCSE and moving jump bypassing
after the loop optimization passes I've fallen foul of some
RTL sharing issues.

In Kazu's example above, 980526-3.c, the RTL prior the the
jump bypassing pass contains:

(insn 13 11 14 0x40018a7c (set (reg:HI 0 r0)
        (subreg:HI (reg:SI 22) 2)) 7 {*h8300.md:266} (nil)
    (expr_list:REG_EQUAL (const_int 1 [0x1])
        (insn_list:REG_LIBCALL 15 (nil))))

(call_insn/u 14 13 15 0x40018a7c (set (reg:HI 0 r0)
        (call (mem:QI (symbol_ref:SI ("bar")) [0 S1 A8])
            (const_int 0 [0x0]))) 80 {call_value} (nil)
    (expr_list:REG_EH_REGION (const_int -1 [0xffffffff])
        (nil))
    (expr_list (use (reg:HI 0 r0))
        (nil)))

(insn 15 14 17 0x40018a7c (set (reg:HI 23)
        (reg:HI 0 r0)) 7 {*h8300.md:266} (nil)
    (insn_list:REG_RETVAL 13 (expr_list:REG_EQUAL (expr_list (symbol_ref:SI ("bar"))
                (expr_list (subreg:HI (reg:SI 22) 2)
                    (nil)))
            (nil))))

In do_local_cprop, we then try substituting "(reg:SI 22)" in insn 13
with "(const_int 1 [0x1])".  The problem is that the RTL expression:
"(subreg:HI (reg:SI 22) 2)" is shared between the SET_SRC in insn 13
and the REG_EQUAL note in insn 15.  So although simplify_subreg can
handle the "(subreg:HI (const_int 1 [0x1]) 2)" in insn 13 because it
knows the mode was formerly SI, by the time "adjust_libcall_notes"
reaches insn 15, subsistution has already been made and the intended
mode of the const_int can't be recovered.


I'm note sure if this sharing of RTL is legal (valid), so I'd like
advice from someone that understands RTL sharing, RTH or Jan?


It looks like David Anglin has also run into the same (or very
similar) problem bootstrapping hppa64-hp-hpux11.11.
http://gcc.gnu.org/ml/gcc-patches/2003-01/msg01191.html

Given my ignorance of RTL sharing and a reported bootstrap failure,
we should start the clock ticking and revert the patch if we're no
closer to a solution in a day or so.


Is the above RTL sharing allowed?
If so, do I need to make GCSE avoid cprop in libcalls?
If not, what's the best way to avoid this RTL sharing.
Where is the shared RTL generated, or should I just call
"unshare_all_rtl"?

Many thanks in advance,

Roger
--

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Separate jump bypassing pass
  2003-01-17  2:44   ` Roger Sayle
@ 2003-01-17  3:30     ` Richard Henderson
  2003-01-17 14:24       ` [PATCH] Fix RTL sharing problem in CSE Roger Sayle
  0 siblings, 1 reply; 19+ messages in thread
From: Richard Henderson @ 2003-01-17  3:30 UTC (permalink / raw)
  To: Roger Sayle; +Cc: Kazu Hirata, John David Anglin, gcc-patches

On Thu, Jan 16, 2003 at 07:24:22PM -0700, Roger Sayle wrote:
> Is the above RTL sharing allowed?

I don't think so.

> If not, what's the best way to avoid this RTL sharing.
> Where is the shared RTL generated, or should I just call
> "unshare_all_rtl"?

Where it's generated.


r~

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH] Fix RTL sharing problem in CSE
  2003-01-17  3:30     ` Richard Henderson
@ 2003-01-17 14:24       ` Roger Sayle
  2003-01-17 16:04         ` John David Anglin
                           ` (2 more replies)
  0 siblings, 3 replies; 19+ messages in thread
From: Roger Sayle @ 2003-01-17 14:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: John David Anglin


The following problem fixes a latent RTL sharing problem in CSE
that was exposed by patch to split GCSE and move jump bypassing
into a separate pass after loop optimizations.

When CSE performs a substitution inside a libcall, it also updates
the notes on the RETVAL instruction.  Unfortunately, this code was
sharing the replacement rather than copying it for the REG_EQUAL
note on the RETVAL.  This is fixed by the patch below.

This patch has been tested on i686-pc-linux-gnu with a full
"make bootstrap", all languages except Ada and treelang, and
with a complete "make -k check" with no new regressions.  I've
also confirmed that it fixes the segmentation fault in
simplify_subreg reported by Kazu by building a cross compiler
to h8300-hms, where the testcase is compiled with this patch
but fails without it.

Ok for mainline and perhaps the 3.3 branch?


Dave, could you check if this patch fixes the hppa64-hp-hpux11.11
bootstrap?  http://gcc.gnu.org/ml/gcc-patches/2003-01/msg01191.html
The ICE is also in simplify_subreg but might be a different RTL
sharing problem.


2003-01-16  Roger Sayle  <roger@eyesopen.com>

	* cse.c (cse_insn): Avoid RTL sharing when updating the RETVAL
	insn's notes following a substitution inside a libcall.


Index: cse.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cse.c,v
retrieving revision 1.245
diff -c -3 -p -r1.245 cse.c
*** cse.c	16 Dec 2002 18:19:19 -0000	1.245
--- cse.c	17 Jan 2003 05:19:13 -0000
*************** cse_insn (insn, libcall_insn)
*** 5557,5562 ****
--- 5557,5564 ----
  	  /* Look for a substitution that makes a valid insn.  */
  	  else if (validate_change (insn, &SET_SRC (sets[i].rtl), trial, 0))
  	    {
+ 	      rtx new = canon_reg (SET_SRC (sets[i].rtl), insn);
+
  	      /* If we just made a substitution inside a libcall, then we
  		 need to make the same substitution in any notes attached
  		 to the RETVAL insn.  */
*************** cse_insn (insn, libcall_insn)
*** 5565,5578 ****
  		      || GET_CODE (sets[i].orig_src) == SUBREG
  		      || GET_CODE (sets[i].orig_src) == MEM))
  		replace_rtx (REG_NOTES (libcall_insn), sets[i].orig_src,
! 			     canon_reg (SET_SRC (sets[i].rtl), insn));

  	      /* The result of apply_change_group can be ignored; see
  		 canon_reg.  */

! 	      validate_change (insn, &SET_SRC (sets[i].rtl),
! 			       canon_reg (SET_SRC (sets[i].rtl), insn),
! 			       1);
  	      apply_change_group ();
  	      break;
  	    }
--- 5567,5578 ----
  		      || GET_CODE (sets[i].orig_src) == SUBREG
  		      || GET_CODE (sets[i].orig_src) == MEM))
  		replace_rtx (REG_NOTES (libcall_insn), sets[i].orig_src,
! 			     copy_rtx (new));

  	      /* The result of apply_change_group can be ignored; see
  		 canon_reg.  */

! 	      validate_change (insn, &SET_SRC (sets[i].rtl), new, 1);
  	      apply_change_group ();
  	      break;
  	    }

Roger
--
Roger Sayle,                         E-mail: roger@eyesopen.com
OpenEye Scientific Software,         WWW: http://www.eyesopen.com/
Suite 1107, 3600 Cerrillos Road,     Tel: (+1) 505-473-7385
Santa Fe, New Mexico, 87507.         Fax: (+1) 505-473-0833

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Fix RTL sharing problem in CSE
  2003-01-17 14:24       ` [PATCH] Fix RTL sharing problem in CSE Roger Sayle
@ 2003-01-17 16:04         ` John David Anglin
  2003-01-20  6:13         ` Kazu Hirata
  2003-01-21  1:31         ` Richard Henderson
  2 siblings, 0 replies; 19+ messages in thread
From: John David Anglin @ 2003-01-17 16:04 UTC (permalink / raw)
  To: Roger Sayle; +Cc: gcc-patches

> 2003-01-16  Roger Sayle  <roger@eyesopen.com>
> 
> 	* cse.c (cse_insn): Avoid RTL sharing when updating the RETVAL
> 	insn's notes following a substitution inside a libcall.

This doesn't fix the hppa64 problem.  I will try to investigate further.

Dave
-- 
J. David Anglin                                  dave.anglin@nrc-cnrc.gc.ca
National Research Council of Canada              (613) 990-0752 (FAX: 952-6605)

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Fix RTL sharing problem in CSE
  2003-01-17 14:24       ` [PATCH] Fix RTL sharing problem in CSE Roger Sayle
  2003-01-17 16:04         ` John David Anglin
@ 2003-01-20  6:13         ` Kazu Hirata
  2003-01-21  1:31         ` Richard Henderson
  2 siblings, 0 replies; 19+ messages in thread
From: Kazu Hirata @ 2003-01-20  6:13 UTC (permalink / raw)
  To: roger; +Cc: gcc-patches, dave

Hi Roger,

> 2003-01-16  Roger Sayle  <roger@eyesopen.com>
> 
> 	* cse.c (cse_insn): Avoid RTL sharing when updating the RETVAL
> 	insn's notes following a substitution inside a libcall.

I just want to add that your patch introduces no new regressions on
h8300 port while fixing execute/980526-3.c.

Thanks!

Kazu Hirata

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Fix RTL sharing problem in CSE
  2003-01-17 14:24       ` [PATCH] Fix RTL sharing problem in CSE Roger Sayle
  2003-01-17 16:04         ` John David Anglin
  2003-01-20  6:13         ` Kazu Hirata
@ 2003-01-21  1:31         ` Richard Henderson
  2 siblings, 0 replies; 19+ messages in thread
From: Richard Henderson @ 2003-01-21  1:31 UTC (permalink / raw)
  To: Roger Sayle; +Cc: gcc-patches, John David Anglin

On Fri, Jan 17, 2003 at 07:03:50AM -0700, Roger Sayle wrote:
> 	* cse.c (cse_insn): Avoid RTL sharing when updating the RETVAL
> 	insn's notes following a substitution inside a libcall.

Ok.


r~

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Separate jump bypassing pass
  2003-01-15 19:03 [PATCH] Separate jump bypassing pass Roger Sayle
  2003-01-15 21:15 ` Richard Henderson
  2003-01-16 22:27 ` Kazu Hirata
@ 2003-02-09 14:52 ` Jan Hubicka
  2 siblings, 0 replies; 19+ messages in thread
From: Jan Hubicka @ 2003-02-09 14:52 UTC (permalink / raw)
  To: Roger Sayle; +Cc: gcc-patches

> 
> 
> The following patch splits our current GCSE, so that the optimizations
> that significantly modify a function's control flow graph are performed
> after GCC's loop optimizations.
> 
> The motivation for this patch and SPECcpu2000 timings were presented
> in http://gcc.gnu.org/ml/gcc/2002-09/msg01129.html back in September.
> The patch below is "New Bypass" as described in that post.  I had
> originally intended to post it for review shortly afterwards, but
> Mark Mitchell requested that we concentrate fixing bugs in 3.2.1
> and the 3.3 branch before I got around to it.
> http://gcc.gnu.org/ml/gcc/2002-09/msg01157.html
> 
> This patch provides significant performance advantages on
> SPECfp2000, but primarily it allows more aggressive forms of
> GCSE without adversely affecting performance by confusing GCC's
> loop optimizations with irreducible CFGs or misplaced loop notes.
> 
> 
> The following patch has been tested on i686-pc-linux-gnu by a full
> bootstrap, all languages except Ada and treelang, and a complete
> "make -k check" with no new regressions.
> 
> Ok for mainline?

Roger,
thanks for the jump bypassing!  It helps to the performance more than
I've expected :)
I think you should write also something to gcc-3.3/changes.html and
news, so we manifestate some activity to outside world.

Honza

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Fix RTL sharing problem in CSE
@ 2003-01-18  3:56 David Edelsohn
  0 siblings, 0 replies; 19+ messages in thread
From: David Edelsohn @ 2003-01-18  3:56 UTC (permalink / raw)
  To: Roger Sayle; +Cc: gcc-patches

	BTW, now that Dale fixed the latent bug in the GCC MD file, one of
the benchmark testcases (not the one that segfaulted) shows a dramatic
performance improvement after your patch.

David

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Fix RTL sharing problem in CSE
  2003-01-17 22:33         ` David Edelsohn
@ 2003-01-17 23:56           ` Dale Johannesen
  0 siblings, 0 replies; 19+ messages in thread
From: Dale Johannesen @ 2003-01-17 23:56 UTC (permalink / raw)
  To: David Edelsohn; +Cc: Dale Johannesen, Roger Sayle, gcc-patches


On Friday, January 17, 2003, at 02:33  PM, David Edelsohn wrote:

>>>>>> Roger Sayle writes:
>
>> I believe that the problems on hppa64 and powerpc will turn out to
>> be more latent bugs in the existing GCSE or CSE constant propagation
>> code, just like the one in cse_insn that it has already uncovered
>> on H8300.
>
> 	This does appear to be the case for the PowerPC segfault.  After
> further investigation, I can construct a simplified testcase which 
> fails
> with or without the jump bypass patch.  The jump bypass patch simply
> causes the latent bug to occur in the complete version of the function
> that appears in the benchmark.

Right.  I've tracked this down to a lurker in the ppc-specific code.
Patch coming.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Fix RTL sharing problem in CSE
  2003-01-17 18:59       ` Roger Sayle
  2003-01-17 22:33         ` David Edelsohn
@ 2003-01-17 22:46         ` law
  1 sibling, 0 replies; 19+ messages in thread
From: law @ 2003-01-17 22:46 UTC (permalink / raw)
  To: Roger Sayle; +Cc: Jan Hubicka, gcc-patches

In message <Pine.LNX.4.44.0301171112100.6779-100000@www.eyesopen.com>, Roger Sa
yle writes:
 >I believe that the problems on hppa64 and powerpc will turn out to
 >be more latent bugs in the existing GCSE or CSE constant propagation
 >code, just like the one in cse_insn that it has already uncovered
 >on H8300.
I agree completely.

It's worth noting that the code we're tripping in the PA backend is
notoriously flakey -- even more so for PA64.  If it wasn't such a 
huge win to use xmpyu I'd suggest we drop the damn thing.


Jeff

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Fix RTL sharing problem in CSE
  2003-01-17 18:59       ` Roger Sayle
@ 2003-01-17 22:33         ` David Edelsohn
  2003-01-17 23:56           ` Dale Johannesen
  2003-01-17 22:46         ` law
  1 sibling, 1 reply; 19+ messages in thread
From: David Edelsohn @ 2003-01-17 22:33 UTC (permalink / raw)
  To: Roger Sayle; +Cc: gcc-patches

>>>>> Roger Sayle writes:

> I believe that the problems on hppa64 and powerpc will turn out to
> be more latent bugs in the existing GCSE or CSE constant propagation
> code, just like the one in cse_insn that it has already uncovered
> on H8300.

	This does appear to be the case for the PowerPC segfault.  After
further investigation, I can construct a simplified testcase which fails
with or without the jump bypass patch.  The jump bypass patch simply
causes the latent bug to occur in the complete version of the function
that appears in the benchmark.

David

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Fix RTL sharing problem in CSE
  2003-01-17 18:25     ` Jan Hubicka
@ 2003-01-17 18:59       ` Roger Sayle
  2003-01-17 22:33         ` David Edelsohn
  2003-01-17 22:46         ` law
  0 siblings, 2 replies; 19+ messages in thread
From: Roger Sayle @ 2003-01-17 18:59 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: law, gcc-patches


Hi Jan and Jeff,
> >  >The ICE occurs because combine tries to simplify it and it can't because
> >  >of the constant substitution in the regclass.i.13.bypass pass.  It would
> >  >seem to me that the subreg should be simplified when the substitution for
> >  >DI 262 is made.  After that, the mode information is lost.
> > Agreed.
> When replacement is made by validate_replace_reg, this is done
> automatically.  I think bypass pass should use it consistently...

No new RTL modification code was introduced splitting GCSE and
moving the jump bypassing pass after the loop optimizations.
Even prior to that, jump bypassing itself only affects branch
instructions and doesn't modify RTL expressions at all.

I believe that the problems on hppa64 and powerpc will turn out to
be more latent bugs in the existing GCSE or CSE constant propagation
code, just like the one in cse_insn that it has already uncovered
on H8300.


I know the rules, and am more than happy to take responsibility
for latent bugs that are exposed by my changes.  If I'm right,
these fixes should also be applied to the 3.3 branch, as they'll
predate my recent changes.

Roger
--

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Fix RTL sharing problem in CSE
  2003-01-17 17:24   ` law
@ 2003-01-17 18:25     ` Jan Hubicka
  2003-01-17 18:59       ` Roger Sayle
  0 siblings, 1 reply; 19+ messages in thread
From: Jan Hubicka @ 2003-01-17 18:25 UTC (permalink / raw)
  To: law; +Cc: John David Anglin, roger, gcc-patches

> In message <200301171715.h0HHFOL0006221@hiauly1.hia.nrc.ca>, "John David Anglin
> " writes:
>  >> > 2003-01-16  Roger Sayle  <roger@eyesopen.com>
>  >> > 
>  >> > 	* cse.c (cse_insn): Avoid RTL sharing when updating the RETVAL
>  >> > 	insn's notes following a substitution inside a libcall.
>  >> 
>  >> This doesn't fix the hppa64 problem.  I will try to investigate further.
>  >
>  >This is the insn that causes the ICE:
>  >
>  >regclass.i.12.loop:
>  >(insn 599 598 600 0000000000000000 (set (reg:DI 263)
>  >        (mult:DI (zero_extend:DI (subreg:SI (reg:DI 262) 4))
>  >	    (zero_extend:DI (subreg:SI (reg/v:DI 157) 4)))) -1 (nil)
>  >	(nil))
>  >
>  >regclass.i.13.bypass:
>  >(insn 599 598 600 21 0000000000000000 (set (reg:DI 263)
>  >        (mult:DI (zero_extend:DI (subreg:SI (const_int -44 [0xffffffffffffffd
>  >4]) 4))
>  >	    (zero_extend:DI (subreg:SI (reg/v:DI 157) 4)))) -1 (nil)
>  >	(nil))
>  >
>  >regclass.i.19.life:
>  >(insn 599 598 600 18 0000000000000000 (set (reg:DI 263)
>  >        (mult:DI (zero_extend:DI (subreg:SI (const_int -44 [0xffffffffffffffd
>  >4]) 4))
>  >	    (zero_extend:DI (subreg:SI (reg/v:DI 157) 4)))) -1 (nil)
>  >    (expr_list:REG_DEAD (reg/v:DI 157)
>  >	(nil)))
>  >
>  >The ICE occurs because combine tries to simplify it and it can't because
>  >of the constant substitution in the regclass.i.13.bypass pass.  It would
>  >seem to me that the subreg should be simplified when the substitution for
>  >DI 262 is made.  After that, the mode information is lost.
> Agreed.
When replacement is made by validate_replace_reg, this is done
automatically.  I think bypass pass should use it consistently...

Honza
> 
> jeff

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Fix RTL sharing problem in CSE
@ 2003-01-17 17:50 David Edelsohn
  0 siblings, 0 replies; 19+ messages in thread
From: David Edelsohn @ 2003-01-17 17:50 UTC (permalink / raw)
  To: Roger Sayle; +Cc: gcc-patches

	By the way, your original bypass patch also causes a PowerPC
benchmark to segfault.  It looks like another code sharing problem, but I
still am trying to track it down.

David

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Fix RTL sharing problem in CSE
  2003-01-17 17:15 ` [PATCH] Fix RTL sharing problem in CSE John David Anglin
@ 2003-01-17 17:24   ` law
  2003-01-17 18:25     ` Jan Hubicka
  0 siblings, 1 reply; 19+ messages in thread
From: law @ 2003-01-17 17:24 UTC (permalink / raw)
  To: John David Anglin; +Cc: roger, gcc-patches

In message <200301171715.h0HHFOL0006221@hiauly1.hia.nrc.ca>, "John David Anglin
" writes:
 >> > 2003-01-16  Roger Sayle  <roger@eyesopen.com>
 >> > 
 >> > 	* cse.c (cse_insn): Avoid RTL sharing when updating the RETVAL
 >> > 	insn's notes following a substitution inside a libcall.
 >> 
 >> This doesn't fix the hppa64 problem.  I will try to investigate further.
 >
 >This is the insn that causes the ICE:
 >
 >regclass.i.12.loop:
 >(insn 599 598 600 0000000000000000 (set (reg:DI 263)
 >        (mult:DI (zero_extend:DI (subreg:SI (reg:DI 262) 4))
 >	    (zero_extend:DI (subreg:SI (reg/v:DI 157) 4)))) -1 (nil)
 >	(nil))
 >
 >regclass.i.13.bypass:
 >(insn 599 598 600 21 0000000000000000 (set (reg:DI 263)
 >        (mult:DI (zero_extend:DI (subreg:SI (const_int -44 [0xffffffffffffffd
 >4]) 4))
 >	    (zero_extend:DI (subreg:SI (reg/v:DI 157) 4)))) -1 (nil)
 >	(nil))
 >
 >regclass.i.19.life:
 >(insn 599 598 600 18 0000000000000000 (set (reg:DI 263)
 >        (mult:DI (zero_extend:DI (subreg:SI (const_int -44 [0xffffffffffffffd
 >4]) 4))
 >	    (zero_extend:DI (subreg:SI (reg/v:DI 157) 4)))) -1 (nil)
 >    (expr_list:REG_DEAD (reg/v:DI 157)
 >	(nil)))
 >
 >The ICE occurs because combine tries to simplify it and it can't because
 >of the constant substitution in the regclass.i.13.bypass pass.  It would
 >seem to me that the subreg should be simplified when the substitution for
 >DI 262 is made.  After that, the mode information is lost.
Agreed.

jeff

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Fix RTL sharing problem in CSE
       [not found] <no.id>
@ 2003-01-17 17:15 ` John David Anglin
  2003-01-17 17:24   ` law
  0 siblings, 1 reply; 19+ messages in thread
From: John David Anglin @ 2003-01-17 17:15 UTC (permalink / raw)
  To: John David Anglin; +Cc: roger, gcc-patches

> > 2003-01-16  Roger Sayle  <roger@eyesopen.com>
> > 
> > 	* cse.c (cse_insn): Avoid RTL sharing when updating the RETVAL
> > 	insn's notes following a substitution inside a libcall.
> 
> This doesn't fix the hppa64 problem.  I will try to investigate further.

This is the insn that causes the ICE:

regclass.i.12.loop:
(insn 599 598 600 0000000000000000 (set (reg:DI 263)
        (mult:DI (zero_extend:DI (subreg:SI (reg:DI 262) 4))
	    (zero_extend:DI (subreg:SI (reg/v:DI 157) 4)))) -1 (nil)
	(nil))

regclass.i.13.bypass:
(insn 599 598 600 21 0000000000000000 (set (reg:DI 263)
        (mult:DI (zero_extend:DI (subreg:SI (const_int -44 [0xffffffffffffffd4]) 4))
	    (zero_extend:DI (subreg:SI (reg/v:DI 157) 4)))) -1 (nil)
	(nil))

regclass.i.19.life:
(insn 599 598 600 18 0000000000000000 (set (reg:DI 263)
        (mult:DI (zero_extend:DI (subreg:SI (const_int -44 [0xffffffffffffffd4]) 4))
	    (zero_extend:DI (subreg:SI (reg/v:DI 157) 4)))) -1 (nil)
    (expr_list:REG_DEAD (reg/v:DI 157)
	(nil)))

The ICE occurs because combine tries to simplify it and it can't because
of the constant substitution in the regclass.i.13.bypass pass.  It would
seem to me that the subreg should be simplified when the substitution for
DI 262 is made.  After that, the mode information is lost.

Dave
-- 
J. David Anglin                                  dave.anglin@nrc-cnrc.gc.ca
National Research Council of Canada              (613) 990-0752 (FAX: 952-6605)

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2003-02-09 14:52 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-01-15 19:03 [PATCH] Separate jump bypassing pass Roger Sayle
2003-01-15 21:15 ` Richard Henderson
2003-01-16 22:27 ` Kazu Hirata
2003-01-17  2:44   ` Roger Sayle
2003-01-17  3:30     ` Richard Henderson
2003-01-17 14:24       ` [PATCH] Fix RTL sharing problem in CSE Roger Sayle
2003-01-17 16:04         ` John David Anglin
2003-01-20  6:13         ` Kazu Hirata
2003-01-21  1:31         ` Richard Henderson
2003-02-09 14:52 ` [PATCH] Separate jump bypassing pass Jan Hubicka
     [not found] <no.id>
2003-01-17 17:15 ` [PATCH] Fix RTL sharing problem in CSE John David Anglin
2003-01-17 17:24   ` law
2003-01-17 18:25     ` Jan Hubicka
2003-01-17 18:59       ` Roger Sayle
2003-01-17 22:33         ` David Edelsohn
2003-01-17 23:56           ` Dale Johannesen
2003-01-17 22:46         ` law
2003-01-17 17:50 David Edelsohn
2003-01-18  3:56 David Edelsohn

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).