public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v2 1/2] xtensa: Eliminate unused stack frame allocation/freeing
@ 2022-09-02 10:18 Takayuki 'January June' Suwa
  2022-09-05 16:05 ` Max Filippov
  0 siblings, 1 reply; 2+ messages in thread
From: Takayuki 'January June' Suwa @ 2022-09-02 10:18 UTC (permalink / raw)
  To: GCC Patches; +Cc: Max Filippov

Changes from v1:
  (xtensa_expand_epilogue): Fixed forgetting to consider hard_frame_pointer_rtx when sharing codes.

---
In the example below, 'x' is once placed on the stack frame and then read
into registers as the argument value of bar():

    /* example */
    struct foo {
      int a, b;
    };
    extern struct foo bar(struct foo);
    struct foo test(void) {
      struct foo x = { 0, 1 };
      return bar(x);
    }

Thanks to the dead store elimination, the initialization of 'x' turns into
merely loading the immediates to registers, but corresponding stack frame
growth is not rolled back.  As a result:

    ;; prereq: the CALL0 ABI
    ;; before
    test:
	addi	sp, sp, -16	// unused stack frame allocation/freeing
	movi.n	a2, 0
	movi.n	a3, 1
	addi	sp, sp, 16	// because no instructions that refer to
	j.l	bar, a9		// the stack pointer between the two

This patch eliminates such unused stack frame allocation/freeing:

    ;; after
    test:
	movi.n	a2, 0
	movi.n	a3, 1
	j.l	bar, a9

gcc/ChangeLog:

	* config/xtensa/xtensa.cc (machine_function): New member to track
	the insns for stack pointer adjustment inside of the pro/epilogue.
	(xtensa_emit_adjust_stack_ptr): New function to share the common
	codes and to record the insns for stack pointer adjustment.
	(xtensa_expand_prologue): Change to use the function mentioned
	above when using the CALL0 ABI.
	(xtensa_expand_epilogue): Ditto.
	And also change to cancel emitting the insns for the stack pointer
	adjustment if only used for its own.
---
 gcc/config/xtensa/xtensa.cc | 230 ++++++++++++++++++------------------
 1 file changed, 118 insertions(+), 112 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index b673b6764da..17416fc6c3f 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -102,6 +102,7 @@ struct GTY(()) machine_function
   int callee_save_size;
   bool frame_laid_out;
   bool epilogue_done;
+  hash_set<rtx_insn *> *logues_a1_adjusts;
 };
 
 /* Vector, indexed by hard register number, which contains 1 for a
@@ -3048,7 +3049,7 @@ xtensa_output_literal (FILE *file, rtx x, machine_mode mode, int labelno)
 }
 
 static bool
-xtensa_call_save_reg(int regno)
+xtensa_call_save_reg (int regno)
 {
   if (TARGET_WINDOWED_ABI)
     return false;
@@ -3084,7 +3085,7 @@ compute_frame_size (poly_int64 size)
   cfun->machine->callee_save_size = 0;
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
     {
-      if (xtensa_call_save_reg(regno))
+      if (xtensa_call_save_reg (regno))
 	cfun->machine->callee_save_size += UNITS_PER_WORD;
     }
 
@@ -3143,6 +3144,51 @@ xtensa_initial_elimination_offset (int from, int to ATTRIBUTE_UNUSED)
    and the total number of words must be a multiple of 128 bits.  */
 #define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
 
+#define ADJUST_SP_NONE      0x0
+#define ADJUST_SP_NEED_NOTE 0x1
+#define ADJUST_SP_FRAME_PTR 0x2
+static rtx_insn *
+xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int flags)
+{
+  rtx_insn *insn;
+  rtx ptr = (flags & ADJUST_SP_FRAME_PTR) ? hard_frame_pointer_rtx
+					  : stack_pointer_rtx;
+
+  if (xtensa_simm8 (offset)
+      || xtensa_simm8x256 (offset))
+    insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, GEN_INT (offset)));
+  else
+    {
+      rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
+      rtx_insn* tmp_insn;
+
+      if (offset < 0)
+	{
+	  tmp_insn = emit_move_insn (tmp_reg, GEN_INT (-offset));
+	  insn = emit_insn (gen_subsi3 (stack_pointer_rtx, ptr, tmp_reg));
+	}
+      else
+	{
+	  tmp_insn = emit_move_insn (tmp_reg, GEN_INT (offset));
+	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr,	tmp_reg));
+	}
+      cfun->machine->logues_a1_adjusts->add (tmp_insn);
+    }
+
+  if (flags & ADJUST_SP_NEED_NOTE)
+    {
+      rtx note_rtx = gen_rtx_SET (stack_pointer_rtx,
+				  plus_constant (Pmode, stack_pointer_rtx,
+						 offset));
+
+      RTX_FRAME_RELATED_P (insn) = 1;
+      add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
+    }
+
+  cfun->machine->logues_a1_adjusts->add (insn);
+  return insn;
+}
+
 void
 xtensa_expand_prologue (void)
 {
@@ -3175,16 +3221,13 @@ xtensa_expand_prologue (void)
       HOST_WIDE_INT offset = 0;
       int callee_save_size = cfun->machine->callee_save_size;
 
+      cfun->machine->logues_a1_adjusts = new hash_set<rtx_insn *>;
+
       /* -128 is a limit of single addi instruction. */
       if (IN_RANGE (total_size, 1, 128))
 	{
-	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
-					GEN_INT (-total_size)));
-	  RTX_FRAME_RELATED_P (insn) = 1;
-	  note_rtx = gen_rtx_SET (stack_pointer_rtx,
-				  plus_constant (Pmode, stack_pointer_rtx,
-						 -total_size));
-	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
+	  insn = xtensa_emit_adjust_stack_ptr (-total_size,
+					       ADJUST_SP_NEED_NOTE);
 	  offset = total_size - UNITS_PER_WORD;
 	}
       else if (callee_save_size)
@@ -3194,75 +3237,35 @@ xtensa_expand_prologue (void)
 	   * move it to its final location. */
 	  if (total_size > 1024)
 	    {
-	      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
-					    GEN_INT (-callee_save_size)));
-	      RTX_FRAME_RELATED_P (insn) = 1;
-	      note_rtx = gen_rtx_SET (stack_pointer_rtx,
-				      plus_constant (Pmode, stack_pointer_rtx,
-						     -callee_save_size));
-	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
+	      insn = xtensa_emit_adjust_stack_ptr (-callee_save_size,
+						   ADJUST_SP_NEED_NOTE);
 	      offset = callee_save_size - UNITS_PER_WORD;
 	    }
 	  else
 	    {
-	      if (xtensa_simm8x256 (-total_size))
-		insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
-					      stack_pointer_rtx,
-					      GEN_INT (-total_size)));
-	      else
-		{
-		  rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
-		  emit_move_insn (tmp_reg, GEN_INT (total_size));
-		  insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
-						stack_pointer_rtx, tmp_reg));
-		}
-	      RTX_FRAME_RELATED_P (insn) = 1;
-	      note_rtx = gen_rtx_SET (stack_pointer_rtx,
-				      plus_constant (Pmode, stack_pointer_rtx,
-						     -total_size));
-	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
+	      insn = xtensa_emit_adjust_stack_ptr (-total_size,
+						   ADJUST_SP_NEED_NOTE);
 	      offset = total_size - UNITS_PER_WORD;
 	    }
 	}
 
       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
-	{
-	  if (xtensa_call_save_reg(regno))
-	    {
-	      rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
-	      rtx mem = gen_frame_mem (SImode, x);
-	      rtx reg = gen_rtx_REG (SImode, regno);
-
-	      offset -= UNITS_PER_WORD;
-	      insn = emit_move_insn (mem, reg);
-	      RTX_FRAME_RELATED_P (insn) = 1;
-	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
-			    gen_rtx_SET (mem, reg));
-	    }
-	}
+	if (xtensa_call_save_reg (regno))
+	  {
+	    rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
+	    rtx mem = gen_frame_mem (SImode, x);
+	    rtx reg = gen_rtx_REG (SImode, regno);
+
+	    offset -= UNITS_PER_WORD;
+	    insn = emit_move_insn (mem, reg);
+	    RTX_FRAME_RELATED_P (insn) = 1;
+	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+			  gen_rtx_SET (mem, reg));
+	  }
       if (total_size > 1024
 	  || (!callee_save_size && total_size > 128))
-	{
-	  if (xtensa_simm8x256 (callee_save_size - total_size))
-	    insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
-					  stack_pointer_rtx,
-					  GEN_INT (callee_save_size -
-						   total_size)));
-	  else
-	    {
-	      rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
-	      emit_move_insn (tmp_reg, GEN_INT (total_size -
-						callee_save_size));
-	      insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
-					    stack_pointer_rtx, tmp_reg));
-	    }
-	  RTX_FRAME_RELATED_P (insn) = 1;
-	  note_rtx = gen_rtx_SET (stack_pointer_rtx,
-				  plus_constant (Pmode, stack_pointer_rtx,
-						 callee_save_size -
-						 total_size));
-	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
-	}
+	insn = xtensa_emit_adjust_stack_ptr (callee_save_size - total_size,
+					     ADJUST_SP_NEED_NOTE);
     }
 
   if (frame_pointer_needed)
@@ -3326,24 +3329,17 @@ xtensa_expand_epilogue (bool sibcall_p)
     {
       int regno;
       HOST_WIDE_INT offset;
+      df_ref ref;
+      bool stack_pointer_used = false;
 
-      if (cfun->machine->current_frame_size > (frame_pointer_needed ? 127 : 1024))
+      if (cfun->machine->current_frame_size > (frame_pointer_needed ? 127
+								    : 1024))
 	{
-	  if (xtensa_simm8x256 (cfun->machine->current_frame_size -
-				cfun->machine->callee_save_size))
-	    emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ?
-				   hard_frame_pointer_rtx : stack_pointer_rtx,
-				   GEN_INT (cfun->machine->current_frame_size -
-					    cfun->machine->callee_save_size)));
-	  else
-	    {
-	      rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
-	      emit_move_insn (tmp_reg, GEN_INT (cfun->machine->current_frame_size -
-						cfun->machine->callee_save_size));
-	      emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ?
-				     hard_frame_pointer_rtx : stack_pointer_rtx,
-				     tmp_reg));
-	    }
+	  xtensa_emit_adjust_stack_ptr (cfun->machine->current_frame_size -
+					cfun->machine->callee_save_size,
+					frame_pointer_needed
+					? ADJUST_SP_FRAME_PTR
+					: ADJUST_SP_NONE);
 	  offset = cfun->machine->callee_save_size - UNITS_PER_WORD;
 	}
       else
@@ -3359,19 +3355,18 @@ xtensa_expand_epilogue (bool sibcall_p)
 	emit_insn (gen_blockage ());
 
       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
-	{
-	  if (xtensa_call_save_reg(regno))
-	    {
-	      rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
-	      rtx reg;
-
-	      offset -= UNITS_PER_WORD;
-	      emit_move_insn (reg = gen_rtx_REG (SImode, regno),
-			      gen_frame_mem (SImode, x));
-	      if (regno == A0_REG && sibcall_p)
-		emit_use (reg);
-	    }
-	}
+	if (xtensa_call_save_reg (regno))
+	  {
+	    rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
+	    rtx reg;
+
+	    offset -= UNITS_PER_WORD;
+	    emit_move_insn (reg = gen_rtx_REG (SImode, regno),
+			    gen_frame_mem (SImode, x));
+	    if (regno == A0_REG && sibcall_p)
+	      emit_use (reg);
+	    stack_pointer_used = true;
+	  }
 
       if (cfun->machine->current_frame_size > 0)
 	{
@@ -3384,31 +3379,42 @@ xtensa_expand_epilogue (bool sibcall_p)
 	      else
 		offset = cfun->machine->callee_save_size;
 	      if (offset)
-		emit_insn (gen_addsi3 (stack_pointer_rtx,
-				       stack_pointer_rtx,
-				       GEN_INT (offset)));
+		xtensa_emit_adjust_stack_ptr (offset, ADJUST_SP_NONE);
 	    }
 	  else
-	    {
-	      if (xtensa_simm8x256 (cfun->machine->current_frame_size))
-		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
-				       GEN_INT (cfun->machine->current_frame_size)));
-	      else
-		{
-		  rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
-		  emit_move_insn (tmp_reg,
-				  GEN_INT (cfun->machine->current_frame_size));
-		  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
-					 tmp_reg));
-		}
-	    }
+	    xtensa_emit_adjust_stack_ptr (cfun->machine->current_frame_size,
+					  ADJUST_SP_NONE);
 	}
 
       if (crtl->calls_eh_return)
 	emit_insn (gen_add3_insn (stack_pointer_rtx,
 				  stack_pointer_rtx,
 				  EH_RETURN_STACKADJ_RTX));
+
+      /* Check if the function body uses the stack pointer.  */
+      for (ref = DF_REG_USE_CHAIN (A1_REG);
+	   ref; ref = DF_REF_NEXT_REG (ref))
+	if (DF_REF_CLASS (ref) == DF_REF_REGULAR)
+	  {
+	    stack_pointer_used = true;
+	    break;
+	  }
+
+      /* Undo the insns if the stack pointer is only used for its own
+	 adjustment.  */
+      if (! (stack_pointer_used
+	     || frame_pointer_needed
+	     || crtl->calls_eh_return))
+	for (const auto &insn : *cfun->machine->logues_a1_adjusts)
+	  {
+	    rtx note = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL);
+
+	    if (note)
+	      remove_note (insn, note);
+	    PATTERN (insn) = const0_rtx;
+	  }
     }
+
   cfun->machine->epilogue_done = true;
   if (!sibcall_p)
     emit_jump_insn (gen_return ());
-- 
2.20.1

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH v2 1/2] xtensa: Eliminate unused stack frame allocation/freeing
  2022-09-02 10:18 [PATCH v2 1/2] xtensa: Eliminate unused stack frame allocation/freeing Takayuki 'January June' Suwa
@ 2022-09-05 16:05 ` Max Filippov
  0 siblings, 0 replies; 2+ messages in thread
From: Max Filippov @ 2022-09-05 16:05 UTC (permalink / raw)
  To: Takayuki 'January June' Suwa; +Cc: GCC Patches

Hi Suwa-san,

On Fri, Sep 2, 2022 at 3:57 AM Takayuki 'January June' Suwa
<jjsuwa_sys3175@yahoo.co.jp> wrote:
>
> Changes from v1:
>   (xtensa_expand_epilogue): Fixed forgetting to consider hard_frame_pointer_rtx when sharing codes.
>
> ---
> In the example below, 'x' is once placed on the stack frame and then read
> into registers as the argument value of bar():
>
>     /* example */
>     struct foo {
>       int a, b;
>     };
>     extern struct foo bar(struct foo);
>     struct foo test(void) {
>       struct foo x = { 0, 1 };
>       return bar(x);
>     }
>
> Thanks to the dead store elimination, the initialization of 'x' turns into
> merely loading the immediates to registers, but corresponding stack frame
> growth is not rolled back.  As a result:
>
>     ;; prereq: the CALL0 ABI
>     ;; before
>     test:
>         addi    sp, sp, -16     // unused stack frame allocation/freeing
>         movi.n  a2, 0
>         movi.n  a3, 1
>         addi    sp, sp, 16      // because no instructions that refer to
>         j.l     bar, a9         // the stack pointer between the two
>
> This patch eliminates such unused stack frame allocation/freeing:
>
>     ;; after
>     test:
>         movi.n  a2, 0
>         movi.n  a3, 1
>         j.l     bar, a9
>
> gcc/ChangeLog:
>
>         * config/xtensa/xtensa.cc (machine_function): New member to track
>         the insns for stack pointer adjustment inside of the pro/epilogue.
>         (xtensa_emit_adjust_stack_ptr): New function to share the common
>         codes and to record the insns for stack pointer adjustment.
>         (xtensa_expand_prologue): Change to use the function mentioned
>         above when using the CALL0 ABI.
>         (xtensa_expand_epilogue): Ditto.
>         And also change to cancel emitting the insns for the stack pointer
>         adjustment if only used for its own.
> ---
>  gcc/config/xtensa/xtensa.cc | 230 ++++++++++++++++++------------------
>  1 file changed, 118 insertions(+), 112 deletions(-)

With this change xtensa-linux-uclibc configured for call0 ABI
produces ICE while building libgcc:

gcc/libgcc/libgcc2.c: In function ‘__divdi3’:
gcc/libgcc/libgcc2.c:1230:1: internal compiler error: Segmentation fault
1230 | }
     | ^
0xea801f crash_signal
       gcc/gcc/toplev.cc:314
0x7f1fb4be5d5f ???
       ./signal/../sysdeps/unix/sysv/linux/x86_64/sigaction.c:0
0x9602e3 lookup_page_table_entry
       gcc/gcc/ggc-page.cc:630
0x9602e3 ggc_set_mark(void const*)
       gcc/gcc/ggc-page.cc:1550
0x121d7bd gt_ggc_mx_hash_set_rtx_insn__(void*)
       ./gt-xtensa.h:38
0xbe6187 gt_ggc_mx_function(void*)
       gcc-13-2434-g2f4d626e13dc-call0-le/gcc/gtype-desc.cc:1694
0xbe6187 gt_ggc_mx_function(void*)
       gcc-13-2434-g2f4d626e13dc-call0-le/gcc/gtype-desc.cc:1678
0x85634a gt_ggc_mx_lang_tree_node(void*)
       ./gt-c-c-decl.h:289
0xbe07d2 gt_ggc_mx_rtx_def(void*)
       gcc-13-2434-g2f4d626e13dc-call0-le/gcc/gtype-desc.cc:725
0xa47074 void gt_ggc_mx<dw_attr_struct>(vec<dw_attr_struct, va_gc, vl_embed>*)
       gcc/gcc/vec.h:1363
0xa47074 gt_ggc_mx_vec_dw_attr_node_va_gc_(void*)
       ./gt-dwarf2out.h:261
0xa47074 gt_ggc_mx_vec_dw_attr_node_va_gc_(void*)
       ./gt-dwarf2out.h:256
0xa47074 gt_ggc_mx_die_struct(void*)
       ./gt-dwarf2out.h:45
0xa470d7 gt_ggc_mx_die_struct(void*)
       ./gt-dwarf2out.h:27
0xa470d7 gt_ggc_mx_die_struct(void*)
       ./gt-dwarf2out.h:47
0xa470d7 gt_ggc_mx_die_struct(void*)
       ./gt-dwarf2out.h:27
0xa470d7 gt_ggc_mx_die_struct(void*)
       ./gt-dwarf2out.h:47
0xa470d7 gt_ggc_mx_die_struct(void*)
       ./gt-dwarf2out.h:27
0xa470d7 gt_ggc_mx_die_struct(void*)
       ./gt-dwarf2out.h:47
0xa470d7 gt_ggc_mx_die_struct(void*)
       ./gt-dwarf2out.h:27

-- 
Thanks.
-- Max

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-09-05 16:05 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-02 10:18 [PATCH v2 1/2] xtensa: Eliminate unused stack frame allocation/freeing Takayuki 'January June' Suwa
2022-09-05 16:05 ` Max Filippov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).