public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v2] xtensa: Optimize stack frame adjustment more
       [not found] <19010306-4056-6f84-e555-e744f4f5061e.ref@yahoo.co.jp>
@ 2023-01-07  2:55 ` Takayuki 'January June' Suwa
  2023-01-07 20:23   ` Max Filippov
  0 siblings, 1 reply; 2+ messages in thread
From: Takayuki 'January June' Suwa @ 2023-01-07  2:55 UTC (permalink / raw)
  To: GCC Patches; +Cc: Max Filippov

This patch introduces a convenient helper function for integer immediate
addition with scratch register as needed, that splits and emits either
up to two ADDI/ADDMI machine instructions or an addition by register
following an integer immediate load (which may later be transformed by
constantsynth).

By using the helper function, it makes stack frame adjustment logic
simplified and instruction count less in some cases.

gcc/ChangeLog:

	* config/xtensa/xtensa.cc
	(xtensa_split_imm_two_addends, xtensa_emit_add_imm):
	New helper functions.
	(xtensa_set_return_address, xtensa_output_mi_thunk):
	Change to use the helper function.
	(xtensa_emit_adjust_stack_ptr): Ditto.
	And also change to try reusing the content of scratch register
	A9 if the register is not modified in the function body.
---
 gcc/config/xtensa/xtensa.cc | 151 +++++++++++++++++++++++++-----------
 1 file changed, 106 insertions(+), 45 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index ae44199bc98..a1f184950ae 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -104,6 +104,7 @@ struct GTY(()) machine_function
   bool frame_laid_out;
   bool epilogue_done;
   bool inhibit_logues_a1_adjusts;
+  rtx last_logues_a9_content;
 };
 
 /* Vector, indexed by hard register number, which contains 1 for a
@@ -2518,6 +2519,86 @@ xtensa_split_DI_reg_imm (rtx *operands)
 }
 
 
+/* Try to split an integer value into what are suitable for two consecutive
+   immediate addition instructions, ADDI or ADDMI.  */
+
+static bool
+xtensa_split_imm_two_addends (HOST_WIDE_INT imm, HOST_WIDE_INT v[2])
+{
+  HOST_WIDE_INT v0, v1;
+
+  if (imm < -32768)
+    v0 = -32768, v1 = imm + 32768;
+  else if (imm > 32512)
+    v0 = 32512, v1 = imm - 32512;
+  else if (TARGET_DENSITY && xtensa_simm12b (imm))
+    /* A pair of MOVI(.N) and ADD.N is one or two bytes less than two
+       immediate additions if TARGET_DENSITY.  */
+    return false;
+  else
+    v0 = (imm + 128) & ~255L, v1 = imm - v0;
+
+  if (xtensa_simm8 (v1) || xtensa_simm8x256 (v1))
+    {
+      v[0] = v0, v[1] = v1;
+      return true;
+    }
+
+  return false;
+}
+
+
+/* Helper function for integer immediate addition with scratch register
+   as needed, that splits and emits either up to two ADDI/ADDMI machine
+   instructions or an addition by register following an integer immediate
+   load (which may later be transformed by constantsynth).
+
+   If 'scratch' is NULL_RTX but still needed, a new pseudo-register will
+   be allocated.  Thus, after the reload/LRA pass, the specified scratch
+   register must be a hard one.  */
+
+static bool
+xtensa_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch,
+		     bool need_note)
+{
+  bool retval = false;
+  HOST_WIDE_INT v[2];
+  rtx_insn *insn;
+
+  if (imm == 0)
+    return false;
+
+  if (xtensa_simm8 (imm) || xtensa_simm8x256 (imm))
+    insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
+  else if (xtensa_split_imm_two_addends (imm, v))
+    {
+      if (!scratch)
+	scratch = gen_reg_rtx (SImode);
+      emit_insn (gen_addsi3 (scratch, src, GEN_INT (v[0])));
+      insn = emit_insn (gen_addsi3 (dst, scratch, GEN_INT (v[1])));
+    }
+  else
+    {
+      if (scratch)
+	emit_move_insn (scratch, GEN_INT (imm));
+      else
+	scratch = force_reg (SImode, GEN_INT (imm));
+      retval = true;
+      insn = emit_insn (gen_addsi3 (dst, src, scratch));
+    }
+
+  if (need_note)
+    {
+      rtx note_rtx = gen_rtx_SET (dst, plus_constant (Pmode, src, imm));
+
+      RTX_FRAME_RELATED_P (insn) = 1;
+      add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
+    }
+
+  return retval;
+}
+
+
 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
 
 static bool
@@ -3245,41 +3326,33 @@ xtensa_initial_elimination_offset (int from, int to ATTRIBUTE_UNUSED)
 static void
 xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int flags)
 {
+  rtx src, scratch;
   rtx_insn *insn;
-  rtx ptr = (flags & ADJUST_SP_FRAME_PTR) ? hard_frame_pointer_rtx
-					  : stack_pointer_rtx;
 
   if (cfun->machine->inhibit_logues_a1_adjusts)
     return;
 
-  if (xtensa_simm8 (offset)
-      || xtensa_simm8x256 (offset))
-    insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, GEN_INT (offset)));
-  else
-    {
-      rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
+  src = (flags & ADJUST_SP_FRAME_PTR)
+	 ? hard_frame_pointer_rtx : stack_pointer_rtx;
+  scratch = gen_rtx_REG (Pmode, A9_REG);
 
-      if (offset < 0)
-	{
-	  emit_move_insn (tmp_reg, GEN_INT (-offset));
-	  insn = emit_insn (gen_subsi3 (stack_pointer_rtx, ptr, tmp_reg));
-	}
-      else
-	{
-	  emit_move_insn (tmp_reg, GEN_INT (offset));
-	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr,	tmp_reg));
-	}
-    }
-
-  if (flags & ADJUST_SP_NEED_NOTE)
+  if (df && DF_REG_DEF_COUNT (A9_REG) == 0
+      && cfun->machine->last_logues_a9_content
+      && -INTVAL (cfun->machine->last_logues_a9_content) == offset)
     {
-      rtx note_rtx = gen_rtx_SET (stack_pointer_rtx,
-				  plus_constant (Pmode, stack_pointer_rtx,
-						 offset));
+      insn = emit_insn (gen_subsi3 (stack_pointer_rtx, src, scratch));
+      if (flags & ADJUST_SP_NEED_NOTE)
+	{
+	  rtx note_rtx = gen_rtx_SET (stack_pointer_rtx,
+				      plus_constant (Pmode, src, offset));
 
-      RTX_FRAME_RELATED_P (insn) = 1;
-      add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
+	  RTX_FRAME_RELATED_P (insn) = 1;
+	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
+	}
     }
+  else if (xtensa_emit_add_imm (stack_pointer_rtx, src, offset, scratch,
+				(flags & ADJUST_SP_NEED_NOTE)))
+    cfun->machine->last_logues_a9_content = GEN_INT (offset);
 }
 
 /* minimum frame = reg save area (4 words) plus static chain (1 word)
@@ -3307,8 +3380,9 @@ xtensa_expand_prologue (void)
 	  /* Use a8 as a temporary since a0-a7 may be live.  */
 	  rtx tmp_reg = gen_rtx_REG (Pmode, A8_REG);
 	  emit_insn (gen_entry (GEN_INT (MIN_FRAME_SIZE)));
-	  emit_move_insn (tmp_reg, GEN_INT (total_size - MIN_FRAME_SIZE));
-	  emit_insn (gen_subsi3 (tmp_reg, stack_pointer_rtx, tmp_reg));
+	  xtensa_emit_add_imm (tmp_reg, stack_pointer_rtx,
+			       MIN_FRAME_SIZE - total_size,
+			       tmp_reg, false);
 	  insn = emit_insn (gen_movsi (stack_pointer_rtx, tmp_reg));
 	}
     }
@@ -3540,8 +3614,8 @@ xtensa_set_return_address (rtx address, rtx scratch)
 
   if (total_size > 1024)
     {
-      emit_move_insn (scratch, GEN_INT (total_size - UNITS_PER_WORD));
-      emit_insn (gen_addsi3 (scratch, frame, scratch));
+      xtensa_emit_add_imm (scratch, frame, total_size - UNITS_PER_WORD,
+			   scratch, false);
       a0_addr = scratch;
     }
 
@@ -5101,15 +5175,7 @@ xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
   this_rtx = gen_rtx_REG (Pmode, A0_REG + this_reg_no);
 
   if (delta)
-    {
-      if (xtensa_simm8 (delta))
-	emit_insn (gen_addsi3 (this_rtx, this_rtx, GEN_INT (delta)));
-      else
-	{
-	  emit_move_insn (temp0, GEN_INT (delta));
-	  emit_insn (gen_addsi3 (this_rtx, this_rtx, temp0));
-	}
-    }
+    xtensa_emit_add_imm (this_rtx, this_rtx, delta, temp0, false);
 
   if (vcall_offset)
     {
@@ -5119,13 +5185,8 @@ xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
       emit_move_insn (temp0, gen_rtx_MEM (Pmode, this_rtx));
       if (xtensa_uimm8x4 (vcall_offset))
 	addr = plus_constant (Pmode, temp0, vcall_offset);
-      else if (xtensa_simm8 (vcall_offset))
-	emit_insn (gen_addsi3 (temp1, temp0, GEN_INT (vcall_offset)));
       else
-	{
-	  emit_move_insn (temp1, GEN_INT (vcall_offset));
-	  emit_insn (gen_addsi3 (temp1, temp0, temp1));
-	}
+	xtensa_emit_add_imm (temp1, temp0, vcall_offset, temp1, false);
       emit_move_insn (temp1, gen_rtx_MEM (Pmode, addr));
       emit_insn (gen_add2_insn (this_rtx, temp1));
     }
-- 
2.30.2

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH v2] xtensa: Optimize stack frame adjustment more
  2023-01-07  2:55 ` [PATCH v2] xtensa: Optimize stack frame adjustment more Takayuki 'January June' Suwa
@ 2023-01-07 20:23   ` Max Filippov
  0 siblings, 0 replies; 2+ messages in thread
From: Max Filippov @ 2023-01-07 20:23 UTC (permalink / raw)
  To: Takayuki 'January June' Suwa; +Cc: GCC Patches

On Fri, Jan 6, 2023 at 6:55 PM Takayuki 'January June' Suwa
<jjsuwa_sys3175@yahoo.co.jp> wrote:
>
> This patch introduces a convenient helper function for integer immediate
> addition with scratch register as needed, that splits and emits either
> up to two ADDI/ADDMI machine instructions or an addition by register
> following an integer immediate load (which may later be transformed by
> constantsynth).
>
> By using the helper function, it makes stack frame adjustment logic
> simplified and instruction count less in some cases.
>
> gcc/ChangeLog:
>
>         * config/xtensa/xtensa.cc
>         (xtensa_split_imm_two_addends, xtensa_emit_add_imm):
>         New helper functions.
>         (xtensa_set_return_address, xtensa_output_mi_thunk):
>         Change to use the helper function.
>         (xtensa_emit_adjust_stack_ptr): Ditto.
>         And also change to try reusing the content of scratch register
>         A9 if the register is not modified in the function body.
> ---
>  gcc/config/xtensa/xtensa.cc | 151 +++++++++++++++++++++++++-----------
>  1 file changed, 106 insertions(+), 45 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-01-07 20:24 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <19010306-4056-6f84-e555-e744f4f5061e.ref@yahoo.co.jp>
2023-01-07  2:55 ` [PATCH v2] xtensa: Optimize stack frame adjustment more Takayuki 'January June' Suwa
2023-01-07 20:23   ` Max Filippov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).