public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v7] xtensa: Eliminate the use of callee-saved register that saves and restores only once
       [not found] <20fe3e31-0660-386c-7e6d-bc0b6c0f64ad.ref@yahoo.co.jp>
@ 2023-02-17  7:54 ` Takayuki 'January June' Suwa
  2023-02-23 22:34   ` Max Filippov
  0 siblings, 1 reply; 2+ messages in thread
From: Takayuki 'January June' Suwa @ 2023-02-17  7:54 UTC (permalink / raw)
  To: GCC Patches; +Cc: Max Filippov

In the case of the CALL0 ABI, values that must be retained before and
after function calls are placed in the callee-saved registers (A12
through A15) and referenced later.  However, it is often the case that
the save and the reference are each only once and a simple register-
register move (with two exceptions; i. the register saved to/restored
from is the stack pointer, ii. the function needs an additional stack
pointer adjustment to grow the stack).

e.g. in the following example, if there are no other occurrences of
register A14:

;; before
	; prologue {
  ...
	s32i.n	a14, sp, 16
  ...				;; no frame pointer needed
				;; no additional stack growth
	; } prologue
  ...
	mov.n	a14, a6		;; A6 is not SP
  ...
	call0	foo
  ...
	mov.n	a8, a14		;; A8 is not SP
  ...
	; epilogue {
  ...
	l32i.n	a14, sp, 16
  ...
	; } epilogue

It can be possible like this:

;; after
	; prologue {
  ...
	(no save needed)
  ...
	; } prologue
  ...
	s32i.n	a6, sp, 16	;; replaced with A14's slot
  ...
	call0	foo
  ...
	l32i.n	a8, sp, 16	;; through SP
  ...
	; epilogue {
  ...
	(no restoration needed)
  ...
	; } epilogue

This patch adds the abovementioned logic to the function prologue/epilogue
RTL expander code.

gcc/ChangeLog:

	* config/xtensa/xtensa.cc (machine_function): Add new member
	'eliminated_callee_saved_regs'.
	(xtensa_can_eliminate_callee_saved_reg_p): New function to
	determine whether the register can be eliminated or not.
	(xtensa_expand_prologue): Add invoking the above function and
	elimination the use of callee-saved register by using its stack
	slot through the stack pointer (or the frame pointer if needed)
	directly.
	(xtensa_expand_prologue): Modify to not emit register restoration
	insn from its stack slot if the register is already eliminated.

gcc/testsuite/ChangeLog:

	* gcc.target/xtensa/elim_callee_saved.c: New.
---
 gcc/config/xtensa/xtensa.cc                   | 134 ++++++++++++++----
 .../gcc.target/xtensa/elim_callee_saved.c     |  37 +++++
 2 files changed, 146 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 3e2e22d4cbe..d987f1dfede 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -105,6 +105,7 @@ struct GTY(()) machine_function
   bool epilogue_done;
   bool inhibit_logues_a1_adjusts;
   rtx last_logues_a9_content;
+  bitmap eliminated_callee_saved_regs;
 };
 
 static void xtensa_option_override (void);
@@ -3343,6 +3344,65 @@ xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int flags)
     cfun->machine->last_logues_a9_content = GEN_INT (offset);
 }
 
+static bool
+xtensa_can_eliminate_callee_saved_reg_p (unsigned int regno,
+					 rtx_insn **p_insnS,
+					 rtx_insn **p_insnR)
+{
+  df_ref ref;
+  rtx_insn *insn, *insnS = NULL, *insnR = NULL;
+  rtx pattern;
+
+  if (!optimize || !df || call_used_or_fixed_reg_p (regno)
+      || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM))
+    return false;
+
+  for (ref = DF_REG_DEF_CHAIN (regno);
+       ref; ref = DF_REF_NEXT_REG (ref))
+    if (DF_REF_CLASS (ref) != DF_REF_REGULAR
+	|| DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
+      continue;
+    else if (GET_CODE (pattern = PATTERN (insn)) == SET
+	     && REG_P (SET_DEST (pattern))
+	     && REGNO (SET_DEST (pattern)) == regno
+	     && REG_NREGS (SET_DEST (pattern)) == 1
+	     && REG_P (SET_SRC (pattern)))
+      {
+	if (insnS)
+	  return false;
+	insnS = insn;
+	continue;
+      }
+    else
+      return false;
+
+  for (ref = DF_REG_USE_CHAIN (regno);
+       ref; ref = DF_REF_NEXT_REG (ref))
+    if (DF_REF_CLASS (ref) != DF_REF_REGULAR
+	|| DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
+      continue;
+    else if (GET_CODE (pattern = PATTERN (insn)) == SET
+	     && REG_P (SET_SRC (pattern))
+	     && REGNO (SET_SRC (pattern)) == regno
+	     && REG_NREGS (SET_SRC (pattern)) == 1
+	     && REG_P (SET_DEST (pattern)))
+      {
+	if (insnR)
+	  return false;
+	insnR = insn;
+	continue;
+      }
+    else
+      return false;
+
+  if (!insnS || !insnR)
+    return false;
+
+  *p_insnS = insnS, *p_insnR = insnR;
+
+  return true;
+}
+
 /* minimum frame = reg save area (4 words) plus static chain (1 word)
    and the total number of words must be a multiple of 128 bits.  */
 #define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
@@ -3382,6 +3442,7 @@ xtensa_expand_prologue (void)
       df_ref ref;
       bool stack_pointer_needed = frame_pointer_needed
 				  || crtl->calls_eh_return;
+      bool large_stack_needed;
 
       /* Check if the function body really needs the stack pointer.  */
       if (!stack_pointer_needed && df)
@@ -3430,23 +3491,44 @@ xtensa_expand_prologue (void)
 	    }
 	}
 
+      cfun->machine->eliminated_callee_saved_regs
+	= bitmap_alloc (&bitmap_default_obstack);
+      large_stack_needed = total_size > 1024
+			   || (!callee_save_size && total_size > 128);
       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
-	{
-	  if (xtensa_call_save_reg(regno))
-	    {
-	      rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
-	      rtx mem = gen_frame_mem (SImode, x);
-	      rtx reg = gen_rtx_REG (SImode, regno);
+	if (xtensa_call_save_reg(regno))
+	  {
+	    rtx x = gen_rtx_PLUS (Pmode,
+				  stack_pointer_rtx, GEN_INT (offset));
+	    rtx mem = gen_frame_mem (SImode, x);
+	    rtx_insn *insnS, *insnR;
+
+	    if (!large_stack_needed
+		&& xtensa_can_eliminate_callee_saved_reg_p (regno,
+							    &insnS, &insnR))
+	      {
+		if (frame_pointer_needed)
+		  mem = replace_rtx (mem, stack_pointer_rtx,
+				     hard_frame_pointer_rtx);
+		SET_DEST (PATTERN (insnS)) = mem;
+		df_insn_rescan (insnS);
+		SET_SRC (PATTERN (insnR)) = copy_rtx (mem);
+		df_insn_rescan (insnR);
+		bitmap_set_bit (cfun->machine->eliminated_callee_saved_regs,
+				regno);
+	      }
+	    else
+	      {
+		rtx reg = gen_rtx_REG (SImode, regno);
 
-	      offset -= UNITS_PER_WORD;
-	      insn = emit_move_insn (mem, reg);
-	      RTX_FRAME_RELATED_P (insn) = 1;
-	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
-			    gen_rtx_SET (mem, reg));
-	    }
-	}
-      if (total_size > 1024
-	  || (!callee_save_size && total_size > 128))
+		insn = emit_move_insn (mem, reg);
+		RTX_FRAME_RELATED_P (insn) = 1;
+		add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+			      gen_rtx_SET (mem, reg));
+	      }
+	    offset -= UNITS_PER_WORD;
+	  }
+      if (large_stack_needed)
 	xtensa_emit_adjust_stack_ptr (callee_save_size - total_size,
 				      ADJUST_SP_NEED_NOTE);
     }
@@ -3535,16 +3617,18 @@ xtensa_expand_epilogue (bool sibcall_p)
 	emit_insn (gen_blockage ());
 
       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
-	{
-	  if (xtensa_call_save_reg(regno))
-	    {
-	      rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
-
-	      offset -= UNITS_PER_WORD;
-	      emit_move_insn (gen_rtx_REG (SImode, regno),
-			      gen_frame_mem (SImode, x));
-	    }
-	}
+	if (xtensa_call_save_reg(regno))
+	  {
+	    if (! bitmap_bit_p (cfun->machine->eliminated_callee_saved_regs,
+				regno))
+	      {
+		rtx x = gen_rtx_PLUS (Pmode,
+				      stack_pointer_rtx, GEN_INT (offset));
+		emit_move_insn (gen_rtx_REG (SImode, regno),
+				gen_frame_mem (SImode, x));
+	      }
+	    offset -= UNITS_PER_WORD;
+	  }
       if (sibcall_p)
 	emit_use (gen_rtx_REG (SImode, A0_REG));
 
diff --git a/gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c b/gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c
new file mode 100644
index 00000000000..d123e6c01cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=call0" } */
+
+extern void foo(void);
+
+/* eliminated one register (the reservoir of variable 'a') by its stack slot through the stack pointer.  */
+int test0(int a) {
+  int array[252];  /* the maximum bound of non-large stack.  */
+  foo();
+  asm volatile("" : : "m"(array));
+  return a;
+}
+
+/* cannot eliminate if large stack is needed, because the offset from TOS cannot fit into single L32I/S32I instruction.  */
+int test1(int a) {
+  int array[10000];  /* requires large stack.  */
+  foo();
+  asm volatile("" : : "m"(array));
+  return a;
+}
+
+/* register A15 is the reservoir of the stack pointer and cannot be eliminated if the frame pointer is needed.
+   other registers still can be, but through the frame pointer rather the stack pointer.  */
+int test2(int a) {
+  int* p = __builtin_alloca(16);
+  foo();
+  asm volatile("" : : "r"(p));
+  return a;
+}
+
+/* in -O0 the composite hard registers may still remain unsplitted at pro_and_epilogue and must be excluded.  */
+extern double bar(void);
+int __attribute__((optimize(0))) test3(int a) {
+  return bar() + a;
+}
+
+/* { dg-final { scan-assembler-times "a15, 8" 2 } } */
-- 
2.30.2

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH v7] xtensa: Eliminate the use of callee-saved register that saves and restores only once
  2023-02-17  7:54 ` [PATCH v7] xtensa: Eliminate the use of callee-saved register that saves and restores only once Takayuki 'January June' Suwa
@ 2023-02-23 22:34   ` Max Filippov
  0 siblings, 0 replies; 2+ messages in thread
From: Max Filippov @ 2023-02-23 22:34 UTC (permalink / raw)
  To: Takayuki 'January June' Suwa; +Cc: GCC Patches

On Thu, Feb 16, 2023 at 11:54 PM Takayuki 'January June' Suwa
<jjsuwa_sys3175@yahoo.co.jp> wrote:
>
> In the case of the CALL0 ABI, values that must be retained before and
> after function calls are placed in the callee-saved registers (A12
> through A15) and referenced later.  However, it is often the case that
> the save and the reference are each only once and a simple register-
> register move (with two exceptions; i. the register saved to/restored
> from is the stack pointer, ii. the function needs an additional stack
> pointer adjustment to grow the stack).
>
> e.g. in the following example, if there are no other occurrences of
> register A14:
>
> ;; before
>         ; prologue {
>   ...
>         s32i.n  a14, sp, 16
>   ...                           ;; no frame pointer needed
>                                 ;; no additional stack growth
>         ; } prologue
>   ...
>         mov.n   a14, a6         ;; A6 is not SP
>   ...
>         call0   foo
>   ...
>         mov.n   a8, a14         ;; A8 is not SP
>   ...
>         ; epilogue {
>   ...
>         l32i.n  a14, sp, 16
>   ...
>         ; } epilogue
>
> It can be possible like this:
>
> ;; after
>         ; prologue {
>   ...
>         (no save needed)
>   ...
>         ; } prologue
>   ...
>         s32i.n  a6, sp, 16      ;; replaced with A14's slot
>   ...
>         call0   foo
>   ...
>         l32i.n  a8, sp, 16      ;; through SP
>   ...
>         ; epilogue {
>   ...
>         (no restoration needed)
>   ...
>         ; } epilogue
>
> This patch adds the abovementioned logic to the function prologue/epilogue
> RTL expander code.
>
> gcc/ChangeLog:
>
>         * config/xtensa/xtensa.cc (machine_function): Add new member
>         'eliminated_callee_saved_regs'.
>         (xtensa_can_eliminate_callee_saved_reg_p): New function to
>         determine whether the register can be eliminated or not.
>         (xtensa_expand_prologue): Add invoking the above function and
>         elimination the use of callee-saved register by using its stack
>         slot through the stack pointer (or the frame pointer if needed)
>         directly.
>         (xtensa_expand_prologue): Modify to not emit register restoration
>         insn from its stack slot if the register is already eliminated.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/xtensa/elim_callee_saved.c: New.
> ---
>  gcc/config/xtensa/xtensa.cc                   | 134 ++++++++++++++----
>  .../gcc.target/xtensa/elim_callee_saved.c     |  37 +++++
>  2 files changed, 146 insertions(+), 25 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-02-23 22:34 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20fe3e31-0660-386c-7e6d-bc0b6c0f64ad.ref@yahoo.co.jp>
2023-02-17  7:54 ` [PATCH v7] xtensa: Eliminate the use of callee-saved register that saves and restores only once Takayuki 'January June' Suwa
2023-02-23 22:34   ` Max Filippov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).