public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] RISC-V: optimize stack manipulation in save-restore
@ 2022-11-30  8:37 Fei Gao
  2022-11-30 22:50 ` Palmer Dabbelt
  0 siblings, 1 reply; 4+ messages in thread
From: Fei Gao @ 2022-11-30  8:37 UTC (permalink / raw)
  To: gcc-patches; +Cc: jeffreyalaw, kito.cheng, palmer, Fei Gao

The stack that save-restore reserves is not well accumulated in stack allocation and deallocation.
This patch allows less instructions to be used in stack allocation and deallocation if save-restore enabled,
and also a much clear logic for save-restore stack manipulation.

before patch:
	bar:
		call	t0,__riscv_save_4
		addi	sp,sp,-64
		...
		li	t0,-12288
		addi	t0,t0,-1968 # optimized out after patch
		add	sp,sp,t0 # prologue
		...
		li	t0,12288 # epilogue
		addi	t0,t0,2000 # optimized out after patch
		add	sp,sp,t0
		...
		addi	sp,sp,32
		tail	__riscv_restore_4

after patch:
	bar:
		call	t0,__riscv_save_4
		addi	sp,sp,-2032
		...
		li	t0,-12288
		add	sp,sp,t0 # prologue
		...
		li	t0,12288 # epilogue
		add	sp,sp,t0
		...
		addi	sp,sp,2032
		tail	__riscv_restore_4

gcc/ChangeLog:

        * config/riscv/riscv.cc (riscv_first_stack_step): add a new function parameter remaining_size.
        (riscv_compute_frame_info): adapt new riscv_first_stack_step interface.
        (riscv_expand_prologue): consider save-restore in stack allocation.
        (riscv_expand_epilogue): consider save-restore in stack deallocation.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/stack_save_restore.c: New test.
---
 gcc/config/riscv/riscv.cc                     | 58 ++++++++++---------
 .../gcc.target/riscv/stack_save_restore.c     | 40 +++++++++++++
 2 files changed, 70 insertions(+), 28 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/stack_save_restore.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 05bdba5ab4d..9e92e729a5f 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4634,7 +4634,7 @@ riscv_save_libcall_count (unsigned mask)
    They decrease stack_pointer_rtx but leave frame_pointer_rtx and
    hard_frame_pointer_rtx unchanged.  */
 
-static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame);
+static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size);
 
 /* Handle stack align for poly_int.  */
 static poly_int64
@@ -4663,7 +4663,7 @@ riscv_compute_frame_info (void)
      save/restore t0.  We check for this before clearing the frame struct.  */
   if (cfun->machine->interrupt_handler_p)
     {
-      HOST_WIDE_INT step1 = riscv_first_stack_step (frame);
+      HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
       if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1)))
 	interrupt_save_prologue_temp = true;
     }
@@ -4913,31 +4913,31 @@ riscv_restore_reg (rtx reg, rtx mem)
    without adding extra instructions.  */
 
 static HOST_WIDE_INT
-riscv_first_stack_step (struct riscv_frame_info *frame)
+riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size)
 {
-  HOST_WIDE_INT frame_total_constant_size;
-  if (!frame->total_size.is_constant ())
-    frame_total_constant_size
-      = riscv_stack_align (frame->total_size.coeffs[0])
-	- riscv_stack_align (frame->total_size.coeffs[1]);
+  HOST_WIDE_INT remaining_const_size;
+  if (!remaining_size.is_constant ())
+    remaining_const_size
+      = riscv_stack_align (remaining_size.coeffs[0])
+	- riscv_stack_align (remaining_size.coeffs[1]);
   else
-    frame_total_constant_size = frame->total_size.to_constant ();
+    remaining_const_size = remaining_size.to_constant ();
 
-  if (SMALL_OPERAND (frame_total_constant_size))
-    return frame_total_constant_size;
+  if (SMALL_OPERAND (remaining_const_size))
+    return remaining_const_size;
 
   HOST_WIDE_INT min_first_step =
-    RISCV_STACK_ALIGN ((frame->total_size - frame->frame_pointer_offset).to_constant());
+    RISCV_STACK_ALIGN ((remaining_size - frame->frame_pointer_offset).to_constant());
   HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
-  HOST_WIDE_INT min_second_step = frame_total_constant_size - max_first_step;
+  HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step;
   gcc_assert (min_first_step <= max_first_step);
 
   /* As an optimization, use the least-significant bits of the total frame
      size, so that the second adjustment step is just LUI + ADD.  */
   if (!SMALL_OPERAND (min_second_step)
-      && frame_total_constant_size % IMM_REACH < IMM_REACH / 2
-      && frame_total_constant_size % IMM_REACH >= min_first_step)
-    return frame_total_constant_size % IMM_REACH;
+      && remaining_const_size % IMM_REACH < IMM_REACH / 2
+      && remaining_const_size % IMM_REACH >= min_first_step)
+    return remaining_const_size % IMM_REACH;
 
   if (TARGET_RVC)
     {
@@ -5037,9 +5037,7 @@ riscv_expand_prologue (void)
   /* Save the registers.  */
   if ((frame->mask | frame->fmask) != 0)
     {
-      HOST_WIDE_INT step1 = riscv_first_stack_step (frame);
-      if (size.is_constant ())
-	step1 = MIN (size.to_constant(), step1);
+      HOST_WIDE_INT step1 = riscv_first_stack_step (frame, size);
 
       insn = gen_add3_insn (stack_pointer_rtx,
 			    stack_pointer_rtx,
@@ -5142,6 +5140,8 @@ riscv_expand_epilogue (int style)
   HOST_WIDE_INT step2 = 0;
   bool use_restore_libcall = ((style == NORMAL_RETURN)
 			      && riscv_use_save_libcall (frame));
+  unsigned libcall_size = use_restore_libcall ?
+                            frame->save_libcall_adjustment : 0;
   rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
   rtx insn;
 
@@ -5212,13 +5212,18 @@ riscv_expand_epilogue (int style)
       REG_NOTES (insn) = dwarf;
     }
 
+  if (use_restore_libcall)
+    frame->mask = 0; /* Temporarily fib for GPRs.  */
+
   /* If we need to restore registers, deallocate as much stack as
      possible in the second step without going out of range.  */
   if ((frame->mask | frame->fmask) != 0)
-    {
-      step2 = riscv_first_stack_step (frame);
-      step1 -= step2;
-    }
+    step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size);
+
+  if (use_restore_libcall)
+    frame->mask = mask; /* Undo the above fib.  */
+
+  step1 -= step2 + libcall_size;
 
   /* Set TARGET to BASE + STEP1.  */
   if (known_gt (step1, 0))
@@ -5272,15 +5277,12 @@ riscv_expand_epilogue (int style)
     frame->mask = 0; /* Temporarily fib that we need not save GPRs.  */
 
   /* Restore the registers.  */
-  riscv_for_each_saved_reg (frame->total_size - step2, riscv_restore_reg,
+  riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size,
+                            riscv_restore_reg,
 			    true, style == EXCEPTION_RETURN);
 
   if (use_restore_libcall)
-    {
       frame->mask = mask; /* Undo the above fib.  */
-      gcc_assert (step2 >= frame->save_libcall_adjustment);
-      step2 -= frame->save_libcall_adjustment;
-    }
 
   if (need_barrier_p)
     riscv_emit_stack_tie ();
diff --git a/gcc/testsuite/gcc.target/riscv/stack_save_restore.c b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c
new file mode 100644
index 00000000000..4695ef9469a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32imafc -mabi=ilp32f -msave-restore -O2 -fno-schedule-insns -fno-schedule-insns2 -fno-unroll-loops -fno-peel-loops" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+char my_getchar();
+float getf();
+
+/*
+**bar:
+**	call	t0,__riscv_save_4
+**	addi	sp,sp,-2032
+**	...
+**	li	t0,-12288
+**	add	sp,sp,t0
+**	...
+**	li	t0,12288
+**	add	sp,sp,t0
+**	...
+**	addi	sp,sp,2032
+**	tail	__riscv_restore_4
+*/
+int bar()
+{
+  float volatile farray[3568];
+
+  float sum = 0;
+  float f1 = getf();
+  float f2 = getf();
+  float f3 = getf();
+  float f4 = getf();
+
+  for (int i = 0; i < 3568; i++)
+  {
+    farray[i] = my_getchar() * 1.2;
+    sum += farray[i];
+  }
+
+  return sum + f1 + f2 + f3 + f4;
+}
+
-- 
2.17.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-12-06  1:13 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-30  8:37 [PATCH] RISC-V: optimize stack manipulation in save-restore Fei Gao
2022-11-30 22:50 ` Palmer Dabbelt
2022-12-01  3:07   ` Fei Gao
2022-12-06  1:13     ` Fei Gao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).