public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] RISC-V modified add3 for large stack frame optimization [PR105733]
@ 2022-09-19 16:07 Kevin Lee
  2022-09-19 22:16 ` Kito Cheng
  0 siblings, 1 reply; 3+ messages in thread
From: Kevin Lee @ 2022-09-19 16:07 UTC (permalink / raw)
  To: gcc-patches; +Cc: gnu-toolchain

[-- Attachment #1: Type: text/plain, Size: 9208 bytes --]

Hello GCC,
Starting from Jim Wilson's patch in
https://github.com/riscv-admin/riscv-code-speed-optimization/blob/main/projects/gcc-optimizations.adoc
for the large stack frame optimization problem, this augmented patch
generates fewer instructions for cases such as
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105733.
Original:
foo:
li t0,-4096
addi t0,t0,2016
li a4,4096
add sp,sp,t0
li a5,-4096
addi a4,a4,-2032
add a4,a4,a5
addi a5,sp,16
add a5,a4,a5
add a0,a5,a0
li t0,4096
sd a5,8(sp)
sb zero,2032(a0)
addi t0,t0,-2016
add sp,sp,t0
jr ra
After Patch:
foo:
li t0,-4096
addi t0,t0,2032
add sp,sp,t0
addi a5,sp,-2032
add a0,a5,a0
li t0,4096
sb zero,2032(a0)
addi t0,t0,-2032
add sp,sp,t0
jr ra

               ========= Summary of gcc testsuite =========
                            | # of unexpected case / # of unique unexpected case
                            |          gcc |          g++ |     gfortran |
     rv64gc/  lp64d/ medlow |    4 /     4 |   13 /     4 |    0 /     0 |
The patch introduces no additional testsuite failures.

gcc/ChangeLog:
   Jim Wilson <jim.wilson.gcc@gmail.com>
   Michael Collison <collison@rivosinc.com>
   Kevin Lee <kevinl@rivosinc.com>

* config/riscv/predicates.md (const_lui_operand): New predicate.
(add_operand): Ditto.
(reg_or_const_int_operand): Ditto.
* config/riscv/riscv-protos.h (riscv_eliminable_reg): New
    function.
* config/riscv/riscv.cc (riscv_eliminable_reg): New function.
(riscv_adjust_libcall_cfi_prologue): Use gen_rtx_SET and
    gen_rtx_fmt_ee instead of gen_add3_insn.
(riscv_adjust_libcall_cfi_epilogue): Ditto.
* config/riscv/riscv.md (addsi3): Remove.
(adddi3): Ditto.
(add<mode>3): New instruction for large stack frame optimization.
(add<mode>3_internal): Ditto.
(add<mode>3_internal2): New instruction for insns generated in
the prologue and epilogue pass.
---
 gcc/config/riscv/predicates.md  | 13 +++++
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv.cc       | 20 ++++++--
 gcc/config/riscv/riscv.md       | 84 ++++++++++++++++++++++++++++-----
 4 files changed, 101 insertions(+), 17 deletions(-)

diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 862e72b0983..b98bb5a9768 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -35,6 +35,14 @@ (define_predicate "sfb_alu_operand"
   (ior (match_operand 0 "arith_operand")
        (match_operand 0 "lui_operand")))

+(define_predicate "const_lui_operand"
+  (and (match_code "const_int")
+       (match_test "(INTVAL (op) & 0xFFF) == 0 && INTVAL (op) != 0")))
+
+(define_predicate "add_operand"
+  (ior (match_operand 0 "arith_operand")
+       (match_operand 0 "const_lui_operand")))
+
 (define_predicate "const_csr_operand"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 0, 31)")))
@@ -59,6 +67,11 @@ (define_predicate "reg_or_0_operand"
   (ior (match_operand 0 "const_0_operand")
        (match_operand 0 "register_operand")))

+;; For use in adds, when adding to an eliminable register.
+(define_predicate "reg_or_const_int_operand"
+  (ior (match_code "const_int")
+       (match_operand 0 "register_operand")))
+
 ;; Only use branch-on-bit sequences when the mask is not an ANDI immediate.
 (define_predicate "branch_on_bit_operand"
   (and (match_code "const_int")
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 649c5c977e1..8f0aa8114be 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -63,6 +63,7 @@ extern void riscv_expand_conditional_move (rtx, rtx, rtx, rtx_code, rtx, rtx);
 extern rtx riscv_legitimize_call_address (rtx);
 extern void riscv_set_return_address (rtx, rtx);
 extern bool riscv_expand_block_move (rtx, rtx, rtx);
+extern bool riscv_eliminable_reg (rtx);
 extern rtx riscv_return_addr (int, rtx);
 extern poly_int64 riscv_initial_elimination_offset (int, int);
 extern void riscv_expand_prologue (void);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 675d92c0961..b5577a4f366 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4320,6 +4320,16 @@ riscv_initial_elimination_offset (int from, int to)
   return src - dest;
 }

+/* Return true if X is a register that will be eliminated later on.  */
+bool
+riscv_eliminable_reg (rtx x)
+{
+  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
+       || REGNO (x) == ARG_POINTER_REGNUM
+       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
+   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
+}
+
 /* Implement RETURN_ADDR_RTX.  We do not support moving back to a
    previous frame.  */

@@ -4521,8 +4531,9 @@ riscv_adjust_libcall_cfi_prologue ()
       }

   /* Debug info for adjust sp.  */
-  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
- stack_pointer_rtx, GEN_INT (-saved_size));
+  adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
+            gen_rtx_fmt_ee (PLUS, GET_MODE (stack_pointer_rtx),
+          stack_pointer_rtx, GEN_INT (saved_size)));
   dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
   dwarf);
   return dwarf;
@@ -4624,8 +4635,9 @@ riscv_adjust_libcall_cfi_epilogue ()
   int saved_size = cfun->machine->frame.save_libcall_adjustment;

   /* Debug info for adjust sp.  */
-  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
- stack_pointer_rtx, GEN_INT (saved_size));
+  adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
+            gen_rtx_fmt_ee (PLUS, GET_MODE (stack_pointer_rtx),
+          stack_pointer_rtx, GEN_INT (saved_size)));
   dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
   dwarf);

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 014206fb8bd..0285ac67b2a 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -438,23 +438,80 @@ (define_insn "add<mode>3"
   [(set_attr "type" "fadd")
    (set_attr "mode" "<UNITMODE>")])

-(define_insn "addsi3"
-  [(set (match_operand:SI          0 "register_operand" "=r,r")
- (plus:SI (match_operand:SI 1 "register_operand" " r,r")
- (match_operand:SI 2 "arith_operand"    " r,I")))]
+(define_expand "add<mode>3"
+  [(parallel
+    [(set (match_operand:GPR          0 "register_operand" "")
+  (plus:GPR (match_operand:GPR 1 "register_operand" "")
+   (match_operand:GPR 2 "add_operand" "")))
+    (clobber (match_scratch:GPR 3 ""))])]
   ""
-  "add%i2%~\t%0,%1,%2"
+{
+  if (riscv_eliminable_reg (operands[1]))
+  {
+    if (splittable_const_int_operand (operands[2], <MODE>mode))
+    {
+      /* The idea here is that we emit
+          add op0, op1, %hi(op2)
+          addi op0, op0, %lo(op2)
+        Then when op1, the eliminable reg, gets replaced with sp+offset,
+        we can simplify the constants.  */
+      HOST_WIDE_INT high_part = CONST_HIGH_PART (INTVAL (operands[2]));
+      emit_insn (gen_add<mode>3_internal (operands[0], operands[1],
+                  GEN_INT (high_part)));
+      operands[1] = operands[0];
+      operands[2] = GEN_INT (INTVAL (operands[2]) - high_part);
+    }
+    else if (! const_arith_operand (operands[2], <MODE>mode))
+      operands[2] = force_reg (<MODE>mode, operands[2]);
+  }
+})
+
+(define_insn_and_split "add<mode>3_internal"
+  [(set (match_operand:GPR          0 "register_operand" "=r,r,&r,!&r")
+ (plus:GPR (match_operand:GPR 1 "register_operand" " %r,r,r,0")
+  (match_operand:GPR 2 "add_operand"      " r,I,L,L")))
+  (clobber (match_scratch:GPR 3 "=X,X,X,&r"))]
+  ""
+{
+  if ((which_alternative == 2) || (which_alternative == 3))
+    return "#";
+
+  if (TARGET_64BIT && <MODE>mode == SImode)
+    return "add%i2w\t%0,%1,%2";
+  else
+    return "add%i2\t%0,%1,%2";
+}
+  "&& reload_completed && const_lui_operand (operands[2], <MODE>mode)"
+  [(const_int 0)]
+{
+  if (REGNO (operands[0]) != REGNO (operands[1]))
+    {
+      emit_insn (gen_mov<mode> (operands[0], operands[2]));
+      emit_insn (gen_add<mode>3_internal (operands[0], operands[0], operands[1]));
+    }
+  else
+    {
+      emit_insn (gen_mov<mode> (operands[3], operands[2]));
+      emit_insn (gen_add<mode>3_internal (operands[0], operands[0], operands[3]));
+    }
+  DONE;
+}
   [(set_attr "type" "arith")
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])

-(define_insn "adddi3"
-  [(set (match_operand:DI          0 "register_operand" "=r,r")
- (plus:DI (match_operand:DI 1 "register_operand" " r,r")
- (match_operand:DI 2 "arith_operand"    " r,I")))]
-  "TARGET_64BIT"
-  "add%i2\t%0,%1,%2"
+(define_insn "add<mode>3_internal2"
+  [(set (match_operand:GPR          0 "register_operand" "=r,r")
+ (plus:GPR (match_operand:GPR 1 "register_operand" " %r,r")
+  (match_operand:GPR 2 "arith_operand"    " r,I")))]
+  ""
+ {
+   if (TARGET_64BIT && <MODE>mode == SImode)
+     return "add%i2w\t%0,%1,%2";
+   else
+     return "add%i2\t%0,%1,%2";
+ }
   [(set_attr "type" "arith")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "<MODE>")])

 (define_expand "addv<mode>4"
   [(set (match_operand:GPR           0 "register_operand" "=r,r")
@@ -500,6 +557,7 @@ (define_expand "addv<mode>4"
   DONE;
 })

+
 (define_expand "uaddv<mode>4"
   [(set (match_operand:GPR           0 "register_operand" "=r,r")
  (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
-- 
2.25.1

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-09-21  0:04 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-19 16:07 [PATCH] RISC-V modified add3 for large stack frame optimization [PR105733] Kevin Lee
2022-09-19 22:16 ` Kito Cheng
2022-09-21  0:04   ` Kevin Lee

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).