public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v3] RISC-V modified add3 for large stack frame optimization [PR105733]
@ 2022-11-04  0:53 Kevin Lee
  2022-11-04  0:59 ` Kito Cheng
  2022-11-23  5:07 ` Jeff Law
  0 siblings, 2 replies; 3+ messages in thread
From: Kevin Lee @ 2022-11-04  0:53 UTC (permalink / raw)
  To: gcc-patches; +Cc: gnu-toolchain, Kevin Lee

This is the identical patch with https://gcc.gnu.org/pipermail/gcc-patches/2022-November/604814.html, but with the correct plaintext format. 

>The loop still seems a bit odd which may point to further improvements 
>that could be made to this patch.  Consider this fragment of the loop:

Thank you for the review Jeff! I am currently looking into this issue
in a different patch. I'll come back with some improvement.
 
gcc/ChangeLog:
   Jim Wilson <jim.wilson.gcc@gmail.com>
   Michael Collison <collison@rivosinc.com>
   Kevin Lee <kevinl@rivosinc.com>
   
	* config/riscv/predicates.md (const_lui_operand): New Predicate.
	(add_operand): Ditto.
	(reg_or_const_int_operand): Ditto.
	* config/riscv/riscv-protos.h (riscv_eliminable_reg): New 
   function.
	* config/riscv/riscv-selftests.cc (calculate_x_in_sequence):
   Consider Parallel insns.
	* config/riscv/riscv.cc (riscv_eliminable_reg): New function.
	(riscv_adjust_libcall_cfi_prologue): Use gen_rtx_SET and
   gen_rtx_fmt_ee instead of gen_add3_insn.
	(riscv_adjust_libcall_cfi_epilogue): Ditto.
	* config/riscv/riscv.md (addsi3): Remove.
	(add<mode>3): New instruction for large stack frame
   optimization.
	(add<mode>3_internal): Ditto.
	(adddi3): Remove.
	(add<mode>3_internal2): New instruction for insns generated in
   the prologue and epilogue pass.
---
 gcc/config/riscv/predicates.md      | 13 +++++
 gcc/config/riscv/riscv-protos.h     |  1 +
 gcc/config/riscv/riscv-selftests.cc |  3 ++
 gcc/config/riscv/riscv.cc           | 20 +++++--
 gcc/config/riscv/riscv.md           | 84 ++++++++++++++++++++++++-----
 5 files changed, 104 insertions(+), 17 deletions(-)

diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index c2ff41bb0fd..3149f7227ac 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -35,6 +35,14 @@
   (ior (match_operand 0 "arith_operand")
        (match_operand 0 "lui_operand")))
 
+(define_predicate "const_lui_operand"
+  (and (match_code "const_int")
+       (match_test "(INTVAL (op) & 0xFFF) == 0 && INTVAL (op) != 0")))
+
+(define_predicate "add_operand"
+  (ior (match_operand 0 "arith_operand")
+       (match_operand 0 "const_lui_operand")))
+
 (define_predicate "const_csr_operand"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 0, 31)")))
@@ -59,6 +67,11 @@
   (ior (match_operand 0 "const_0_operand")
        (match_operand 0 "register_operand")))
 
+;; For use in adds, when adding to an eliminable register.
+(define_predicate "reg_or_const_int_operand"
+  (ior (match_code "const_int")
+       (match_operand 0 "register_operand")))
+
 ;; Only use branch-on-bit sequences when the mask is not an ANDI immediate.
 (define_predicate "branch_on_bit_operand"
   (and (match_code "const_int")
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5a718bb62b4..9348ac71956 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -63,6 +63,7 @@ extern void riscv_expand_conditional_move (rtx, rtx, rtx, rtx_code, rtx, rtx);
 extern rtx riscv_legitimize_call_address (rtx);
 extern void riscv_set_return_address (rtx, rtx);
 extern bool riscv_expand_block_move (rtx, rtx, rtx);
+extern bool riscv_eliminable_reg (rtx);
 extern rtx riscv_return_addr (int, rtx);
 extern poly_int64 riscv_initial_elimination_offset (int, int);
 extern void riscv_expand_prologue (void);
diff --git a/gcc/config/riscv/riscv-selftests.cc b/gcc/config/riscv/riscv-selftests.cc
index 636874ebc0f..50457db708e 100644
--- a/gcc/config/riscv/riscv-selftests.cc
+++ b/gcc/config/riscv/riscv-selftests.cc
@@ -116,6 +116,9 @@ calculate_x_in_sequence (rtx reg)
       rtx pat = PATTERN (insn);
       rtx dest = SET_DEST (pat);
 
+      if (GET_CODE (pat) == PARALLEL)
+	dest = SET_DEST (XVECEXP (pat, 0, 0));
+
       if (GET_CODE (pat) == CLOBBER)
 	continue;
 
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 32f9ef9ade9..de9344b37a3 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4686,6 +4686,16 @@ riscv_initial_elimination_offset (int from, int to)
   return src - dest;
 }
 
+/* Return true if X is a register that will be eliminated later on.  */
+bool
+riscv_eliminable_reg (rtx x)
+{
+  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
+		       || REGNO (x) == ARG_POINTER_REGNUM
+		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
+			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
+}
+
 /* Implement RETURN_ADDR_RTX.  We do not support moving back to a
    previous frame.  */
 
@@ -4887,8 +4897,9 @@ riscv_adjust_libcall_cfi_prologue ()
       }
 
   /* Debug info for adjust sp.  */
-  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
-				 stack_pointer_rtx, GEN_INT (-saved_size));
+  adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
+				    gen_rtx_fmt_ee (PLUS, GET_MODE (stack_pointer_rtx),
+						  stack_pointer_rtx, GEN_INT (saved_size)));
   dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
 			  dwarf);
   return dwarf;
@@ -4990,8 +5001,9 @@ riscv_adjust_libcall_cfi_epilogue ()
   int saved_size = cfun->machine->frame.save_libcall_adjustment;
 
   /* Debug info for adjust sp.  */
-  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
-				 stack_pointer_rtx, GEN_INT (saved_size));
+  adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
+				    gen_rtx_fmt_ee (PLUS, GET_MODE (stack_pointer_rtx),
+						  stack_pointer_rtx, GEN_INT (saved_size)));
   dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
 			  dwarf);
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 798f7370a08..985dbdd50c4 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -446,23 +446,80 @@
   [(set_attr "type" "fadd")
    (set_attr "mode" "<UNITMODE>")])
 
-(define_insn "addsi3"
-  [(set (match_operand:SI          0 "register_operand" "=r,r")
-	(plus:SI (match_operand:SI 1 "register_operand" " r,r")
-		 (match_operand:SI 2 "arith_operand"    " r,I")))]
+(define_expand "add<mode>3"
+  [(parallel
+    [(set (match_operand:GPR          0 "register_operand" "")
+	  (plus:GPR (match_operand:GPR 1 "register_operand" "")
+		   (match_operand:GPR 2 "add_operand" "")))
+    (clobber (match_scratch:GPR 3 ""))])]
   ""
-  "add%i2%~\t%0,%1,%2"
+{
+  if (riscv_eliminable_reg (operands[1]))
+  {
+    if (splittable_const_int_operand (operands[2], <MODE>mode))
+    {
+      /* The idea here is that we emit
+          add op0, op1, %hi(op2)
+          addi op0, op0, %lo(op2)
+        Then when op1, the eliminable reg, gets replaced with sp+offset,
+        we can simplify the constants.  */
+      HOST_WIDE_INT high_part = CONST_HIGH_PART (INTVAL (operands[2]));
+      emit_insn (gen_add<mode>3_internal (operands[0], operands[1],
+                  GEN_INT (high_part)));
+      operands[1] = operands[0];
+      operands[2] = GEN_INT (INTVAL (operands[2]) - high_part);
+    }
+    else if (! const_arith_operand (operands[2], <MODE>mode))
+      operands[2] = force_reg (<MODE>mode, operands[2]);
+  }
+})
+
+(define_insn_and_split "add<mode>3_internal"
+  [(set (match_operand:GPR          0 "register_operand" "=r,r,&r,!&r")
+	(plus:GPR (match_operand:GPR 1 "register_operand" " %r,r,r,0")
+		  (match_operand:GPR 2 "add_operand"      " r,I,L,L")))
+  (clobber (match_scratch:GPR 3 "=X,X,X,&r"))]
+  ""
+{
+  if ((which_alternative == 2) || (which_alternative == 3))
+    return "#";
+
+  if (TARGET_64BIT && <MODE>mode == SImode)
+    return "add%i2w\t%0,%1,%2";
+  else
+    return "add%i2\t%0,%1,%2";
+}
+  "&& reload_completed && const_lui_operand (operands[2], <MODE>mode)"
+  [(const_int 0)]
+{
+  if (REGNO (operands[0]) != REGNO (operands[1]))
+    {
+      emit_insn (gen_mov<mode> (operands[0], operands[2]));
+      emit_insn (gen_add<mode>3_internal (operands[0], operands[0], operands[1]));
+    }
+  else
+    {
+      emit_insn (gen_mov<mode> (operands[3], operands[2]));
+      emit_insn (gen_add<mode>3_internal (operands[0], operands[0], operands[3]));
+    }
+  DONE;
+}
   [(set_attr "type" "arith")
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "adddi3"
-  [(set (match_operand:DI          0 "register_operand" "=r,r")
-	(plus:DI (match_operand:DI 1 "register_operand" " r,r")
-		 (match_operand:DI 2 "arith_operand"    " r,I")))]
-  "TARGET_64BIT"
-  "add%i2\t%0,%1,%2"
+(define_insn "add<mode>3_internal2"
+  [(set (match_operand:GPR          0 "register_operand" "=r,r")
+	(plus:GPR (match_operand:GPR 1 "register_operand" " %r,r")
+		  (match_operand:GPR 2 "arith_operand"    " r,I")))]
+  ""
+ {
+   if (TARGET_64BIT && <MODE>mode == SImode)
+     return "add%i2w\t%0,%1,%2";
+   else
+     return "add%i2\t%0,%1,%2";
+ }
   [(set_attr "type" "arith")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "<MODE>")])
 
 (define_expand "addv<mode>4"
   [(set (match_operand:GPR           0 "register_operand" "=r,r")
@@ -508,6 +565,7 @@
   DONE;
 })
 
+
 (define_expand "uaddv<mode>4"
   [(set (match_operand:GPR           0 "register_operand" "=r,r")
 	(plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
-- 
2.25.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v3] RISC-V modified add3 for large stack frame optimization [PR105733]
  2022-11-04  0:53 [PATCH v3] RISC-V modified add3 for large stack frame optimization [PR105733] Kevin Lee
@ 2022-11-04  0:59 ` Kito Cheng
  2022-11-23  5:07 ` Jeff Law
  1 sibling, 0 replies; 3+ messages in thread
From: Kito Cheng @ 2022-11-04  0:59 UTC (permalink / raw)
  To: Kevin Lee; +Cc: gcc-patches, gnu-toolchain, Jeff Law

I would like to see some benchmark results instead of just a simple
case, to make sure everything is alright, the add pattern is used
literally anywhere, my most worry is the clobber might bring some
negative impact like cause register pressure estimation get higher,
and then result worse code gen.

Dynamic instruction count is good enough and no cycle count or
complete spec score.

On Thu, Nov 3, 2022 at 5:57 PM Kevin Lee <kevinl@rivosinc.com> wrote:
>
> This is the identical patch with https://gcc.gnu.org/pipermail/gcc-patches/2022-November/604814.html, but with the correct plaintext format.
>
> >The loop still seems a bit odd which may point to further improvements
> >that could be made to this patch.  Consider this fragment of the loop:
>
> Thank you for the review Jeff! I am currently looking into this issue
> in a different patch. I'll come back with some improvement.
>
> gcc/ChangeLog:
>    Jim Wilson <jim.wilson.gcc@gmail.com>
>    Michael Collison <collison@rivosinc.com>
>    Kevin Lee <kevinl@rivosinc.com>
>
>         * config/riscv/predicates.md (const_lui_operand): New Predicate.
>         (add_operand): Ditto.
>         (reg_or_const_int_operand): Ditto.
>         * config/riscv/riscv-protos.h (riscv_eliminable_reg): New
>    function.
>         * config/riscv/riscv-selftests.cc (calculate_x_in_sequence):
>    Consider Parallel insns.
>         * config/riscv/riscv.cc (riscv_eliminable_reg): New function.
>         (riscv_adjust_libcall_cfi_prologue): Use gen_rtx_SET and
>    gen_rtx_fmt_ee instead of gen_add3_insn.
>         (riscv_adjust_libcall_cfi_epilogue): Ditto.
>         * config/riscv/riscv.md (addsi3): Remove.
>         (add<mode>3): New instruction for large stack frame
>    optimization.
>         (add<mode>3_internal): Ditto.
>         (adddi3): Remove.
>         (add<mode>3_internal2): New instruction for insns generated in
>    the prologue and epilogue pass.
> ---
>  gcc/config/riscv/predicates.md      | 13 +++++
>  gcc/config/riscv/riscv-protos.h     |  1 +
>  gcc/config/riscv/riscv-selftests.cc |  3 ++
>  gcc/config/riscv/riscv.cc           | 20 +++++--
>  gcc/config/riscv/riscv.md           | 84 ++++++++++++++++++++++++-----
>  5 files changed, 104 insertions(+), 17 deletions(-)
>
> diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
> index c2ff41bb0fd..3149f7227ac 100644
> --- a/gcc/config/riscv/predicates.md
> +++ b/gcc/config/riscv/predicates.md
> @@ -35,6 +35,14 @@
>    (ior (match_operand 0 "arith_operand")
>         (match_operand 0 "lui_operand")))
>
> +(define_predicate "const_lui_operand"
> +  (and (match_code "const_int")
> +       (match_test "(INTVAL (op) & 0xFFF) == 0 && INTVAL (op) != 0")))
> +
> +(define_predicate "add_operand"
> +  (ior (match_operand 0 "arith_operand")
> +       (match_operand 0 "const_lui_operand")))
> +
>  (define_predicate "const_csr_operand"
>    (and (match_code "const_int")
>         (match_test "IN_RANGE (INTVAL (op), 0, 31)")))
> @@ -59,6 +67,11 @@
>    (ior (match_operand 0 "const_0_operand")
>         (match_operand 0 "register_operand")))
>
> +;; For use in adds, when adding to an eliminable register.
> +(define_predicate "reg_or_const_int_operand"
> +  (ior (match_code "const_int")
> +       (match_operand 0 "register_operand")))
> +
>  ;; Only use branch-on-bit sequences when the mask is not an ANDI immediate.
>  (define_predicate "branch_on_bit_operand"
>    (and (match_code "const_int")
> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
> index 5a718bb62b4..9348ac71956 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -63,6 +63,7 @@ extern void riscv_expand_conditional_move (rtx, rtx, rtx, rtx_code, rtx, rtx);
>  extern rtx riscv_legitimize_call_address (rtx);
>  extern void riscv_set_return_address (rtx, rtx);
>  extern bool riscv_expand_block_move (rtx, rtx, rtx);
> +extern bool riscv_eliminable_reg (rtx);
>  extern rtx riscv_return_addr (int, rtx);
>  extern poly_int64 riscv_initial_elimination_offset (int, int);
>  extern void riscv_expand_prologue (void);
> diff --git a/gcc/config/riscv/riscv-selftests.cc b/gcc/config/riscv/riscv-selftests.cc
> index 636874ebc0f..50457db708e 100644
> --- a/gcc/config/riscv/riscv-selftests.cc
> +++ b/gcc/config/riscv/riscv-selftests.cc
> @@ -116,6 +116,9 @@ calculate_x_in_sequence (rtx reg)
>        rtx pat = PATTERN (insn);
>        rtx dest = SET_DEST (pat);
>
> +      if (GET_CODE (pat) == PARALLEL)
> +       dest = SET_DEST (XVECEXP (pat, 0, 0));
> +
>        if (GET_CODE (pat) == CLOBBER)
>         continue;
>
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 32f9ef9ade9..de9344b37a3 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -4686,6 +4686,16 @@ riscv_initial_elimination_offset (int from, int to)
>    return src - dest;
>  }
>
> +/* Return true if X is a register that will be eliminated later on.  */
> +bool
> +riscv_eliminable_reg (rtx x)
> +{
> +  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
> +                      || REGNO (x) == ARG_POINTER_REGNUM
> +                      || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
> +                          && REGNO (x) <= LAST_VIRTUAL_REGISTER));
> +}
> +
>  /* Implement RETURN_ADDR_RTX.  We do not support moving back to a
>     previous frame.  */
>
> @@ -4887,8 +4897,9 @@ riscv_adjust_libcall_cfi_prologue ()
>        }
>
>    /* Debug info for adjust sp.  */
> -  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
> -                                stack_pointer_rtx, GEN_INT (-saved_size));
> +  adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
> +                                   gen_rtx_fmt_ee (PLUS, GET_MODE (stack_pointer_rtx),
> +                                                 stack_pointer_rtx, GEN_INT (saved_size)));
>    dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
>                           dwarf);
>    return dwarf;
> @@ -4990,8 +5001,9 @@ riscv_adjust_libcall_cfi_epilogue ()
>    int saved_size = cfun->machine->frame.save_libcall_adjustment;
>
>    /* Debug info for adjust sp.  */
> -  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
> -                                stack_pointer_rtx, GEN_INT (saved_size));
> +  adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
> +                                   gen_rtx_fmt_ee (PLUS, GET_MODE (stack_pointer_rtx),
> +                                                 stack_pointer_rtx, GEN_INT (saved_size)));
>    dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
>                           dwarf);
>
> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> index 798f7370a08..985dbdd50c4 100644
> --- a/gcc/config/riscv/riscv.md
> +++ b/gcc/config/riscv/riscv.md
> @@ -446,23 +446,80 @@
>    [(set_attr "type" "fadd")
>     (set_attr "mode" "<UNITMODE>")])
>
> -(define_insn "addsi3"
> -  [(set (match_operand:SI          0 "register_operand" "=r,r")
> -       (plus:SI (match_operand:SI 1 "register_operand" " r,r")
> -                (match_operand:SI 2 "arith_operand"    " r,I")))]
> +(define_expand "add<mode>3"
> +  [(parallel
> +    [(set (match_operand:GPR          0 "register_operand" "")
> +         (plus:GPR (match_operand:GPR 1 "register_operand" "")
> +                  (match_operand:GPR 2 "add_operand" "")))
> +    (clobber (match_scratch:GPR 3 ""))])]
>    ""
> -  "add%i2%~\t%0,%1,%2"
> +{
> +  if (riscv_eliminable_reg (operands[1]))
> +  {
> +    if (splittable_const_int_operand (operands[2], <MODE>mode))
> +    {
> +      /* The idea here is that we emit
> +          add op0, op1, %hi(op2)
> +          addi op0, op0, %lo(op2)
> +        Then when op1, the eliminable reg, gets replaced with sp+offset,
> +        we can simplify the constants.  */
> +      HOST_WIDE_INT high_part = CONST_HIGH_PART (INTVAL (operands[2]));
> +      emit_insn (gen_add<mode>3_internal (operands[0], operands[1],
> +                  GEN_INT (high_part)));
> +      operands[1] = operands[0];
> +      operands[2] = GEN_INT (INTVAL (operands[2]) - high_part);
> +    }
> +    else if (! const_arith_operand (operands[2], <MODE>mode))
> +      operands[2] = force_reg (<MODE>mode, operands[2]);
> +  }
> +})
> +
> +(define_insn_and_split "add<mode>3_internal"
> +  [(set (match_operand:GPR          0 "register_operand" "=r,r,&r,!&r")
> +       (plus:GPR (match_operand:GPR 1 "register_operand" " %r,r,r,0")
> +                 (match_operand:GPR 2 "add_operand"      " r,I,L,L")))
> +  (clobber (match_scratch:GPR 3 "=X,X,X,&r"))]
> +  ""
> +{
> +  if ((which_alternative == 2) || (which_alternative == 3))
> +    return "#";
> +
> +  if (TARGET_64BIT && <MODE>mode == SImode)
> +    return "add%i2w\t%0,%1,%2";
> +  else
> +    return "add%i2\t%0,%1,%2";
> +}
> +  "&& reload_completed && const_lui_operand (operands[2], <MODE>mode)"
> +  [(const_int 0)]
> +{
> +  if (REGNO (operands[0]) != REGNO (operands[1]))
> +    {
> +      emit_insn (gen_mov<mode> (operands[0], operands[2]));
> +      emit_insn (gen_add<mode>3_internal (operands[0], operands[0], operands[1]));
> +    }
> +  else
> +    {
> +      emit_insn (gen_mov<mode> (operands[3], operands[2]));
> +      emit_insn (gen_add<mode>3_internal (operands[0], operands[0], operands[3]));
> +    }
> +  DONE;
> +}
>    [(set_attr "type" "arith")
> -   (set_attr "mode" "SI")])
> +   (set_attr "mode" "<MODE>")])
>
> -(define_insn "adddi3"
> -  [(set (match_operand:DI          0 "register_operand" "=r,r")
> -       (plus:DI (match_operand:DI 1 "register_operand" " r,r")
> -                (match_operand:DI 2 "arith_operand"    " r,I")))]
> -  "TARGET_64BIT"
> -  "add%i2\t%0,%1,%2"
> +(define_insn "add<mode>3_internal2"
> +  [(set (match_operand:GPR          0 "register_operand" "=r,r")
> +       (plus:GPR (match_operand:GPR 1 "register_operand" " %r,r")
> +                 (match_operand:GPR 2 "arith_operand"    " r,I")))]
> +  ""
> + {
> +   if (TARGET_64BIT && <MODE>mode == SImode)
> +     return "add%i2w\t%0,%1,%2";
> +   else
> +     return "add%i2\t%0,%1,%2";
> + }
>    [(set_attr "type" "arith")
> -   (set_attr "mode" "DI")])
> +   (set_attr "mode" "<MODE>")])
>
>  (define_expand "addv<mode>4"
>    [(set (match_operand:GPR           0 "register_operand" "=r,r")
> @@ -508,6 +565,7 @@
>    DONE;
>  })
>
> +
>  (define_expand "uaddv<mode>4"
>    [(set (match_operand:GPR           0 "register_operand" "=r,r")
>         (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v3] RISC-V modified add3 for large stack frame optimization [PR105733]
  2022-11-04  0:53 [PATCH v3] RISC-V modified add3 for large stack frame optimization [PR105733] Kevin Lee
  2022-11-04  0:59 ` Kito Cheng
@ 2022-11-23  5:07 ` Jeff Law
  1 sibling, 0 replies; 3+ messages in thread
From: Jeff Law @ 2022-11-23  5:07 UTC (permalink / raw)
  To: Kevin Lee, gcc-patches; +Cc: gnu-toolchain


On 11/3/22 18:53, Kevin Lee wrote:
> This is the identical patch with https://gcc.gnu.org/pipermail/gcc-patches/2022-November/604814.html, but with the correct plaintext format.
>
>> The loop still seems a bit odd which may point to further improvements
>> that could be made to this patch.  Consider this fragment of the loop:
> Thank you for the review Jeff! I am currently looking into this issue
> in a different patch. I'll come back with some improvement.
>   
> gcc/ChangeLog:
>     Jim Wilson <jim.wilson.gcc@gmail.com>
>     Michael Collison <collison@rivosinc.com>
>     Kevin Lee <kevinl@rivosinc.com>
>     
> 	* config/riscv/predicates.md (const_lui_operand): New Predicate.
> 	(add_operand): Ditto.
> 	(reg_or_const_int_operand): Ditto.
> 	* config/riscv/riscv-protos.h (riscv_eliminable_reg): New
>     function.
> 	* config/riscv/riscv-selftests.cc (calculate_x_in_sequence):
>     Consider Parallel insns.
> 	* config/riscv/riscv.cc (riscv_eliminable_reg): New function.
> 	(riscv_adjust_libcall_cfi_prologue): Use gen_rtx_SET and
>     gen_rtx_fmt_ee instead of gen_add3_insn.
> 	(riscv_adjust_libcall_cfi_epilogue): Ditto.
> 	* config/riscv/riscv.md (addsi3): Remove.
> 	(add<mode>3): New instruction for large stack frame
>     optimization.
> 	(add<mode>3_internal): Ditto.
> 	(adddi3): Remove.
> 	(add<mode>3_internal2): New instruction for insns generated in
>     the prologue and epilogue pass.
> ---
>
> diff --git a/gcc/config/riscv/riscv-selftests.cc b/gcc/config/riscv/riscv-selftests.cc
> index 636874ebc0f..50457db708e 100644
> --- a/gcc/config/riscv/riscv-selftests.cc
> +++ b/gcc/config/riscv/riscv-selftests.cc
> @@ -116,6 +116,9 @@ calculate_x_in_sequence (rtx reg)
>         rtx pat = PATTERN (insn);
>         rtx dest = SET_DEST (pat);
>   
> +      if (GET_CODE (pat) == PARALLEL)
> +	dest = SET_DEST (XVECEXP (pat, 0, 0));

So this assumes you've got a parallel where the first vector is a SET.  
That may well be true, but it's probably safer to verify with something like


     gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET)


That way we're not surprised in the future if we have more patterns that 
use PARALLEL, perhaps with the first not being a simple set.


> +{
> +  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
> +		       || REGNO (x) == ARG_POINTER_REGNUM
> +		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
> +			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));

Instead I'd write it as


   return (REG_P (x)
           && (REGNO (x) == FRAME_POINTER_REGNUM
               || REGNO (x) == ARG_POINTER_REGNUM
               || (REGNO (x) >= FIRST_VIRUTAL_REGISTER
                   && REGNO (x) <= LAST_VIRTUAL_REGISTER)));


That's just the style most GCC folks are used to reading.


> @@ -4887,8 +4897,9 @@ riscv_adjust_libcall_cfi_prologue ()
>         }
>   
>     /* Debug info for adjust sp.  */
> -  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
> -				 stack_pointer_rtx, GEN_INT (-saved_size));
> +  adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
> +				    gen_rtx_fmt_ee (PLUS, GET_MODE (stack_pointer_rtx),
> +						  stack_pointer_rtx, GEN_INT (saved_size)));
>     dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
>   			  dwarf);
>     return dwarf;
> @@ -4990,8 +5001,9 @@ riscv_adjust_libcall_cfi_epilogue ()
>     int saved_size = cfun->machine->frame.save_libcall_adjustment;
>   
>     /* Debug info for adjust sp.  */
> -  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
> -				 stack_pointer_rtx, GEN_INT (saved_size));
> +  adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
> +				    gen_rtx_fmt_ee (PLUS, GET_MODE (stack_pointer_rtx),
> +						  stack_pointer_rtx, GEN_INT (saved_size)));
>     dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
>   			  dwarf);

I think this duplicates a change from Phillip & his team. This as to fix 
ICEs in the dwarf2 CFI generator, right?  Please double check as remove 
if it does duplicate a change from Philipp & his team.



> +
> +(define_insn_and_split "add<mode>3_internal"
> +  [(set (match_operand:GPR          0 "register_operand" "=r,r,&r,!&r")
> +	(plus:GPR (match_operand:GPR 1 "register_operand" " %r,r,r,0")
> +		  (match_operand:GPR 2 "add_operand"      " r,I,L,L")))
> +  (clobber (match_scratch:GPR 3 "=X,X,X,&r"))]
> +  ""
> +{
> +  if ((which_alternative == 2) || (which_alternative == 3))
> +    return "#";
> +
> +  if (TARGET_64BIT && <MODE>mode == SImode)
> +    return "add%i2w\t%0,%1,%2";
> +  else
> +    return "add%i2\t%0,%1,%2";
> +}
> +  "&& reload_completed && const_lui_operand (operands[2], <MODE>mode)"
> +  [(const_int 0)]
> +{
> +  if (REGNO (operands[0]) != REGNO (operands[1]))
> +    {
> +      emit_insn (gen_mov<mode> (operands[0], operands[2]));
> +      emit_insn (gen_add<mode>3_internal (operands[0], operands[0], operands[1]));
> +    }
> +  else
> +    {
> +      emit_insn (gen_mov<mode> (operands[3], operands[2]));
> +      emit_insn (gen_add<mode>3_internal (operands[0], operands[0], operands[3]));
> +    }
> +  DONE;
> +}
>     [(set_attr "type" "arith")
> -   (set_attr "mode" "SI")])
> +   (set_attr "mode" "<MODE>")])

So I think it was Kito that was concerned about potential performance 
impacts, particularly with the clobber expression.      Probably the 
best thing I can think of to minimize the potential for codegen changes 
would be to leave your existing add<mode>3 pattern alone.


Instead define an addptr pattern.  That one is special in that it's only 
used by LRA.  So the potential for impacting other code is minimized.


Rather than checking REGNO (operands[0]) != REGNO (operands[1]) use 
reg_overlap_mentioned_p instead.  The only notable difference for this 
code is that reg_overlap_mentioned_p will work with modes that cover 
multiple hard regs as well as subregs.

Jeff

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-11-23  5:07 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-04  0:53 [PATCH v3] RISC-V modified add3 for large stack frame optimization [PR105733] Kevin Lee
2022-11-04  0:59 ` Kito Cheng
2022-11-23  5:07 ` Jeff Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).