public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/vrull/heads/for-upstream)] RISC-V: Optimise adding a (larger than simm12) constant to a register
@ 2022-11-15 14:00 Philipp Tomsich
  0 siblings, 0 replies; 5+ messages in thread
From: Philipp Tomsich @ 2022-11-15 14:00 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:c5d23bb9549f6f021a56fd4b15cbaa7063c5ac49

commit c5d23bb9549f6f021a56fd4b15cbaa7063c5ac49
Author: Philipp Tomsich <philipp.tomsich@vrull.eu>
Date:   Wed Oct 12 14:45:42 2022 +0200

    RISC-V: Optimise adding a (larger than simm12) constant to a register
    
    So far, GCC for RISC-V has handled register-const_int addition by
    quickly escalating to creating a full sign-extended 32-bit constant and
    performing a register-register add, resulting in sequences like
    (for the case of "a + 2048"):
            li      a5,4096
            addi    a5,a5,-2048
            add     a0,a0,a5
    
    By adding an expansion for add<mode>3, we can emit optimised RTL that
    matches the capabilities of RISC-V better by adding support for the
    following, previously unoptimised cases:
      - addi + addi
            addi    a0,a0,2047
            addi    a0,a0,1
      - li + sh[123]add (if Zba is enabled)
            li      a5,960
            sh3add  a0,a5,a0
    
    With this commit, we also fix up riscv_adjust_libcall_cfi_prologue()
    and riscv_adjust_libcall_cfi_epilogue() to not use gen_add3_insn, as
    the expander will otherwise wrap the resulting set-expression in an
    insn (causing an ICE at dwarf2-time) when invoked with -msave-restore.
    
    This closes the gap to LLVM, which has already been emitting these
    optimised sequences.
    
    Note that this benefits perlbench (in SPEC CPU 2017), which needs
    to add the constant 3840.
    
    gcc/ChangeLog:
    
            * config/riscv/bitmanip.md (*shNadd): Rename.
            (riscv_shNadd<X:mode>): Expose as gen_riscv_shNadd{di/si}.
            * config/riscv/predicates.md (const_arith_shifted123_operand):
            New predicate (for constants that are a simm12, shifted by
            1, 2 or 3).
            (const_arith_2simm12_operand): New predicate (that can be
            expressed by adding 2 simm12 together).
            (addi_operand): New predicate (an immedaite operand suitable
            for the new add<mode>3 expansion).
            * config/riscv/riscv.cc (riscv_adjust_libcall_cfi_prologue):
            Don't use gen_add3_insn, where a RTX instead of an INSN is
            required (otherwise this will break as soon as we have a
            define_expand for add<mode>3).
            (riscv_adjust_libcall_cfi_epilogue): Same.
            * config/riscv/riscv.md (addsi3): Rename.
            (riscv_addsi3): New name for addsi3.
            (adddi3): Rename.
            (riscv_adddi3): New name for adddi3.
            (add<mode>3): New expander that handles the basic and fancy
            (such as li+sh[123]add, addi+addi, ...) cases for adding
            register-register and register-const_int.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/addi.c: New test.
            * gcc.target/riscv/zba-shNadd-06.c: New test.
    
    Series-to: gcc-patches@gcc.gnu.org
    Series-cc: Palmer Dabbelt <palmer@rivosinc.com>
    Series-cc: Vineet Gupta <vineetg@rivosinc.com>
    Series-cc: Christoph Muellner <christoph.muellner@vrull.eu>
    Series-cc: Kito Cheng <kito.cheng@gmail.com>
    Series-cc: Jeff Law <jlaw@ventanamicro.com>

Diff:
---
 gcc/config/riscv/bitmanip.md                   |  2 +-
 gcc/config/riscv/predicates.md                 | 28 +++++++++++++
 gcc/config/riscv/riscv.cc                      | 10 +++--
 gcc/config/riscv/riscv.md                      | 58 +++++++++++++++++++++++++-
 gcc/testsuite/gcc.target/riscv/addi.c          | 39 +++++++++++++++++
 gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c | 11 +++++
 6 files changed, 141 insertions(+), 7 deletions(-)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 8b6a7cad3f6..7e059fb5903 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -29,7 +29,7 @@
   [(set_attr "type" "bitmanip,load")
    (set_attr "mode" "DI")])
 
-(define_insn "*shNadd"
+(define_insn "riscv_shNadd<X:mode>"
   [(set (match_operand:X 0 "register_operand" "=r")
 	(plus:X (ashift:X (match_operand:X 1 "register_operand" "r")
 			  (match_operand:QI 2 "imm123_operand" "Ds3"))
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 6772228e5b6..c56bfa99339 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -308,3 +308,31 @@
 		 (match_test "INTVAL (op) > 0")))
        (ior (match_test "SMALL_OPERAND (UINTVAL (op) & ~(HOST_WIDE_INT_1U << floor_log2 (UINTVAL (op))))")
 	    (match_test "popcount_hwi (UINTVAL (op)) == 2"))))
+
+;; A CONST_INT that can be shifted down by 1, 2 or 3 bits (i.e., has
+;; these bits clear) and will then form a SMALL_OPERAND.
+(define_predicate "const_arith_shifted123_operand"
+  (and (match_code "const_int")
+       (not (match_test "SMALL_OPERAND (INTVAL (op))")))
+{
+  HOST_WIDE_INT val = INTVAL (op);
+  int trailing = ctz_hwi (val);
+
+  /* Clamp to 3, as we have sh[123]add instructions only. */
+  if (trailing > 3)
+     trailing = 3;
+
+  return trailing > 0 && SMALL_OPERAND (val >> trailing);
+})
+
+;; A CONST_INT that can be formed by adding two SMALL_OPERANDs together
+(define_predicate "const_arith_2simm12_operand"
+  (and (match_code "const_int")
+       (ior (match_test "SMALL_OPERAND(INTVAL (op) - ~(HOST_WIDE_INT_M1U << (IMM_BITS - 1)))")
+	    (match_test "SMALL_OPERAND(INTVAL (op) -  (HOST_WIDE_INT_M1U << (IMM_BITS - 1)))"))))
+
+(define_predicate "addi_operand"
+  (ior (match_operand 0 "arith_operand")
+       (match_operand 0 "const_arith_2simm12_operand")
+       (and (match_operand 0 "const_arith_shifted123_operand")
+	    (match_test "TARGET_ZBA"))))
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 737ec51b593..451ec7034c6 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4955,8 +4955,9 @@ riscv_adjust_libcall_cfi_prologue ()
       }
 
   /* Debug info for adjust sp.  */
-  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
-				 stack_pointer_rtx, GEN_INT (-saved_size));
+  adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
+			       gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+					     GEN_INT (-saved_size)));
   dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
 			  dwarf);
   return dwarf;
@@ -5077,8 +5078,9 @@ riscv_adjust_libcall_cfi_epilogue ()
   int saved_size = cfun->machine->frame.save_libcall_adjustment;
 
   /* Debug info for adjust sp.  */
-  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
-				 stack_pointer_rtx, GEN_INT (saved_size));
+  adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
+			       gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+					     GEN_INT (saved_size)));
   dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
 			  dwarf);
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 8c4961315a0..64bf30f7720 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -449,7 +449,7 @@
   [(set_attr "type" "fadd")
    (set_attr "mode" "<UNITMODE>")])
 
-(define_insn "addsi3"
+(define_insn "riscv_addsi3"
   [(set (match_operand:SI          0 "register_operand" "=r,r")
 	(plus:SI (match_operand:SI 1 "register_operand" " r,r")
 		 (match_operand:SI 2 "arith_operand"    " r,I")))]
@@ -458,7 +458,7 @@
   [(set_attr "type" "arith")
    (set_attr "mode" "SI")])
 
-(define_insn "adddi3"
+(define_insn "riscv_adddi3"
   [(set (match_operand:DI          0 "register_operand" "=r,r")
 	(plus:DI (match_operand:DI 1 "register_operand" " r,r")
 		 (match_operand:DI 2 "arith_operand"    " r,I")))]
@@ -467,6 +467,60 @@
   [(set_attr "type" "arith")
    (set_attr "mode" "DI")])
 
+(define_expand "add<mode>3"
+  [(set (match_operand:GPR           0 "register_operand"      "=r,r")
+	(plus:GPR (match_operand:GPR 1 "register_operand"      " r,r")
+		  (match_operand:GPR 2 "addi_operand"          " r,I")))]
+  ""
+{
+  if (arith_operand (operands[2], <MODE>mode))
+    emit_insn (gen_riscv_add<mode>3 (operands[0], operands[1], operands[2]));
+  else if (const_arith_2simm12_operand (operands[2], <MODE>mode))
+    {
+      /* Split into two immediates that add up to the desired value:
+       * e.g., break up "a + 2445" into:
+       *         addi	a0,a0,2047
+       *	 addi	a0,a0,398
+       */
+
+      HOST_WIDE_INT val = INTVAL (operands[2]);
+      HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+      if (val >= 0)
+	 saturated = ~saturated;
+
+      val -= saturated;
+
+      rtx tmp = gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_riscv_add<mode>3 (tmp, operands[1], GEN_INT (saturated)));
+      emit_insn (gen_riscv_add<mode>3 (operands[0], tmp, GEN_INT (val)));
+    }
+  else if (<MODE>mode == word_mode
+	   && const_arith_shifted123_operand (operands[2], <MODE>mode))
+    {
+      /* Use a sh[123]add and an immediate shifted down by 1, 2, or 3. */
+
+      HOST_WIDE_INT val = INTVAL (operands[2]);
+      int shamt = ctz_hwi (val);
+
+      if (shamt > 3)
+	shamt = 3;
+
+      rtx tmp = gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_rtx_SET (tmp, GEN_INT (val >> shamt)));
+
+      /* We don't use gen_riscv_shNadd here, as it will only exist for
+	 <X:mode>.  Instead we build up its canonical form directly.  */
+      rtx shifted_imm = gen_rtx_ASHIFT (<MODE>mode, tmp, GEN_INT (shamt));
+      rtx shNadd = gen_rtx_PLUS (<MODE>mode, shifted_imm, operands[1]);
+      emit_insn (gen_rtx_SET (operands[0], shNadd));
+    }
+  else
+    FAIL;
+
+  DONE;
+})
+
 (define_expand "addv<mode>4"
   [(set (match_operand:GPR           0 "register_operand" "=r,r")
 	(plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
diff --git a/gcc/testsuite/gcc.target/riscv/addi.c b/gcc/testsuite/gcc.target/riscv/addi.c
new file mode 100644
index 00000000000..01339e44697
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/addi.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+long long f (long long a)
+{
+  //	addi	a0,a0,2047
+  //	addi	a0,a0,1
+
+  return a + 2048;
+}
+
+long long f2 (long long a)
+{
+  //	addi	a0,a0,2047
+  //	addi	a0,a0,398
+
+  return a + 2445;
+}
+
+long long f3 (long long a)
+{
+  //  	addi	a0,a0,-2048
+  //	addi	a0,a0,-397
+
+  return a - 2445;
+}
+
+long long f6 (long long a)
+{
+  //  	li	a5,1179648
+  //	add	a0,a0,a5
+
+  return a + (0x12 << 16);
+}
+
+/* { dg-final { scan-assembler-times "addi\t" 6 } } */
+/* { dg-final { scan-assembler-times "li\t" 1 } } */
+/* { dg-final { scan-assembler-times "add\t" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c b/gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c
new file mode 100644
index 00000000000..c55f05ed1d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zba -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+long long f (long long a)
+{
+  return a + 7680;
+}
+
+/* { dg-final { scan-assembler-times "li\t" 1 } } */
+/* { dg-final { scan-assembler-times "sh3add\t" 1 } } */

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [gcc(refs/vendors/vrull/heads/for-upstream)] RISC-V: Optimise adding a (larger than simm12) constant to a register
@ 2022-11-18 20:25 Philipp Tomsich
  0 siblings, 0 replies; 5+ messages in thread
From: Philipp Tomsich @ 2022-11-18 20:25 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:a9926c5f054f5d3ca801c0b610703a1132866b73

commit a9926c5f054f5d3ca801c0b610703a1132866b73
Author: Philipp Tomsich <philipp.tomsich@vrull.eu>
Date:   Wed Oct 12 14:45:42 2022 +0200

    RISC-V: Optimise adding a (larger than simm12) constant to a register
    
    So far, GCC for RISC-V has handled register-const_int addition by
    quickly escalating to creating a full sign-extended 32-bit constant and
    performing a register-register add, resulting in sequences like
    (for the case of "a + 2048"):
            li      a5,4096
            addi    a5,a5,-2048
            add     a0,a0,a5
    
    By adding an expansion for add<mode>3, we can emit optimised RTL that
    matches the capabilities of RISC-V better by adding support for the
    following, previously unoptimised cases:
      - addi + addi
            addi    a0,a0,2047
            addi    a0,a0,1
      - li + sh[123]add (if Zba is enabled)
            li      a5,960
            sh3add  a0,a5,a0
    
    With this commit, we also fix up riscv_adjust_libcall_cfi_prologue()
    and riscv_adjust_libcall_cfi_epilogue() to not use gen_add3_insn, as
    the expander will otherwise wrap the resulting set-expression in an
    insn (causing an ICE at dwarf2-time) when invoked with -msave-restore.
    
    This closes the gap to LLVM, which has already been emitting these
    optimised sequences.
    
    Note that this benefits perlbench (in SPEC CPU 2017), which needs
    to add the constant 3840.
    
    gcc/ChangeLog:
    
            * config/riscv/bitmanip.md (*shNadd): Rename.
            (riscv_shNadd<X:mode>): Expose as gen_riscv_shNadd{di/si}.
            * config/riscv/predicates.md (const_arith_shifted123_operand):
            New predicate (for constants that are a simm12, shifted by
            1, 2 or 3).
            (const_arith_2simm12_operand): New predicate (that can be
            expressed by adding 2 simm12 together).
            (addi_operand): New predicate (an immedaite operand suitable
            for the new add<mode>3 expansion).
            * config/riscv/riscv.cc (riscv_adjust_libcall_cfi_prologue):
            Don't use gen_add3_insn, where a RTX instead of an INSN is
            required (otherwise this will break as soon as we have a
            define_expand for add<mode>3).
            (riscv_adjust_libcall_cfi_epilogue): Same.
            * config/riscv/riscv.md (addsi3): Rename.
            (riscv_addsi3): New name for addsi3.
            (adddi3): Rename.
            (riscv_adddi3): New name for adddi3.
            (add<mode>3): New expander that handles the basic and fancy
            (such as li+sh[123]add, addi+addi, ...) cases for adding
            register-register and register-const_int.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/addi.c: New test.
            * gcc.target/riscv/zba-shNadd-06.c: New test.
    
    Series-to: gcc-patches@gcc.gnu.org
    Series-cc: Palmer Dabbelt <palmer@rivosinc.com>
    Series-cc: Vineet Gupta <vineetg@rivosinc.com>
    Series-cc: Christoph Muellner <christoph.muellner@vrull.eu>
    Series-cc: Kito Cheng <kito.cheng@gmail.com>
    Series-cc: Jeff Law <jlaw@ventanamicro.com>

Diff:
---
 gcc/config/riscv/bitmanip.md                   |  2 +-
 gcc/config/riscv/predicates.md                 | 28 +++++++++++++
 gcc/config/riscv/riscv.cc                      | 10 +++--
 gcc/config/riscv/riscv.md                      | 58 +++++++++++++++++++++++++-
 gcc/testsuite/gcc.target/riscv/addi.c          | 39 +++++++++++++++++
 gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c | 11 +++++
 6 files changed, 141 insertions(+), 7 deletions(-)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index d17133d58c1..26d74e3ec81 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -29,7 +29,7 @@
   [(set_attr "type" "bitmanip,load")
    (set_attr "mode" "DI")])
 
-(define_insn "*shNadd"
+(define_insn "riscv_shNadd<X:mode>"
   [(set (match_operand:X 0 "register_operand" "=r")
 	(plus:X (ashift:X (match_operand:X 1 "register_operand" "r")
 			  (match_operand:QI 2 "imm123_operand" "Ds3"))
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index dfd98761b8b..19132cc5c99 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -337,3 +337,31 @@
   (and (match_code "const_int")
        (ior (match_operand 0 "not_uimm_extra_bit_operand")
 	    (match_operand 0 "const_nottwobits_operand"))))
+
+;; A CONST_INT that can be shifted down by 1, 2 or 3 bits (i.e., has
+;; these bits clear) and will then form a SMALL_OPERAND.
+(define_predicate "const_arith_shifted123_operand"
+  (and (match_code "const_int")
+       (not (match_test "SMALL_OPERAND (INTVAL (op))")))
+{
+  HOST_WIDE_INT val = INTVAL (op);
+  int trailing = ctz_hwi (val);
+
+  /* Clamp to 3, as we have sh[123]add instructions only. */
+  if (trailing > 3)
+     trailing = 3;
+
+  return trailing > 0 && SMALL_OPERAND (val >> trailing);
+})
+
+;; A CONST_INT that can be formed by adding two SMALL_OPERANDs together
+(define_predicate "const_arith_2simm12_operand"
+  (and (match_code "const_int")
+       (ior (match_test "SMALL_OPERAND(INTVAL (op) - ~(HOST_WIDE_INT_M1U << (IMM_BITS - 1)))")
+	    (match_test "SMALL_OPERAND(INTVAL (op) -  (HOST_WIDE_INT_M1U << (IMM_BITS - 1)))"))))
+
+(define_predicate "addi_operand"
+  (ior (match_operand 0 "arith_operand")
+       (match_operand 0 "const_arith_2simm12_operand")
+       (and (match_operand 0 "const_arith_shifted123_operand")
+	    (match_test "TARGET_ZBA"))))
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 7ec4ce97e6c..9a4dad40193 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4964,8 +4964,9 @@ riscv_adjust_libcall_cfi_prologue ()
       }
 
   /* Debug info for adjust sp.  */
-  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
-				 stack_pointer_rtx, GEN_INT (-saved_size));
+  adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
+			       gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+					     GEN_INT (-saved_size)));
   dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
 			  dwarf);
   return dwarf;
@@ -5086,8 +5087,9 @@ riscv_adjust_libcall_cfi_epilogue ()
   int saved_size = cfun->machine->frame.save_libcall_adjustment;
 
   /* Debug info for adjust sp.  */
-  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
-				 stack_pointer_rtx, GEN_INT (saved_size));
+  adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
+			       gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+					     GEN_INT (saved_size)));
   dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
 			  dwarf);
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index b616c1915df..20cdb7ecfe0 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -452,7 +452,7 @@
   [(set_attr "type" "fadd")
    (set_attr "mode" "<UNITMODE>")])
 
-(define_insn "addsi3"
+(define_insn "riscv_addsi3"
   [(set (match_operand:SI          0 "register_operand" "=r,r")
 	(plus:SI (match_operand:SI 1 "register_operand" " r,r")
 		 (match_operand:SI 2 "arith_operand"    " r,I")))]
@@ -461,7 +461,7 @@
   [(set_attr "type" "arith")
    (set_attr "mode" "SI")])
 
-(define_insn "adddi3"
+(define_insn "riscv_adddi3"
   [(set (match_operand:DI          0 "register_operand" "=r,r")
 	(plus:DI (match_operand:DI 1 "register_operand" " r,r")
 		 (match_operand:DI 2 "arith_operand"    " r,I")))]
@@ -470,6 +470,60 @@
   [(set_attr "type" "arith")
    (set_attr "mode" "DI")])
 
+(define_expand "add<mode>3"
+  [(set (match_operand:GPR           0 "register_operand"      "=r,r")
+	(plus:GPR (match_operand:GPR 1 "register_operand"      " r,r")
+		  (match_operand:GPR 2 "addi_operand"          " r,I")))]
+  ""
+{
+  if (arith_operand (operands[2], <MODE>mode))
+    emit_insn (gen_riscv_add<mode>3 (operands[0], operands[1], operands[2]));
+  else if (const_arith_2simm12_operand (operands[2], <MODE>mode))
+    {
+      /* Split into two immediates that add up to the desired value:
+       * e.g., break up "a + 2445" into:
+       *         addi	a0,a0,2047
+       *	 addi	a0,a0,398
+       */
+
+      HOST_WIDE_INT val = INTVAL (operands[2]);
+      HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+      if (val >= 0)
+	 saturated = ~saturated;
+
+      val -= saturated;
+
+      rtx tmp = gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_riscv_add<mode>3 (tmp, operands[1], GEN_INT (saturated)));
+      emit_insn (gen_riscv_add<mode>3 (operands[0], tmp, GEN_INT (val)));
+    }
+  else if (<MODE>mode == word_mode
+	   && const_arith_shifted123_operand (operands[2], <MODE>mode))
+    {
+      /* Use a sh[123]add and an immediate shifted down by 1, 2, or 3. */
+
+      HOST_WIDE_INT val = INTVAL (operands[2]);
+      int shamt = ctz_hwi (val);
+
+      if (shamt > 3)
+	shamt = 3;
+
+      rtx tmp = gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_rtx_SET (tmp, GEN_INT (val >> shamt)));
+
+      /* We don't use gen_riscv_shNadd here, as it will only exist for
+	 <X:mode>.  Instead we build up its canonical form directly.  */
+      rtx shifted_imm = gen_rtx_ASHIFT (<MODE>mode, tmp, GEN_INT (shamt));
+      rtx shNadd = gen_rtx_PLUS (<MODE>mode, shifted_imm, operands[1]);
+      emit_insn (gen_rtx_SET (operands[0], shNadd));
+    }
+  else
+    FAIL;
+
+  DONE;
+})
+
 (define_expand "addv<mode>4"
   [(set (match_operand:GPR           0 "register_operand" "=r,r")
 	(plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
diff --git a/gcc/testsuite/gcc.target/riscv/addi.c b/gcc/testsuite/gcc.target/riscv/addi.c
new file mode 100644
index 00000000000..01339e44697
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/addi.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+long long f (long long a)
+{
+  //	addi	a0,a0,2047
+  //	addi	a0,a0,1
+
+  return a + 2048;
+}
+
+long long f2 (long long a)
+{
+  //	addi	a0,a0,2047
+  //	addi	a0,a0,398
+
+  return a + 2445;
+}
+
+long long f3 (long long a)
+{
+  //  	addi	a0,a0,-2048
+  //	addi	a0,a0,-397
+
+  return a - 2445;
+}
+
+long long f6 (long long a)
+{
+  //  	li	a5,1179648
+  //	add	a0,a0,a5
+
+  return a + (0x12 << 16);
+}
+
+/* { dg-final { scan-assembler-times "addi\t" 6 } } */
+/* { dg-final { scan-assembler-times "li\t" 1 } } */
+/* { dg-final { scan-assembler-times "add\t" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c b/gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c
new file mode 100644
index 00000000000..c55f05ed1d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zba -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+long long f (long long a)
+{
+  return a + 7680;
+}
+
+/* { dg-final { scan-assembler-times "li\t" 1 } } */
+/* { dg-final { scan-assembler-times "sh3add\t" 1 } } */

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [gcc(refs/vendors/vrull/heads/for-upstream)] RISC-V: Optimise adding a (larger than simm12) constant to a register
@ 2022-11-18 20:22 Philipp Tomsich
  0 siblings, 0 replies; 5+ messages in thread
From: Philipp Tomsich @ 2022-11-18 20:22 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:efd58b6dbc7ce3f3f6d863c8e98a1b8dab2ff74c

commit efd58b6dbc7ce3f3f6d863c8e98a1b8dab2ff74c
Author: Philipp Tomsich <philipp.tomsich@vrull.eu>
Date:   Wed Oct 12 14:45:42 2022 +0200

    RISC-V: Optimise adding a (larger than simm12) constant to a register
    
    So far, GCC for RISC-V has handled register-const_int addition by
    quickly escalating to creating a full sign-extended 32-bit constant and
    performing a register-register add, resulting in sequences like
    (for the case of "a + 2048"):
            li      a5,4096
            addi    a5,a5,-2048
            add     a0,a0,a5
    
    By adding an expansion for add<mode>3, we can emit optimised RTL that
    matches the capabilities of RISC-V better by adding support for the
    following, previously unoptimised cases:
      - addi + addi
            addi    a0,a0,2047
            addi    a0,a0,1
      - li + sh[123]add (if Zba is enabled)
            li      a5,960
            sh3add  a0,a5,a0
    
    With this commit, we also fix up riscv_adjust_libcall_cfi_prologue()
    and riscv_adjust_libcall_cfi_epilogue() to not use gen_add3_insn, as
    the expander will otherwise wrap the resulting set-expression in an
    insn (causing an ICE at dwarf2-time) when invoked with -msave-restore.
    
    This closes the gap to LLVM, which has already been emitting these
    optimised sequences.
    
    Note that this benefits perlbench (in SPEC CPU 2017), which needs
    to add the constant 3840.
    
    gcc/ChangeLog:
    
            * config/riscv/bitmanip.md (*shNadd): Rename.
            (riscv_shNadd<X:mode>): Expose as gen_riscv_shNadd{di/si}.
            * config/riscv/predicates.md (const_arith_shifted123_operand):
            New predicate (for constants that are a simm12, shifted by
            1, 2 or 3).
            (const_arith_2simm12_operand): New predicate (that can be
            expressed by adding 2 simm12 together).
            (addi_operand): New predicate (an immedaite operand suitable
            for the new add<mode>3 expansion).
            * config/riscv/riscv.cc (riscv_adjust_libcall_cfi_prologue):
            Don't use gen_add3_insn, where a RTX instead of an INSN is
            required (otherwise this will break as soon as we have a
            define_expand for add<mode>3).
            (riscv_adjust_libcall_cfi_epilogue): Same.
            * config/riscv/riscv.md (addsi3): Rename.
            (riscv_addsi3): New name for addsi3.
            (adddi3): Rename.
            (riscv_adddi3): New name for adddi3.
            (add<mode>3): New expander that handles the basic and fancy
            (such as li+sh[123]add, addi+addi, ...) cases for adding
            register-register and register-const_int.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/addi.c: New test.
            * gcc.target/riscv/zba-shNadd-06.c: New test.
    
    Series-to: gcc-patches@gcc.gnu.org
    Series-cc: Palmer Dabbelt <palmer@rivosinc.com>
    Series-cc: Vineet Gupta <vineetg@rivosinc.com>
    Series-cc: Christoph Muellner <christoph.muellner@vrull.eu>
    Series-cc: Kito Cheng <kito.cheng@gmail.com>
    Series-cc: Jeff Law <jlaw@ventanamicro.com>

Diff:
---
 gcc/config/riscv/bitmanip.md                   |  2 +-
 gcc/config/riscv/predicates.md                 | 28 +++++++++++++
 gcc/config/riscv/riscv.cc                      | 10 +++--
 gcc/config/riscv/riscv.md                      | 58 +++++++++++++++++++++++++-
 gcc/testsuite/gcc.target/riscv/addi.c          | 39 +++++++++++++++++
 gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c | 11 +++++
 6 files changed, 141 insertions(+), 7 deletions(-)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index d17133d58c1..26d74e3ec81 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -29,7 +29,7 @@
   [(set_attr "type" "bitmanip,load")
    (set_attr "mode" "DI")])
 
-(define_insn "*shNadd"
+(define_insn "riscv_shNadd<X:mode>"
   [(set (match_operand:X 0 "register_operand" "=r")
 	(plus:X (ashift:X (match_operand:X 1 "register_operand" "r")
 			  (match_operand:QI 2 "imm123_operand" "Ds3"))
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index dfd98761b8b..19132cc5c99 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -337,3 +337,31 @@
   (and (match_code "const_int")
        (ior (match_operand 0 "not_uimm_extra_bit_operand")
 	    (match_operand 0 "const_nottwobits_operand"))))
+
+;; A CONST_INT that can be shifted down by 1, 2 or 3 bits (i.e., has
+;; these bits clear) and will then form a SMALL_OPERAND.
+(define_predicate "const_arith_shifted123_operand"
+  (and (match_code "const_int")
+       (not (match_test "SMALL_OPERAND (INTVAL (op))")))
+{
+  HOST_WIDE_INT val = INTVAL (op);
+  int trailing = ctz_hwi (val);
+
+  /* Clamp to 3, as we have sh[123]add instructions only. */
+  if (trailing > 3)
+     trailing = 3;
+
+  return trailing > 0 && SMALL_OPERAND (val >> trailing);
+})
+
+;; A CONST_INT that can be formed by adding two SMALL_OPERANDs together
+(define_predicate "const_arith_2simm12_operand"
+  (and (match_code "const_int")
+       (ior (match_test "SMALL_OPERAND(INTVAL (op) - ~(HOST_WIDE_INT_M1U << (IMM_BITS - 1)))")
+	    (match_test "SMALL_OPERAND(INTVAL (op) -  (HOST_WIDE_INT_M1U << (IMM_BITS - 1)))"))))
+
+(define_predicate "addi_operand"
+  (ior (match_operand 0 "arith_operand")
+       (match_operand 0 "const_arith_2simm12_operand")
+       (and (match_operand 0 "const_arith_shifted123_operand")
+	    (match_test "TARGET_ZBA"))))
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 7ec4ce97e6c..9a4dad40193 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4964,8 +4964,9 @@ riscv_adjust_libcall_cfi_prologue ()
       }
 
   /* Debug info for adjust sp.  */
-  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
-				 stack_pointer_rtx, GEN_INT (-saved_size));
+  adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
+			       gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+					     GEN_INT (-saved_size)));
   dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
 			  dwarf);
   return dwarf;
@@ -5086,8 +5087,9 @@ riscv_adjust_libcall_cfi_epilogue ()
   int saved_size = cfun->machine->frame.save_libcall_adjustment;
 
   /* Debug info for adjust sp.  */
-  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
-				 stack_pointer_rtx, GEN_INT (saved_size));
+  adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
+			       gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+					     GEN_INT (saved_size)));
   dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
 			  dwarf);
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index b616c1915df..20cdb7ecfe0 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -452,7 +452,7 @@
   [(set_attr "type" "fadd")
    (set_attr "mode" "<UNITMODE>")])
 
-(define_insn "addsi3"
+(define_insn "riscv_addsi3"
   [(set (match_operand:SI          0 "register_operand" "=r,r")
 	(plus:SI (match_operand:SI 1 "register_operand" " r,r")
 		 (match_operand:SI 2 "arith_operand"    " r,I")))]
@@ -461,7 +461,7 @@
   [(set_attr "type" "arith")
    (set_attr "mode" "SI")])
 
-(define_insn "adddi3"
+(define_insn "riscv_adddi3"
   [(set (match_operand:DI          0 "register_operand" "=r,r")
 	(plus:DI (match_operand:DI 1 "register_operand" " r,r")
 		 (match_operand:DI 2 "arith_operand"    " r,I")))]
@@ -470,6 +470,60 @@
   [(set_attr "type" "arith")
    (set_attr "mode" "DI")])
 
+(define_expand "add<mode>3"
+  [(set (match_operand:GPR           0 "register_operand"      "=r,r")
+	(plus:GPR (match_operand:GPR 1 "register_operand"      " r,r")
+		  (match_operand:GPR 2 "addi_operand"          " r,I")))]
+  ""
+{
+  if (arith_operand (operands[2], <MODE>mode))
+    emit_insn (gen_riscv_add<mode>3 (operands[0], operands[1], operands[2]));
+  else if (const_arith_2simm12_operand (operands[2], <MODE>mode))
+    {
+      /* Split into two immediates that add up to the desired value:
+       * e.g., break up "a + 2445" into:
+       *         addi	a0,a0,2047
+       *	 addi	a0,a0,398
+       */
+
+      HOST_WIDE_INT val = INTVAL (operands[2]);
+      HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+      if (val >= 0)
+	 saturated = ~saturated;
+
+      val -= saturated;
+
+      rtx tmp = gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_riscv_add<mode>3 (tmp, operands[1], GEN_INT (saturated)));
+      emit_insn (gen_riscv_add<mode>3 (operands[0], tmp, GEN_INT (val)));
+    }
+  else if (<MODE>mode == word_mode
+	   && const_arith_shifted123_operand (operands[2], <MODE>mode))
+    {
+      /* Use a sh[123]add and an immediate shifted down by 1, 2, or 3. */
+
+      HOST_WIDE_INT val = INTVAL (operands[2]);
+      int shamt = ctz_hwi (val);
+
+      if (shamt > 3)
+	shamt = 3;
+
+      rtx tmp = gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_rtx_SET (tmp, GEN_INT (val >> shamt)));
+
+      /* We don't use gen_riscv_shNadd here, as it will only exist for
+	 <X:mode>.  Instead we build up its canonical form directly.  */
+      rtx shifted_imm = gen_rtx_ASHIFT (<MODE>mode, tmp, GEN_INT (shamt));
+      rtx shNadd = gen_rtx_PLUS (<MODE>mode, shifted_imm, operands[1]);
+      emit_insn (gen_rtx_SET (operands[0], shNadd));
+    }
+  else
+    FAIL;
+
+  DONE;
+})
+
 (define_expand "addv<mode>4"
   [(set (match_operand:GPR           0 "register_operand" "=r,r")
 	(plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
diff --git a/gcc/testsuite/gcc.target/riscv/addi.c b/gcc/testsuite/gcc.target/riscv/addi.c
new file mode 100644
index 00000000000..01339e44697
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/addi.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+long long f (long long a)
+{
+  //	addi	a0,a0,2047
+  //	addi	a0,a0,1
+
+  return a + 2048;
+}
+
+long long f2 (long long a)
+{
+  //	addi	a0,a0,2047
+  //	addi	a0,a0,398
+
+  return a + 2445;
+}
+
+long long f3 (long long a)
+{
+  //  	addi	a0,a0,-2048
+  //	addi	a0,a0,-397
+
+  return a - 2445;
+}
+
+long long f6 (long long a)
+{
+  //  	li	a5,1179648
+  //	add	a0,a0,a5
+
+  return a + (0x12 << 16);
+}
+
+/* { dg-final { scan-assembler-times "addi\t" 6 } } */
+/* { dg-final { scan-assembler-times "li\t" 1 } } */
+/* { dg-final { scan-assembler-times "add\t" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c b/gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c
new file mode 100644
index 00000000000..c55f05ed1d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zba -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+long long f (long long a)
+{
+  return a + 7680;
+}
+
+/* { dg-final { scan-assembler-times "li\t" 1 } } */
+/* { dg-final { scan-assembler-times "sh3add\t" 1 } } */

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [gcc(refs/vendors/vrull/heads/for-upstream)] RISC-V: Optimise adding a (larger than simm12) constant to a register
@ 2022-11-18 11:34 Philipp Tomsich
  0 siblings, 0 replies; 5+ messages in thread
From: Philipp Tomsich @ 2022-11-18 11:34 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:32dcd209673f3f043b85df3c676887696f76ed79

commit 32dcd209673f3f043b85df3c676887696f76ed79
Author: Philipp Tomsich <philipp.tomsich@vrull.eu>
Date:   Wed Oct 12 14:45:42 2022 +0200

    RISC-V: Optimise adding a (larger than simm12) constant to a register
    
    So far, GCC for RISC-V has handled register-const_int addition by very
    quickly escalating to creating a full sign-extended 32-bit constant and
    performing a register-register addition, resulting in sequences like
    (for the case of "a + 2048"):
            li      a5,4096
            addi    a5,a5,-2048
            add     a0,a0,a5
    
    By adding an expansion for add<mode>3, we can emit optimised RTL that
    matches the capabilities of RISC-V better by adding support for the
    following, previously unoptimised cases:
      - addi + addi
            addi    a0,a0,2047
            addi    a0,a0,1
      - li + sh[123]add (if Zba is enabled)
            li      a5,960
            sh3add  a0,a5,a0
    
    With this commit, we also fix up riscv_adjust_libcall_cfi_prologue()
    and riscv_adjust_libcall_cfi_epilogue() to not use gen_add3_insn, as
    the expander will otherwise wrap the resulting set-expression in an
    insn (causing an ICE at dwarf2-time) when invoked with -msave-restore.
    
    This closes the gap to LLVM, which has already been emitting these
    optimised sequences.
    
    Note that this benefits perlbench (in SPEC CPU 2017), which needs
    to add the constant 3840.
    
    gcc/ChangeLog:
    
            * config/riscv/bitmanip.md (*shNadd): Rename.
            (riscv_shNadd<X:mode>): Expose as gen_riscv_shNadd{di/si}.
            * config/riscv/predicates.md (const_arith_shifted123_operand):
            New predicate (for constants that are a simm12, shifted by
            1, 2 or 3).
            (const_arith_2simm12_operand): New predicate (for constants
            that can be expressed by adding 2 simm12 together).
            (addi_operand): New predicate (an immediate operand suitable
            for the new add<mode>3 expansion).
            * config/riscv/riscv.cc (riscv_adjust_libcall_cfi_prologue):
            Don't use gen_add3_insn, where a RTX instead of an INSN is
            required (otherwise this will break as soon as we have a
            define_expand for add<mode>3).
            (riscv_adjust_libcall_cfi_epilogue): Same.
            * config/riscv/riscv.md (addsi3): Rename.
            (riscv_addsi3): New name for addsi3.
            (adddi3): Rename.
            (riscv_adddi3): New name for adddi3.
            (add<mode>3): New expander that handles the basic and fancy
            (such as li+sh[123]add, addi+addi, ...) cases for adding
            register-register and register-const_int.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/addi.c: New test.
            * gcc.target/riscv/zba-shNadd-06.c: New test.
    
    Series-to: gcc-patches@gcc.gnu.org
    Series-cc: Palmer Dabbelt <palmer@rivosinc.com>
    Series-cc: Vineet Gupta <vineetg@rivosinc.com>
    Series-cc: Christoph Muellner <christoph.muellner@vrull.eu>
    Series-cc: Kito Cheng <kito.cheng@gmail.com>
    Series-cc: Jeff Law <jlaw@ventanamicro.com>

Diff:
---
 gcc/config/riscv/bitmanip.md                   |  2 +-
 gcc/config/riscv/predicates.md                 | 28 +++++++++++++
 gcc/config/riscv/riscv.cc                      | 10 +++--
 gcc/config/riscv/riscv.md                      | 58 +++++++++++++++++++++++++-
 gcc/testsuite/gcc.target/riscv/addi.c          | 39 +++++++++++++++++
 gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c | 11 +++++
 6 files changed, 141 insertions(+), 7 deletions(-)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index db16f83b46e..ff3d5ff6fbb 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -29,7 +29,7 @@
   [(set_attr "type" "bitmanip,load")
    (set_attr "mode" "DI")])
 
-(define_insn "*shNadd"
+(define_insn "riscv_shNadd<X:mode>"
   [(set (match_operand:X 0 "register_operand" "=r")
 	(plus:X (ashift:X (match_operand:X 1 "register_operand" "r")
 			  (match_operand:QI 2 "imm123_operand" "Ds3"))
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index d5c097e259f..7bb780dc3de 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -313,3 +313,31 @@
 		 (match_test "INTVAL (op) > 0")))
        (ior (match_test "SMALL_OPERAND (UINTVAL (op) & ~(HOST_WIDE_INT_1U << floor_log2 (UINTVAL (op))))")
 	    (match_test "popcount_hwi (UINTVAL (op)) == 2"))))
+
+;; A CONST_INT that can be shifted down by 1, 2 or 3 bits (i.e., has
+;; these bits clear) and will then form a SMALL_OPERAND.
+(define_predicate "const_arith_shifted123_operand"
+  (and (match_code "const_int")
+       (not (match_test "SMALL_OPERAND (INTVAL (op))")))
+{
+  HOST_WIDE_INT val = INTVAL (op);
+  int trailing = ctz_hwi (val);
+
+  /* Clamp to 3, as we have sh[123]add instructions only. */
+  if (trailing > 3)
+     trailing = 3;
+
+  return trailing > 0 && SMALL_OPERAND (val >> trailing);
+})
+
+;; A CONST_INT that can formed by adding two SMALL_OPERANDs together
+(define_predicate "const_arith_2simm12_operand"
+  (and (match_code "const_int")
+       (ior (match_test "SMALL_OPERAND(INTVAL (op) - ~(HOST_WIDE_INT_M1U << (IMM_BITS - 1)))")
+	    (match_test "SMALL_OPERAND(INTVAL (op) -  (HOST_WIDE_INT_M1U << (IMM_BITS - 1)))"))))
+
+(define_predicate "addi_operand"
+  (ior (match_operand 0 "arith_operand")
+       (match_operand 0 "const_arith_2simm12_operand")
+       (and (match_operand 0 "const_arith_shifted123_operand")
+	    (match_test "TARGET_ZBA"))))
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 5dff5cd634b..56e6c7ff9a2 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4964,8 +4964,9 @@ riscv_adjust_libcall_cfi_prologue ()
       }
 
   /* Debug info for adjust sp.  */
-  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
-				 stack_pointer_rtx, GEN_INT (-saved_size));
+  adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
+			       gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+					     GEN_INT (-saved_size)));
   dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
 			  dwarf);
   return dwarf;
@@ -5086,8 +5087,9 @@ riscv_adjust_libcall_cfi_epilogue ()
   int saved_size = cfun->machine->frame.save_libcall_adjustment;
 
   /* Debug info for adjust sp.  */
-  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
-				 stack_pointer_rtx, GEN_INT (saved_size));
+  adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
+			       gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+					     GEN_INT (saved_size)));
   dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
 			  dwarf);
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index b616c1915df..20cdb7ecfe0 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -452,7 +452,7 @@
   [(set_attr "type" "fadd")
    (set_attr "mode" "<UNITMODE>")])
 
-(define_insn "addsi3"
+(define_insn "riscv_addsi3"
   [(set (match_operand:SI          0 "register_operand" "=r,r")
 	(plus:SI (match_operand:SI 1 "register_operand" " r,r")
 		 (match_operand:SI 2 "arith_operand"    " r,I")))]
@@ -461,7 +461,7 @@
   [(set_attr "type" "arith")
    (set_attr "mode" "SI")])
 
-(define_insn "adddi3"
+(define_insn "riscv_adddi3"
   [(set (match_operand:DI          0 "register_operand" "=r,r")
 	(plus:DI (match_operand:DI 1 "register_operand" " r,r")
 		 (match_operand:DI 2 "arith_operand"    " r,I")))]
@@ -470,6 +470,60 @@
   [(set_attr "type" "arith")
    (set_attr "mode" "DI")])
 
+(define_expand "add<mode>3"
+  [(set (match_operand:GPR           0 "register_operand"      "=r,r")
+	(plus:GPR (match_operand:GPR 1 "register_operand"      " r,r")
+		  (match_operand:GPR 2 "addi_operand"          " r,I")))]
+  ""
+{
+  if (arith_operand (operands[2], <MODE>mode))
+    emit_insn (gen_riscv_add<mode>3 (operands[0], operands[1], operands[2]));
+  else if (const_arith_2simm12_operand (operands[2], <MODE>mode))
+    {
+      /* Split into two immediates that add up to the desired value:
+       * e.g., break up "a + 2445" into:
+       *         addi	a0,a0,2047
+       *	 addi	a0,a0,398
+       */
+
+      HOST_WIDE_INT val = INTVAL (operands[2]);
+      HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+      if (val >= 0)
+	 saturated = ~saturated;
+
+      val -= saturated;
+
+      rtx tmp = gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_riscv_add<mode>3 (tmp, operands[1], GEN_INT (saturated)));
+      emit_insn (gen_riscv_add<mode>3 (operands[0], tmp, GEN_INT (val)));
+    }
+  else if (<MODE>mode == word_mode
+	   && const_arith_shifted123_operand (operands[2], <MODE>mode))
+    {
+      /* Use a sh[123]add and an immediate shifted down by 1, 2, or 3. */
+
+      HOST_WIDE_INT val = INTVAL (operands[2]);
+      int shamt = ctz_hwi (val);
+
+      if (shamt > 3)
+	shamt = 3;
+
+      rtx tmp = gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_rtx_SET (tmp, GEN_INT (val >> shamt)));
+
+      /* We don't use gen_riscv_shNadd here, as it will only exist for
+	 <X:mode>.  Instead we build up its canonical form directly.  */
+      rtx shifted_imm = gen_rtx_ASHIFT (<MODE>mode, tmp, GEN_INT (shamt));
+      rtx shNadd = gen_rtx_PLUS (<MODE>mode, shifted_imm, operands[1]);
+      emit_insn (gen_rtx_SET (operands[0], shNadd));
+    }
+  else
+    FAIL;
+
+  DONE;
+})
+
 (define_expand "addv<mode>4"
   [(set (match_operand:GPR           0 "register_operand" "=r,r")
 	(plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
diff --git a/gcc/testsuite/gcc.target/riscv/addi.c b/gcc/testsuite/gcc.target/riscv/addi.c
new file mode 100644
index 00000000000..01339e44697
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/addi.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+long long f (long long a)
+{
+  //	addi	a0,a0,2047
+  //	addi	a0,a0,1
+
+  return a + 2048;
+}
+
+long long f2 (long long a)
+{
+  //	addi	a0,a0,2047
+  //	addi	a0,a0,398
+
+  return a + 2445;
+}
+
+long long f3 (long long a)
+{
+  //  	addi	a0,a0,-2048
+  //	addi	a0,a0,-397
+
+  return a - 2445;
+}
+
+long long f6 (long long a)
+{
+  //  	li	a5,1179648
+  //	add	a0,a0,a5
+
+  return a + (0x12 << 16);
+}
+
+/* { dg-final { scan-assembler-times "addi\t" 6 } } */
+/* { dg-final { scan-assembler-times "li\t" 1 } } */
+/* { dg-final { scan-assembler-times "add\t" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c b/gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c
new file mode 100644
index 00000000000..c55f05ed1d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zba -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+long long f (long long a)
+{
+  return a + 7680;
+}
+
+/* { dg-final { scan-assembler-times "li\t" 1 } } */
+/* { dg-final { scan-assembler-times "sh3add\t" 1 } } */

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [gcc(refs/vendors/vrull/heads/for-upstream)] RISC-V: Optimise adding a (larger than simm12) constant to a register
@ 2022-11-17 22:25 Philipp Tomsich
  0 siblings, 0 replies; 5+ messages in thread
From: Philipp Tomsich @ 2022-11-17 22:25 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:4aeab042715de2082a4ac40d72226b3f50cb0230

commit 4aeab042715de2082a4ac40d72226b3f50cb0230
Author: Philipp Tomsich <philipp.tomsich@vrull.eu>
Date:   Wed Oct 12 14:45:42 2022 +0200

    RISC-V: Optimise adding a (larger than simm12) constant to a register
    
    So far, GCC for RISC-V has handled register-const_int addition by very
    quickly escalating to creating a full sign-extended 32-bit constant and
    performing a register-register addition, resulting in sequences like
    (for the case of "a + 2048"):
            li      a5,4096
            addi    a5,a5,-2048
            add     a0,a0,a5
    
    By adding an expansion for add<mode>3, we can emit optimised RTL that
    matches the capabilities of RISC-V better by adding support for the
    following, previously unoptimised cases:
      - addi + addi
            addi    a0,a0,2047
            addi    a0,a0,1
      - li + sh[123]add (if Zba is enabled)
            li      a5,960
            sh3add  a0,a5,a0
    
    With this commit, we also fix up riscv_adjust_libcall_cfi_prologue()
    and riscv_adjust_libcall_cfi_epilogue() to not use gen_add3_insn, as
    the expander will otherwise wrap the resulting set-expression in an
    insn (causing an ICE at dwarf2-time) when invoked with -msave-restore.
    
    This closes the gap to LLVM, which has already been emitting these
    optimised sequences.
    
    Note that this benefits perlbench (in SPEC CPU 2017), which needs
    to add the constant 3840.
    
    gcc/ChangeLog:
    
            * config/riscv/bitmanip.md (*shNadd): Rename.
            (riscv_shNadd<X:mode>): Expose as gen_riscv_shNadd{di/si}.
            * config/riscv/predicates.md (const_arith_shifted123_operand):
            New predicate (for constants that are a simm12, shifted by
            1, 2 or 3).
            (const_arith_2simm12_operand): New predicate (for constants
            that can be expressed by adding 2 simm12 together).
            (addi_operand): New predicate (an immediate operand suitable
            for the new add<mode>3 expansion).
            * config/riscv/riscv.cc (riscv_adjust_libcall_cfi_prologue):
            Don't use gen_add3_insn, where a RTX instead of an INSN is
            required (otherwise this will break as soon as we have a
            define_expand for add<mode>3).
            (riscv_adjust_libcall_cfi_epilogue): Same.
            * config/riscv/riscv.md (addsi3): Rename.
            (riscv_addsi3): New name for addsi3.
            (adddi3): Rename.
            (riscv_adddi3): New name for adddi3.
            (add<mode>3): New expander that handles the basic and fancy
            (such as li+sh[123]add, addi+addi, ...) cases for adding
            register-register and register-const_int.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/addi.c: New test.
            * gcc.target/riscv/zba-shNadd-06.c: New test.
    
    Series-to: gcc-patches@gcc.gnu.org
    Series-cc: Palmer Dabbelt <palmer@rivosinc.com>
    Series-cc: Vineet Gupta <vineetg@rivosinc.com>
    Series-cc: Christoph Muellner <christoph.muellner@vrull.eu>
    Series-cc: Kito Cheng <kito.cheng@gmail.com>
    Series-cc: Jeff Law <jlaw@ventanamicro.com>

Diff:
---
 gcc/config/riscv/bitmanip.md                   |  2 +-
 gcc/config/riscv/predicates.md                 | 28 +++++++++++++
 gcc/config/riscv/riscv.cc                      | 10 +++--
 gcc/config/riscv/riscv.md                      | 58 +++++++++++++++++++++++++-
 gcc/testsuite/gcc.target/riscv/addi.c          | 39 +++++++++++++++++
 gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c | 11 +++++
 6 files changed, 141 insertions(+), 7 deletions(-)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index db16f83b46e..ff3d5ff6fbb 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -29,7 +29,7 @@
   [(set_attr "type" "bitmanip,load")
    (set_attr "mode" "DI")])
 
-(define_insn "*shNadd"
+(define_insn "riscv_shNadd<X:mode>"
   [(set (match_operand:X 0 "register_operand" "=r")
 	(plus:X (ashift:X (match_operand:X 1 "register_operand" "r")
 			  (match_operand:QI 2 "imm123_operand" "Ds3"))
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index d5c097e259f..7bb780dc3de 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -313,3 +313,31 @@
 		 (match_test "INTVAL (op) > 0")))
        (ior (match_test "SMALL_OPERAND (UINTVAL (op) & ~(HOST_WIDE_INT_1U << floor_log2 (UINTVAL (op))))")
 	    (match_test "popcount_hwi (UINTVAL (op)) == 2"))))
+
+;; A CONST_INT that can be shifted down by 1, 2 or 3 bits (i.e., has
+;; these bits clear) and will then form a SMALL_OPERAND.
+(define_predicate "const_arith_shifted123_operand"
+  (and (match_code "const_int")
+       (not (match_test "SMALL_OPERAND (INTVAL (op))")))
+{
+  HOST_WIDE_INT val = INTVAL (op);
+  int trailing = ctz_hwi (val);
+
+  /* Clamp to 3, as we have sh[123]add instructions only. */
+  if (trailing > 3)
+     trailing = 3;
+
+  return trailing > 0 && SMALL_OPERAND (val >> trailing);
+})
+
+;; A CONST_INT that can formed by adding two SMALL_OPERANDs together
+(define_predicate "const_arith_2simm12_operand"
+  (and (match_code "const_int")
+       (ior (match_test "SMALL_OPERAND(INTVAL (op) - ~(HOST_WIDE_INT_M1U << (IMM_BITS - 1)))")
+	    (match_test "SMALL_OPERAND(INTVAL (op) -  (HOST_WIDE_INT_M1U << (IMM_BITS - 1)))"))))
+
+(define_predicate "addi_operand"
+  (ior (match_operand 0 "arith_operand")
+       (match_operand 0 "const_arith_2simm12_operand")
+       (and (match_operand 0 "const_arith_shifted123_operand")
+	    (match_test "TARGET_ZBA"))))
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 5dff5cd634b..56e6c7ff9a2 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4964,8 +4964,9 @@ riscv_adjust_libcall_cfi_prologue ()
       }
 
   /* Debug info for adjust sp.  */
-  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
-				 stack_pointer_rtx, GEN_INT (-saved_size));
+  adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
+			       gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+					     GEN_INT (-saved_size)));
   dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
 			  dwarf);
   return dwarf;
@@ -5086,8 +5087,9 @@ riscv_adjust_libcall_cfi_epilogue ()
   int saved_size = cfun->machine->frame.save_libcall_adjustment;
 
   /* Debug info for adjust sp.  */
-  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
-				 stack_pointer_rtx, GEN_INT (saved_size));
+  adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
+			       gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+					     GEN_INT (saved_size)));
   dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
 			  dwarf);
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index b616c1915df..20cdb7ecfe0 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -452,7 +452,7 @@
   [(set_attr "type" "fadd")
    (set_attr "mode" "<UNITMODE>")])
 
-(define_insn "addsi3"
+(define_insn "riscv_addsi3"
   [(set (match_operand:SI          0 "register_operand" "=r,r")
 	(plus:SI (match_operand:SI 1 "register_operand" " r,r")
 		 (match_operand:SI 2 "arith_operand"    " r,I")))]
@@ -461,7 +461,7 @@
   [(set_attr "type" "arith")
    (set_attr "mode" "SI")])
 
-(define_insn "adddi3"
+(define_insn "riscv_adddi3"
   [(set (match_operand:DI          0 "register_operand" "=r,r")
 	(plus:DI (match_operand:DI 1 "register_operand" " r,r")
 		 (match_operand:DI 2 "arith_operand"    " r,I")))]
@@ -470,6 +470,60 @@
   [(set_attr "type" "arith")
    (set_attr "mode" "DI")])
 
+(define_expand "add<mode>3"
+  [(set (match_operand:GPR           0 "register_operand"      "=r,r")
+	(plus:GPR (match_operand:GPR 1 "register_operand"      " r,r")
+		  (match_operand:GPR 2 "addi_operand"          " r,I")))]
+  ""
+{
+  if (arith_operand (operands[2], <MODE>mode))
+    emit_insn (gen_riscv_add<mode>3 (operands[0], operands[1], operands[2]));
+  else if (const_arith_2simm12_operand (operands[2], <MODE>mode))
+    {
+      /* Split into two immediates that add up to the desired value:
+       * e.g., break up "a + 2445" into:
+       *         addi	a0,a0,2047
+       *	 addi	a0,a0,398
+       */
+
+      HOST_WIDE_INT val = INTVAL (operands[2]);
+      HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+      if (val >= 0)
+	 saturated = ~saturated;
+
+      val -= saturated;
+
+      rtx tmp = gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_riscv_add<mode>3 (tmp, operands[1], GEN_INT (saturated)));
+      emit_insn (gen_riscv_add<mode>3 (operands[0], tmp, GEN_INT (val)));
+    }
+  else if (<MODE>mode == word_mode
+	   && const_arith_shifted123_operand (operands[2], <MODE>mode))
+    {
+      /* Use a sh[123]add and an immediate shifted down by 1, 2, or 3. */
+
+      HOST_WIDE_INT val = INTVAL (operands[2]);
+      int shamt = ctz_hwi (val);
+
+      if (shamt > 3)
+	shamt = 3;
+
+      rtx tmp = gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_rtx_SET (tmp, GEN_INT (val >> shamt)));
+
+      /* We don't use gen_riscv_shNadd here, as it will only exist for
+	 <X:mode>.  Instead we build up its canonical form directly.  */
+      rtx shifted_imm = gen_rtx_ASHIFT (<MODE>mode, tmp, GEN_INT (shamt));
+      rtx shNadd = gen_rtx_PLUS (<MODE>mode, shifted_imm, operands[1]);
+      emit_insn (gen_rtx_SET (operands[0], shNadd));
+    }
+  else
+    FAIL;
+
+  DONE;
+})
+
 (define_expand "addv<mode>4"
   [(set (match_operand:GPR           0 "register_operand" "=r,r")
 	(plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
diff --git a/gcc/testsuite/gcc.target/riscv/addi.c b/gcc/testsuite/gcc.target/riscv/addi.c
new file mode 100644
index 00000000000..01339e44697
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/addi.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+long long f (long long a)
+{
+  //	addi	a0,a0,2047
+  //	addi	a0,a0,1
+
+  return a + 2048;
+}
+
+long long f2 (long long a)
+{
+  //	addi	a0,a0,2047
+  //	addi	a0,a0,398
+
+  return a + 2445;
+}
+
+long long f3 (long long a)
+{
+  //  	addi	a0,a0,-2048
+  //	addi	a0,a0,-397
+
+  return a - 2445;
+}
+
+long long f6 (long long a)
+{
+  //  	li	a5,1179648
+  //	add	a0,a0,a5
+
+  return a + (0x12 << 16);
+}
+
+/* { dg-final { scan-assembler-times "addi\t" 6 } } */
+/* { dg-final { scan-assembler-times "li\t" 1 } } */
+/* { dg-final { scan-assembler-times "add\t" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c b/gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c
new file mode 100644
index 00000000000..c55f05ed1d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zba-shNadd-06.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zba -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+long long f (long long a)
+{
+  return a + 7680;
+}
+
+/* { dg-final { scan-assembler-times "li\t" 1 } } */
+/* { dg-final { scan-assembler-times "sh3add\t" 1 } } */

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-11-18 20:25 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-15 14:00 [gcc(refs/vendors/vrull/heads/for-upstream)] RISC-V: Optimise adding a (larger than simm12) constant to a register Philipp Tomsich
2022-11-17 22:25 Philipp Tomsich
2022-11-18 11:34 Philipp Tomsich
2022-11-18 20:22 Philipp Tomsich
2022-11-18 20:25 Philipp Tomsich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).