commit a16dc729fda9fabd6472d50cce45791cb3b6ada8 Author: Jivan Hakobyan Date: Wed Aug 9 13:26:58 2023 -0600 RISC-V: Folding memory for FP + constant case Accessing an element of a local array is turned into a load from an address of the form (fp + (index << C1)) + C2. When the access is inside a loop, part of this computation is loop invariant. For some reason, the loop-invariant passes cannot move that part out of the loop. But we can handle it in the target-specific hook (legitimize_address), which provides an opportunity to rewrite the memory access in a form more suitable for the target architecture. This patch handles the mentioned case by rewriting the address to ((fp + C2) + (index << C1)). I have evaluated it on SPEC2017 and got an improvement on leela (over 7b instructions, .39% of the dynamic count), which dwarfs the regression for gcc (14m instructions, .0012% of the dynamic count). gcc/ChangeLog: * config/riscv/riscv.cc (riscv_legitimize_address): Handle folding. (mem_shadd_or_shadd_rtx_p): New function. diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 77892da2920..7f2041a54ba 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -1805,6 +1805,22 @@ riscv_shorten_lw_offset (rtx base, HOST_WIDE_INT offset) return addr; } +/* Helper for riscv_legitimize_address. Given X, return true if it + is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8. + + These respectively represent canonical shift-add rtxs or scaled + memory addresses. */ +static bool +mem_shadd_or_shadd_rtx_p (rtx x) +{ + return ((GET_CODE (x) == ASHIFT + || GET_CODE (x) == MULT) + && CONST_INT_P (XEXP (x, 1)) + && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3)) + || (GET_CODE (x) == MULT + && IN_RANGE (exact_log2 (INTVAL (XEXP (x, 1))), 1, 3)))); +} + /* This function is used to implement LEGITIMIZE_ADDRESS. If X can be legitimized in a way that the generic machinery might not expect, return a new address, otherwise return NULL. 
MODE is the mode of @@ -1830,6 +1846,32 @@ riscv_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, rtx base = XEXP (x, 0); HOST_WIDE_INT offset = INTVAL (XEXP (x, 1)); + /* Handle (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case. */ + if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0)) + && SMALL_OPERAND (offset)) + { + rtx index = XEXP (base, 0); + rtx fp = XEXP (base, 1); + if (REGNO (fp) == VIRTUAL_STACK_VARS_REGNUM) + { + + /* If we were given a MULT, we must fix the constant + as we're going to create the ASHIFT form. */ + int shift_val = INTVAL (XEXP (index, 1)); + if (GET_CODE (index) == MULT) + shift_val = exact_log2 (shift_val); + + rtx reg1 = gen_reg_rtx (Pmode); + rtx reg2 = gen_reg_rtx (Pmode); + rtx reg3 = gen_reg_rtx (Pmode); + riscv_emit_binary (PLUS, reg1, fp, GEN_INT (offset)); + riscv_emit_binary (ASHIFT, reg2, XEXP (index, 0), GEN_INT (shift_val)); + riscv_emit_binary (PLUS, reg3, reg2, reg1); + + return reg3; + } + } + if (!riscv_valid_base_register_p (base, mode, false)) base = copy_to_mode_reg (Pmode, base); if (optimize_function_for_size_p (cfun)