public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v2] LoongArch: Modify the address calculation logic for obtaining array element values through fp.
@ 2024-01-30  7:55 Lulu Cheng
  2024-02-02  1:15 ` [pushed][PATCH " chenglulu
  0 siblings, 1 reply; 2+ messages in thread
From: Lulu Cheng @ 2024-01-30  7:55 UTC (permalink / raw)
  To: gcc-patches; +Cc: xry111, i, xuchenghua, chenglulu

Modify the address calculation logic from (((a x C) + fp) + offset) to
((fp + offset) + (a x C)), thereby changing the register dependencies and
optimizing the generated code.  The value of C is 2, 4 or 8.

The following is the assembly code for a loop in SPEC CPU2006 401.bzip2, before and after this modification:

                 old                      |                 new
 735 .L71:                                |  735 .L71:
 736         slli.d  $r12,$r15,2          |  736         slli.d  $r12,$r15,2
 737         ldx.w   $r13,$r22,$r12       |  737         ldx.w   $r13,$r22,$r12
 738         addi.d  $r15,$r15,-1         |  738         addi.d  $r15,$r15,-1
 739         slli.w  $r16,$r15,0          |  739         slli.w  $r16,$r15,0
 740         addi.w  $r13,$r13,-1         |  740         addi.w  $r13,$r13,-1
 741         slti    $r14,$r13,0          |  741         slti    $r14,$r13,0
 742         add.w   $r12,$r26,$r13       |  742         add.w   $r12,$r26,$r13
 743         maskeqz $r12,$r12,$r14       |  743         maskeqz $r12,$r12,$r14
 744         masknez $r14,$r13,$r14       |  744         masknez $r14,$r13,$r14
 745         or      $r12,$r12,$r14       |  745         or      $r12,$r12,$r14
 746         ldx.bu  $r14,$r30,$r12       |  746         ldx.bu  $r14,$r30,$r12
 747         lu12i.w $r13,4096>>12        |  747         alsl.d  $r14,$r14,$r18,2
 748         ori     $r13,$r13,432        |  748         ldptr.w $r13,$r14,0
 749         add.d   $r13,$r13,$r3        |  749         addi.w  $r17,$r13,-1
 750         alsl.d  $r14,$r14,$r13,2     |  750         stptr.w $r17,$r14,0
 751         ldptr.w $r13,$r14,-1968      |  751         slli.d  $r13,$r13,2
 752         addi.w  $r17,$r13,-1         |  752         stx.w   $r12,$r22,$r13
 753         st.w    $r17,$r14,-1968      |  753         ldptr.w $r12,$r19,0
 754         slli.d  $r13,$r13,2          |  754         blt     $r12,$r16,.L71
 755         stx.w   $r12,$r22,$r13       |  755         .align  4
 756         ldptr.w $r12,$r18,-2048      |  756
 757         blt     $r12,$r16,.L71       |  757
 758         .align  4                    |  758

This patch is ported from riscv's commit r14-3111.

gcc/ChangeLog:

	* config/loongarch/loongarch.cc (mem_shadd_or_shadd_rtx_p): New function.
	(loongarch_legitimize_address): Add logical transformation code.

---
v1 -> v2:
  Modify code format and comment information.

---
 gcc/config/loongarch/loongarch.cc | 43 +++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index b494040d165..b8f6f6689bb 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -3219,6 +3219,22 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
   return true;
 }
 
+/* Helper for loongarch_legitimize_address.  Given X, return true if it
+   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
+
+   These respectively represent canonical shift-add rtxs or scaled
+   memory addresses.  */
+static bool
+mem_shadd_or_shadd_rtx_p (rtx x)
+{
+  return ((GET_CODE (x) == ASHIFT
+	   || GET_CODE (x) == MULT)
+	  && CONST_INT_P (XEXP (x, 1))
+	  && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3))
+	      || (GET_CODE (x) == MULT
+		  && IN_RANGE (exact_log2 (INTVAL (XEXP (x, 1))), 1, 3))));
+}
+
 /* This function is used to implement LEGITIMIZE_ADDRESS.  If X can
    be legitimized in a way that the generic machinery might not expect,
    return a new address, otherwise return NULL.  MODE is the mode of
@@ -3242,6 +3258,33 @@ loongarch_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
   loongarch_split_plus (x, &base, &offset);
   if (offset != 0)
     {
+      /* Handle (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case.  */
+      if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0))
+	  && IMM12_OPERAND (offset))
+	{
+	  rtx index = XEXP (base, 0);
+	  rtx fp = XEXP (base, 1);
+
+	  if (REG_P (fp) && REGNO (fp) == VIRTUAL_STACK_VARS_REGNUM)
+	    {
+	      /* If we were given a MULT, we must fix the constant
+		 as we're going to create the ASHIFT form.  */
+	      int shift_val = INTVAL (XEXP (index, 1));
+	      if (GET_CODE (index) == MULT)
+		shift_val = exact_log2 (shift_val);
+
+	      rtx reg1 = gen_reg_rtx (Pmode);
+	      rtx reg3 = gen_reg_rtx (Pmode);
+	      loongarch_emit_binary (PLUS, reg1, fp, GEN_INT (offset));
+	      loongarch_emit_binary (PLUS, reg3,
+				     gen_rtx_ASHIFT (Pmode, XEXP (index, 0),
+						     GEN_INT (shift_val)),
+				     reg1);
+
+	      return reg3;
+	    }
+	}
+
       if (!loongarch_valid_base_register_p (base, mode, false))
 	base = copy_to_mode_reg (Pmode, base);
       addr = loongarch_add_offset (NULL, base, offset);
-- 
2.39.3


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [pushed][PATCH v2] LoongArch: Modify the address calculation logic for obtaining array element values through fp.
  2024-01-30  7:55 [PATCH v2] LoongArch: Modify the address calculation logic for obtaining array element values through fp Lulu Cheng
@ 2024-02-02  1:15 ` chenglulu
  0 siblings, 0 replies; 2+ messages in thread
From: chenglulu @ 2024-02-02  1:15 UTC (permalink / raw)
  To: gcc-patches; +Cc: xry111, i, xuchenghua

Pushed to r14-8716.

在 2024/1/30 下午3:55, Lulu Cheng 写道:
> Modify address calculation logic from (((a x C) + fp) + offset) to ((fp + offset) + a x C).
> Thereby modifying the register dependencies and optimizing the code.
> The value of C is 2 4 or 8.
>
> The following is the assembly code before and after a loop modification in spec2006 401.bzip:
>
>                   old                      |                 new
>   735 .L71:                                |  735 .L71:
>   736         slli.d  $r12,$r15,2          |  736         slli.d  $r12,$r15,2
>   737         ldx.w   $r13,$r22,$r12       |  737         ldx.w   $r13,$r22,$r12
>   738         addi.d  $r15,$r15,-1         |  738         addi.d  $r15,$r15,-1
>   739         slli.w  $r16,$r15,0          |  739         slli.w  $r16,$r15,0
>   740         addi.w  $r13,$r13,-1         |  740         addi.w  $r13,$r13,-1
>   741         slti    $r14,$r13,0          |  741         slti    $r14,$r13,0
>   742         add.w   $r12,$r26,$r13       |  742         add.w   $r12,$r26,$r13
>   743         maskeqz $r12,$r12,$r14       |  743         maskeqz $r12,$r12,$r14
>   744         masknez $r14,$r13,$r14       |  744         masknez $r14,$r13,$r14
>   745         or      $r12,$r12,$r14       |  745         or      $r12,$r12,$r14
>   746         ldx.bu  $r14,$r30,$r12       |  746         ldx.bu  $r14,$r30,$r12
>   747         lu12i.w $r13,4096>>12        |  747         alsl.d  $r14,$r14,$r18,2
>   748         ori     $r13,$r13,432        |  748         ldptr.w $r13,$r14,0
>   749         add.d   $r13,$r13,$r3        |  749         addi.w  $r17,$r13,-1
>   750         alsl.d  $r14,$r14,$r13,2     |  750         stptr.w $r17,$r14,0
>   751         ldptr.w $r13,$r14,-1968      |  751         slli.d  $r13,$r13,2
>   752         addi.w  $r17,$r13,-1         |  752         stx.w   $r12,$r22,$r13
>   753         st.w    $r17,$r14,-1968      |  753         ldptr.w $r12,$r19,0
>   754         slli.d  $r13,$r13,2          |  754         blt     $r12,$r16,.L71
>   755         stx.w   $r12,$r22,$r13       |  755         .align  4
>   756         ldptr.w $r12,$r18,-2048      |  756
>   757         blt     $r12,$r16,.L71       |  757
>   758         .align  4                    |  758
>
> This patch is ported from riscv's commit r14-3111.
>
> gcc/ChangeLog:
>
> 	* config/loongarch/loongarch.cc (mem_shadd_or_shadd_rtx_p): New function.
> 	(loongarch_legitimize_address): Add logical transformation code.
>
> ---
> v1 -> v2:
>    Modify code format and comment information.
>
> ---
>   gcc/config/loongarch/loongarch.cc | 43 +++++++++++++++++++++++++++++++
>   1 file changed, 43 insertions(+)
>
> diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
> index b494040d165..b8f6f6689bb 100644
> --- a/gcc/config/loongarch/loongarch.cc
> +++ b/gcc/config/loongarch/loongarch.cc
> @@ -3219,6 +3219,22 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
>     return true;
>   }
>   
> +/* Helper loongarch_legitimize_address.  Given X, return true if it
> +   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
> +
> +   This respectively represent canonical shift-add rtxs or scaled
> +   memory addresses.  */
> +static bool
> +mem_shadd_or_shadd_rtx_p (rtx x)
> +{
> +  return ((GET_CODE (x) == ASHIFT
> +	   || GET_CODE (x) == MULT)
> +	  && CONST_INT_P (XEXP (x, 1))
> +	  && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3))
> +	      || (GET_CODE (x) == MULT
> +		  && IN_RANGE (exact_log2 (INTVAL (XEXP (x, 1))), 1, 3))));
> +}
> +
>   /* This function is used to implement LEGITIMIZE_ADDRESS.  If X can
>      be legitimized in a way that the generic machinery might not expect,
>      return a new address, otherwise return NULL.  MODE is the mode of
> @@ -3242,6 +3258,33 @@ loongarch_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
>     loongarch_split_plus (x, &base, &offset);
>     if (offset != 0)
>       {
> +      /* Handle (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case.  */
> +      if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0))
> +	  && IMM12_OPERAND (offset))
> +	{
> +	  rtx index = XEXP (base, 0);
> +	  rtx fp = XEXP (base, 1);
> +
> +	  if (REG_P (fp) && REGNO (fp) == VIRTUAL_STACK_VARS_REGNUM)
> +	    {
> +	      /* If we were given a MULT, we must fix the constant
> +		 as we're going to create the ASHIFT form.  */
> +	      int shift_val = INTVAL (XEXP (index, 1));
> +	      if (GET_CODE (index) == MULT)
> +		shift_val = exact_log2 (shift_val);
> +
> +	      rtx reg1 = gen_reg_rtx (Pmode);
> +	      rtx reg3 = gen_reg_rtx (Pmode);
> +	      loongarch_emit_binary (PLUS, reg1, fp, GEN_INT (offset));
> +	      loongarch_emit_binary (PLUS, reg3,
> +				     gen_rtx_ASHIFT (Pmode, XEXP (index, 0),
> +						     GEN_INT (shift_val)),
> +				     reg1);
> +
> +	      return reg3;
> +	    }
> +	}
> +
>         if (!loongarch_valid_base_register_p (base, mode, false))
>   	base = copy_to_mode_reg (Pmode, base);
>         addr = loongarch_add_offset (NULL, base, offset);


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-02-02  1:15 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-30  7:55 [PATCH v2] LoongArch: Modify the address calculation logic for obtaining array element values through fp Lulu Cheng
2024-02-02  1:15 ` [pushed][PATCH " chenglulu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).