public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v2 1/2] LoongArch: Redundant sign extension elimination optimization.
@ 2024-01-11 11:36 Li Wei
  2024-01-12  1:49 ` chenglulu
  0 siblings, 1 reply; 2+ messages in thread
From: Li Wei @ 2024-01-11 11:36 UTC (permalink / raw)
  To: gcc-patches; +Cc: xry111, i, xuchenghua, chenglulu, Li Wei

We found that the combine pass in GCC currently cannot eliminate the
redundant sign extension in insn sequences such as the following:

(insn 77 76 78 5 (set (reg:SI 143)
        (plus:SI (subreg/s/u:SI (reg/v:DI 104 [ len ]) 0)
            (const_int 1 [0x1]))) {addsi3}
    (expr_list:REG_DEAD (reg/v:DI 104 [ len ])
        (nil)))
(insn 78 77 82 5 (set (reg/v:DI 104 [ len ])
        (sign_extend:DI (reg:SI 143))) {extendsidi2}
        (nil))

Because reg:SI 143 neither dies nor is set in insn 78, combine does not
merge the two insns. We adjust the add patterns so that the redundant sign
extension is eliminated during the expand pass instead.
Adjusted based on upstream comments:
https://gcc.gnu.org/pipermail/gcc-patches/2024-January/641988.html

gcc/ChangeLog:

	* config/loongarch/loongarch.md (add<mode>3): Removed.
	(*addsi3): New.
	(addsi3): Ditto.
	(adddi3): Ditto.
	(*addsi3_extended): Removed.
	(addsi3_extended): New.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/sign-extend.c: Moved to...
	* gcc.target/loongarch/sign-extend-1.c: ...here.
	* gcc.target/loongarch/sign-extend-2.c: New test.
---
 gcc/config/loongarch/loongarch.md             | 93 ++++++++++++++-----
 .../{sign-extend.c => sign-extend-1.c}        |  0
 .../gcc.target/loongarch/sign-extend-2.c      | 59 ++++++++++++
 3 files changed, 128 insertions(+), 24 deletions(-)
 rename gcc/testsuite/gcc.target/loongarch/{sign-extend.c => sign-extend-1.c} (100%)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/sign-extend-2.c

diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 497a72e165c..ebc0476ea6f 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -657,42 +657,87 @@ (define_insn "add<mode>3"
   [(set_attr "type" "fadd")
    (set_attr "mode" "<UNITMODE>")])
 
-(define_insn_and_split "add<mode>3"
-  [(set (match_operand:GPR 0 "register_operand" "=r,r,r,r,r,r,r")
-	(plus:GPR (match_operand:GPR 1 "register_operand" "r,r,r,r,r,r,r")
-		  (match_operand:GPR 2 "plus_<mode>_operand"
-				       "r,I,La,Lb,Lc,Ld,Le")))]
+(define_insn_and_split "*addsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r")
+	(plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r")
+		  (match_operand:SI 2 "plus_si_operand"
+				       "r,I,La,Lb,Le")))]
   ""
   "@
-   add.<d>\t%0,%1,%2
-   addi.<d>\t%0,%1,%2
+   add.w\t%0,%1,%2
+   addi.w\t%0,%1,%2
    #
    * operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \
      return \"addu16i.d\t%0,%1,%2\";
+   #"
+  "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \
+   && !ADDU16I_OPERAND (INTVAL (operands[2]))"
+  [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3)))
+   (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 4)))]
+  {
+    loongarch_split_plus_constant (&operands[2], SImode);
+  }
+  [(set_attr "alu_type" "add")
+   (set_attr "mode" "SI")
+   (set_attr "insn_count" "1,1,2,1,2")])
+
+(define_expand "addsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r")
+	(plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r")
+		 (match_operand:SI 2 "plus_si_operand"  "r,I,La,Le,Lb")))]
+  "TARGET_64BIT"
+{
+  if (CONST_INT_P (operands[2]) && !IMM12_INT (operands[2])
+      && ADDU16I_OPERAND (INTVAL (operands[2])))
+    {
+      rtx t1 = gen_reg_rtx (DImode);
+      rtx t2 = gen_reg_rtx (DImode);
+      rtx t3 = gen_reg_rtx (DImode);
+      emit_insn (gen_extend_insn (t1, operands[1], DImode, SImode, 0));
+      t2 = operands[2];
+      emit_insn (gen_adddi3 (t3, t1, t2));
+      t3 = gen_lowpart (SImode, t3);
+      emit_move_insn (operands[0], t3);
+      DONE;
+    }
+  else
+    {
+      rtx t = gen_reg_rtx (DImode);
+      emit_insn (gen_addsi3_extended (t, operands[1], operands[2]));
+      t = gen_lowpart (SImode, t);
+      SUBREG_PROMOTED_VAR_P (t) = 1;
+      SUBREG_PROMOTED_SET (t, SRP_SIGNED);
+      emit_move_insn (operands[0], t);
+      DONE;
+    }
+})
+
+(define_insn_and_split "adddi3"
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r")
+	(plus:DI (match_operand:DI 1 "register_operand" "r,r,r,r,r,r")
+		  (match_operand:DI 2 "plus_di_operand"
+				       "r,I,La,Lb,Lc,Ld")))]
+  "TARGET_64BIT"
+  "@
+   add.d\t%0,%1,%2
+   addi.d\t%0,%1,%2
    #
+   * operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \
+     return \"addu16i.d\t%0,%1,%2\";
    #
    #"
-  "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \
+  "&& CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \
    && !ADDU16I_OPERAND (INTVAL (operands[2]))"
-  [(set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3)))
-   (set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4)))]
+  [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 3)))
+   (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))]
   {
-    loongarch_split_plus_constant (&operands[2], <MODE>mode);
+    loongarch_split_plus_constant (&operands[2], DImode);
   }
   [(set_attr "alu_type" "add")
-   (set_attr "mode" "<MODE>")
-   (set_attr "insn_count" "1,1,2,1,2,2,2")
-   (set (attr "enabled")
-      (cond
-	[(match_test "<MODE>mode != DImode && which_alternative == 4")
-	 (const_string "no")
-	 (match_test "<MODE>mode != DImode && which_alternative == 5")
-	 (const_string "no")
-	 (match_test "<MODE>mode != SImode && which_alternative == 6")
-	 (const_string "no")]
-	(const_string "yes")))])
-
-(define_insn_and_split "*addsi3_extended"
+   (set_attr "mode" "DI")
+   (set_attr "insn_count" "1,1,2,1,2,2")])
+
+(define_insn_and_split "addsi3_extended"
   [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
 	(sign_extend:DI
 	     (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r")
diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-1.c
similarity index 100%
rename from gcc/testsuite/gcc.target/loongarch/sign-extend.c
rename to gcc/testsuite/gcc.target/loongarch/sign-extend-1.c
diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c
new file mode 100644
index 00000000000..a45dde4f73f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O2" } */
+/* { dg-final { scan-assembler-times "slli.w\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,0" 1 } } */
+
+#include <stdint.h>
+#define my_min(x, y) ((x) < (y) ? (x) : (y))
+
+void
+bt_skip_func (const uint32_t len_limit, const uint32_t pos,
+              const uint8_t *const cur, uint32_t cur_match,
+              uint32_t *const son, const uint32_t cyclic_pos,
+              const uint32_t cyclic_size)
+{
+  uint32_t *ptr0 = son + (cyclic_pos << 1) + 1;
+  uint32_t *ptr1 = son + (cyclic_pos << 1);
+
+  uint32_t len0 = 0;
+  uint32_t len1 = 0;
+
+  while (1)
+    {
+      const uint32_t delta = pos - cur_match;
+      uint32_t *pair
+          = son
+            + ((cyclic_pos - delta + (delta > cyclic_pos ? cyclic_size : 0))
+               << 1);
+      const uint8_t *pb = cur - delta;
+      uint32_t len = my_min (len0, len1);
+
+      if (pb[len] == cur[len])
+        {
+          while (++len != len_limit)
+            if (pb[len] != cur[len])
+              break;
+
+          if (len == len_limit)
+            {
+              *ptr1 = pair[0];
+              *ptr0 = pair[1];
+              return;
+            }
+        }
+
+      if (pb[len] < cur[len])
+        {
+          *ptr1 = cur_match;
+          ptr1 = pair + 1;
+          cur_match = *ptr1;
+          len1 = len;
+        }
+      else
+        {
+          *ptr0 = cur_match;
+          ptr0 = pair;
+          cur_match = *ptr0;
+          len0 = len;
+        }
+    }
+}
-- 
2.39.3


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re:[pushed] [PATCH v2 1/2] LoongArch: Redundant sign extension elimination optimization.
  2024-01-11 11:36 [PATCH v2 1/2] LoongArch: Redundant sign extension elimination optimization Li Wei
@ 2024-01-12  1:49 ` chenglulu
  0 siblings, 0 replies; 2+ messages in thread
From: chenglulu @ 2024-01-12  1:49 UTC (permalink / raw)
  To: Li Wei, gcc-patches; +Cc: xry111, i, xuchenghua

Pushed to r14-7160 and r14-7161.

在 2024/1/11 下午7:36, Li Wei 写道:
> We found that the combine pass in GCC currently cannot eliminate the
> redundant sign extension in insn sequences such as the following:
>
> (insn 77 76 78 5 (set (reg:SI 143)
>          (plus:SI (subreg/s/u:SI (reg/v:DI 104 [ len ]) 0)
>              (const_int 1 [0x1]))) {addsi3}
>      (expr_list:REG_DEAD (reg/v:DI 104 [ len ])
>          (nil)))
> (insn 78 77 82 5 (set (reg/v:DI 104 [ len ])
>          (sign_extend:DI (reg:SI 143))) {extendsidi2}
>          (nil))
>
> Because reg:SI 143 neither dies nor is set in insn 78, combine does not
> merge the two insns. We adjust the add patterns so that the redundant sign
> extension is eliminated during the expand pass instead.
> Adjusted based on upstream comments:
> https://gcc.gnu.org/pipermail/gcc-patches/2024-January/641988.html
>
> gcc/ChangeLog:
>
> 	* config/loongarch/loongarch.md (add<mode>3): Removed.
> 	(*addsi3): New.
> 	(addsi3): Ditto.
> 	(adddi3): Ditto.
> 	(*addsi3_extended): Removed.
> 	(addsi3_extended): New.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/loongarch/sign-extend.c: Moved to...
> 	* gcc.target/loongarch/sign-extend-1.c: ...here.
> 	* gcc.target/loongarch/sign-extend-2.c: New test.
> ---
>   gcc/config/loongarch/loongarch.md             | 93 ++++++++++++++-----
>   .../{sign-extend.c => sign-extend-1.c}        |  0
>   .../gcc.target/loongarch/sign-extend-2.c      | 59 ++++++++++++
>   3 files changed, 128 insertions(+), 24 deletions(-)
>   rename gcc/testsuite/gcc.target/loongarch/{sign-extend.c => sign-extend-1.c} (100%)
>   create mode 100644 gcc/testsuite/gcc.target/loongarch/sign-extend-2.c
>
> diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
> index 497a72e165c..ebc0476ea6f 100644
> --- a/gcc/config/loongarch/loongarch.md
> +++ b/gcc/config/loongarch/loongarch.md
> @@ -657,42 +657,87 @@ (define_insn "add<mode>3"
>     [(set_attr "type" "fadd")
>      (set_attr "mode" "<UNITMODE>")])
>   
> -(define_insn_and_split "add<mode>3"
> -  [(set (match_operand:GPR 0 "register_operand" "=r,r,r,r,r,r,r")
> -	(plus:GPR (match_operand:GPR 1 "register_operand" "r,r,r,r,r,r,r")
> -		  (match_operand:GPR 2 "plus_<mode>_operand"
> -				       "r,I,La,Lb,Lc,Ld,Le")))]
> +(define_insn_and_split "*addsi3"
> +  [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r")
> +	(plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r")
> +		  (match_operand:SI 2 "plus_si_operand"
> +				       "r,I,La,Lb,Le")))]
>     ""
>     "@
> -   add.<d>\t%0,%1,%2
> -   addi.<d>\t%0,%1,%2
> +   add.w\t%0,%1,%2
> +   addi.w\t%0,%1,%2
>      #
>      * operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \
>        return \"addu16i.d\t%0,%1,%2\";
> +   #"
> +  "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \
> +   && !ADDU16I_OPERAND (INTVAL (operands[2]))"
> +  [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3)))
> +   (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 4)))]
> +  {
> +    loongarch_split_plus_constant (&operands[2], SImode);
> +  }
> +  [(set_attr "alu_type" "add")
> +   (set_attr "mode" "SI")
> +   (set_attr "insn_count" "1,1,2,1,2")])
> +
> +(define_expand "addsi3"
> +  [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r")
> +	(plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r")
> +		 (match_operand:SI 2 "plus_si_operand"  "r,I,La,Le,Lb")))]
> +  "TARGET_64BIT"
> +{
> +  if (CONST_INT_P (operands[2]) && !IMM12_INT (operands[2])
> +      && ADDU16I_OPERAND (INTVAL (operands[2])))
> +    {
> +      rtx t1 = gen_reg_rtx (DImode);
> +      rtx t2 = gen_reg_rtx (DImode);
> +      rtx t3 = gen_reg_rtx (DImode);
> +      emit_insn (gen_extend_insn (t1, operands[1], DImode, SImode, 0));
> +      t2 = operands[2];
> +      emit_insn (gen_adddi3 (t3, t1, t2));
> +      t3 = gen_lowpart (SImode, t3);
> +      emit_move_insn (operands[0], t3);
> +      DONE;
> +    }
> +  else
> +    {
> +      rtx t = gen_reg_rtx (DImode);
> +      emit_insn (gen_addsi3_extended (t, operands[1], operands[2]));
> +      t = gen_lowpart (SImode, t);
> +      SUBREG_PROMOTED_VAR_P (t) = 1;
> +      SUBREG_PROMOTED_SET (t, SRP_SIGNED);
> +      emit_move_insn (operands[0], t);
> +      DONE;
> +    }
> +})
> +
> +(define_insn_and_split "adddi3"
> +  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r")
> +	(plus:DI (match_operand:DI 1 "register_operand" "r,r,r,r,r,r")
> +		  (match_operand:DI 2 "plus_di_operand"
> +				       "r,I,La,Lb,Lc,Ld")))]
> +  "TARGET_64BIT"
> +  "@
> +   add.d\t%0,%1,%2
> +   addi.d\t%0,%1,%2
>      #
> +   * operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \
> +     return \"addu16i.d\t%0,%1,%2\";
>      #
>      #"
> -  "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \
> +  "&& CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \
>      && !ADDU16I_OPERAND (INTVAL (operands[2]))"
> -  [(set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3)))
> -   (set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4)))]
> +  [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 3)))
> +   (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))]
>     {
> -    loongarch_split_plus_constant (&operands[2], <MODE>mode);
> +    loongarch_split_plus_constant (&operands[2], DImode);
>     }
>     [(set_attr "alu_type" "add")
> -   (set_attr "mode" "<MODE>")
> -   (set_attr "insn_count" "1,1,2,1,2,2,2")
> -   (set (attr "enabled")
> -      (cond
> -	[(match_test "<MODE>mode != DImode && which_alternative == 4")
> -	 (const_string "no")
> -	 (match_test "<MODE>mode != DImode && which_alternative == 5")
> -	 (const_string "no")
> -	 (match_test "<MODE>mode != SImode && which_alternative == 6")
> -	 (const_string "no")]
> -	(const_string "yes")))])
> -
> -(define_insn_and_split "*addsi3_extended"
> +   (set_attr "mode" "DI")
> +   (set_attr "insn_count" "1,1,2,1,2,2")])
> +
> +(define_insn_and_split "addsi3_extended"
>     [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
>   	(sign_extend:DI
>   	     (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r")
> diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-1.c
> similarity index 100%
> rename from gcc/testsuite/gcc.target/loongarch/sign-extend.c
> rename to gcc/testsuite/gcc.target/loongarch/sign-extend-1.c
> diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c
> new file mode 100644
> index 00000000000..a45dde4f73f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c
> @@ -0,0 +1,59 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mabi=lp64d -O2" } */
> +/* { dg-final { scan-assembler-times "slli.w\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,0" 1 } } */
> +
> +#include <stdint.h>
> +#define my_min(x, y) ((x) < (y) ? (x) : (y))
> +
> +void
> +bt_skip_func (const uint32_t len_limit, const uint32_t pos,
> +              const uint8_t *const cur, uint32_t cur_match,
> +              uint32_t *const son, const uint32_t cyclic_pos,
> +              const uint32_t cyclic_size)
> +{
> +  uint32_t *ptr0 = son + (cyclic_pos << 1) + 1;
> +  uint32_t *ptr1 = son + (cyclic_pos << 1);
> +
> +  uint32_t len0 = 0;
> +  uint32_t len1 = 0;
> +
> +  while (1)
> +    {
> +      const uint32_t delta = pos - cur_match;
> +      uint32_t *pair
> +          = son
> +            + ((cyclic_pos - delta + (delta > cyclic_pos ? cyclic_size : 0))
> +               << 1);
> +      const uint8_t *pb = cur - delta;
> +      uint32_t len = my_min (len0, len1);
> +
> +      if (pb[len] == cur[len])
> +        {
> +          while (++len != len_limit)
> +            if (pb[len] != cur[len])
> +              break;
> +
> +          if (len == len_limit)
> +            {
> +              *ptr1 = pair[0];
> +              *ptr0 = pair[1];
> +              return;
> +            }
> +        }
> +
> +      if (pb[len] < cur[len])
> +        {
> +          *ptr1 = cur_match;
> +          ptr1 = pair + 1;
> +          cur_match = *ptr1;
> +          len1 = len;
> +        }
> +      else
> +        {
> +          *ptr0 = cur_match;
> +          ptr0 = pair;
> +          cur_match = *ptr0;
> +          len0 = len;
> +        }
> +    }
> +}


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-01-12  1:50 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-11 11:36 [PATCH v2 1/2] LoongArch: Redundant sign extension elimination optimization Li Wei
2024-01-12  1:49 ` chenglulu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).