public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCHv2, rs6000] Add two peephole2 patterns for mr. insn
@ 2023-06-12  2:34 HAO CHEN GUI
  2023-06-13  2:17 ` Kewen.Lin
  0 siblings, 1 reply; 2+ messages in thread
From: HAO CHEN GUI @ 2023-06-12  2:34 UTC (permalink / raw)
  To: gcc-patches; +Cc: Segher Boessenkool, David, Kewen.Lin, Peter Bergner

Hi,
  This patch adds two peephole2 patterns which help convert certain insn
sequences to "mr." instruction. These insn sequences can't be combined in
combine pass.

  Compared to last version, it adds a new mode iterator "Q" which should
be used for dot instruction. With "-m32/-mpowerpc64" set, the dot
instruction should compare DImode with 0, not the SImode.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen


ChangeLog
rs6000: Add two peephole patterns for "mr." insn

When investigating the issue mentioned in PR87871#c30 - if compare
and move pattern benefits before RA, I checked the assembly generated
for SPEC2017 and found that certain insn sequences aren't converted to
"mr." instructions.
Following two sequence are never to be combined to "mr." pattern as
there is no register link between them. This patch adds two peephole2
patterns to convert them to "mr." instructions.

cmp 0,3,0
mr 4,3

mr 4,3
cmp 0,3,0

The patch also creates a new mode iterator which decided by
TARGET_POWERPC64.  This mode iterator is used in "mr." and its split
pattern.  The original P iterator is wrong when -m32/-mpowerpc64 is set.
In this situation, the "mr." should compares the whole 64-bit register
with 0 other than the low 32-bit one.

gcc/
	* config/rs6000/rs6000.md (peephole2 for compare_and_move): New.
	(peephole2 for move_and_compare): New.
	(mode_iterator Q): New.  Set the mode to SI/DImode by
	TARGET_POWERPC64.
	(*mov<mode>_internal2): Change the mode iterator from P to Q.
	(split pattern for compare_and_move): Likewise.

gcc/testsuite/
	* gcc.dg/rtl/powerpc/move_compare_peephole_32.c: New.
	* gcc.dg/rtl/powerpc/move_compare_peephole_64.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index b0db8ae508d..fdb5b6ed22a 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -491,6 +491,7 @@ (define_mode_iterator SDI [SI DI])
 ; The size of a pointer.  Also, the size of the value that a record-condition
 ; (one with a '.') will compare; and the size used for arithmetic carries.
 (define_mode_iterator P [(SI "TARGET_32BIT") (DI "TARGET_64BIT")])
+(define_mode_iterator Q [(SI "!TARGET_POWERPC64") (DI "TARGET_POWERPC64")])

 ; Iterator to add PTImode along with TImode (TImode can go in VSX registers,
 ; PTImode is GPR only)
@@ -7879,9 +7880,9 @@ (define_split

 (define_insn "*mov<mode>_internal2"
   [(set (match_operand:CC 2 "cc_reg_operand" "=y,x,?y")
-	(compare:CC (match_operand:P 1 "gpc_reg_operand" "0,r,r")
+	(compare:CC (match_operand:Q 1 "gpc_reg_operand" "0,r,r")
 		    (const_int 0)))
-   (set (match_operand:P 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))]
+   (set (match_operand:Q 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))]
   ""
   "@
    cmp<wd>i %2,%0,0
@@ -7891,11 +7892,41 @@ (define_insn "*mov<mode>_internal2"
    (set_attr "dot" "yes")
    (set_attr "length" "4,4,8")])

+(define_peephole2
+  [(set (match_operand:CC 2 "cc_reg_operand" "")
+	(compare:CC (match_operand:Q 1 "int_reg_operand" "")
+		    (const_int 0)))
+   (set (match_operand:Q 0 "int_reg_operand" "")
+	(match_dup 1))]
+  "!cc_reg_not_cr0_operand (operands[2], CCmode)"
+  [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x")
+		   (compare:CC (match_operand:Q 1 "int_reg_operand" "r")
+		   (const_int 0)))
+	      (set (match_operand:Q 0 "int_reg_operand" "=r")
+		   (match_dup 1))])]
+  ""
+)
+
+(define_peephole2
+  [(set (match_operand:Q 0 "int_reg_operand" "")
+	(match_operand:Q 1 "int_reg_operand" ""))
+   (set (match_operand:CC 2 "cc_reg_operand" "")
+	(compare:CC (match_dup 1)
+		    (const_int 0)))]
+  "!cc_reg_not_cr0_operand (operands[2], CCmode)"
+  [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x")
+		   (compare:CC (match_operand:GPR 1 "int_reg_operand" "r")
+		   (const_int 0)))
+	      (set (match_operand:Q 0 "int_reg_operand" "=r")
+		   (match_dup 1))])]
+  ""
+)
+
 (define_split
   [(set (match_operand:CC 2 "cc_reg_not_cr0_operand")
-	(compare:CC (match_operand:P 1 "gpc_reg_operand")
+	(compare:CC (match_operand:Q 1 "gpc_reg_operand")
 		    (const_int 0)))
-   (set (match_operand:P 0 "gpc_reg_operand") (match_dup 1))]
+   (set (match_operand:Q 0 "gpc_reg_operand") (match_dup 1))]
   "reload_completed"
   [(set (match_dup 0) (match_dup 1))
    (set (match_dup 2)
diff --git a/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c
new file mode 100644
index 00000000000..29234dea7c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c
@@ -0,0 +1,60 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-skip-if "" { has_arch_ppc64 } } */
+/* { dg-options "-O2 -mregnames" } */
+
+/* Following instruction sequence is found in assembly of
+   Perl_block_start, which is a function of op.c in SPEC2017
+   perlbench.  It can be never combined to a move and compare
+   instruction in combine pass.  A peephole pattern is needed to
+   converted the sequence to a "mr." instruction.
+
+	cmpdi 0,9,0
+	mr 12,9
+
+   This test case is an analogue of the source code and verifies
+   if the peephole2 patterns work.
+*/
+
+int __RTL (startwith ("peephole2")) compare_move_peephole ()
+{
+(function "compare_move_peephole"
+  (insn-chain
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 8 (set (reg:CC %cr0)
+                    (compare:CC (reg:SI %r3)
+                        (const_int 0))))
+      (cinsn 2 (set (reg:SI %r4)
+                    (reg:SI %r3)))
+      ;; Extra insn to avoid the above being deleted by DCE.
+      (cinsn 18 (use (reg:SI %r4)))
+      (cinsn 19 (use (reg:CC %cr0)))
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+) ;; function "main"
+}
+
+int __RTL (startwith ("peephole2")) move_compare_peephole ()
+{
+(function "move_compare_peephole"
+  (insn-chain
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 2 (set (reg:SI %r4)
+                    (reg:SI %r3)))
+      (cinsn 8 (set (reg:CC %cr0)
+                    (compare:CC (reg:SI %r3)
+                        (const_int 0))))
+      ;; Extra insn to avoid the above being deleted by DCE.
+      (cinsn 18 (use (reg:SI %r4)))
+      (cinsn 19 (use (reg:CC %cr0)))
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+) ;; function "main"
+}
+
+/* { dg-final { scan-assembler-times {\mmr\.} 2 } } */
diff --git a/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_64.c b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_64.c
new file mode 100644
index 00000000000..dd360033dbd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_64.c
@@ -0,0 +1,60 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-O2 -mregnames" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+
+/* Following instruction sequence is found in assembly of
+   Perl_block_start, which is a function of op.c in SPEC2017
+   perlbench.  It can be never combined to a move and compare
+   instruction in combine pass.  A peephole pattern is needed to
+   converted the sequence to a "mr." instruction.
+
+	cmpdi 0,9,0
+	mr 12,9
+
+   This test case is an analogue of the source code and verifies
+   if the peephole2 patterns work.
+*/
+
+int __RTL (startwith ("peephole2")) compare_move_peephole ()
+{
+(function "compare_move_peephole"
+  (insn-chain
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 8 (set (reg:CC %cr0)
+                    (compare:CC (reg:DI %r3)
+                        (const_int 0))))
+      (cinsn 2 (set (reg:DI %r4)
+                    (reg:DI %r3)))
+      ;; Extra insn to avoid the above being deleted by DCE.
+      (cinsn 18 (use (reg:DI %r4)))
+      (cinsn 19 (use (reg:CC %cr0)))
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+) ;; function "main"
+}
+
+int __RTL (startwith ("peephole2")) move_compare_peephole ()
+{
+(function "move_compare_peephole"
+  (insn-chain
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 2 (set (reg:DI %r4)
+                    (reg:DI %r3)))
+      (cinsn 8 (set (reg:CC %cr0)
+                    (compare:CC (reg:DI %r3)
+                        (const_int 0))))
+      ;; Extra insn to avoid the above being deleted by DCE.
+      (cinsn 18 (use (reg:DI %r4)))
+      (cinsn 19 (use (reg:CC %cr0)))
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+) ;; function "main"
+}
+
+/* { dg-final { scan-assembler-times {\mmr\.} 2 } } */

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCHv2, rs6000] Add two peephole2 patterns for mr. insn
  2023-06-12  2:34 [PATCHv2, rs6000] Add two peephole2 patterns for mr. insn HAO CHEN GUI
@ 2023-06-13  2:17 ` Kewen.Lin
  0 siblings, 0 replies; 2+ messages in thread
From: Kewen.Lin @ 2023-06-13  2:17 UTC (permalink / raw)
  To: HAO CHEN GUI; +Cc: Segher Boessenkool, David, Peter Bergner, gcc-patches

Hi Haochen,

on 2023/6/12 10:34, HAO CHEN GUI wrote:
> Hi,
>   This patch adds two peephole2 patterns which help convert certain insn
> sequences to "mr." instruction. These insn sequences can't be combined in
> combine pass.
> 
>   Compared to last version, it adds a new mode iterator "Q" which should
> be used for dot instruction. With "-m32/-mpowerpc64" set, the dot
> instruction should compare DImode with 0, not the SImode.
> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
> 
> Thanks
> Gui Haochen
> 
> 
> ChangeLog
> rs6000: Add two peephole patterns for "mr." insn
> 
> When investigating the issue mentioned in PR87871#c30 - if compare
> and move pattern benefits before RA, I checked the assembly generated
> for SPEC2017 and found that certain insn sequences aren't converted to
> "mr." instructions.
> Following two sequence are never to be combined to "mr." pattern as
> there is no register link between them. This patch adds two peephole2
> patterns to convert them to "mr." instructions.
> 
> cmp 0,3,0
> mr 4,3
> 
> mr 4,3
> cmp 0,3,0
> 
> The patch also creates a new mode iterator which decided by
> TARGET_POWERPC64.  This mode iterator is used in "mr." and its split
> pattern.  The original P iterator is wrong when -m32/-mpowerpc64 is set.
> In this situation, the "mr." should compares the whole 64-bit register
> with 0 other than the low 32-bit one.
> 
> gcc/
> 	* config/rs6000/rs6000.md (peephole2 for compare_and_move): New.
> 	(peephole2 for move_and_compare): New.
> 	(mode_iterator Q): New.  Set the mode to SI/DImode by
> 	TARGET_POWERPC64.
> 	(*mov<mode>_internal2): Change the mode iterator from P to Q.
> 	(split pattern for compare_and_move): Likewise.
> 
> gcc/testsuite/
> 	* gcc.dg/rtl/powerpc/move_compare_peephole_32.c: New.
> 	* gcc.dg/rtl/powerpc/move_compare_peephole_64.c: New.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index b0db8ae508d..fdb5b6ed22a 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -491,6 +491,7 @@ (define_mode_iterator SDI [SI DI])
>  ; The size of a pointer.  Also, the size of the value that a record-condition
>  ; (one with a '.') will compare; and the size used for arithmetic carries.
>  (define_mode_iterator P [(SI "TARGET_32BIT") (DI "TARGET_64BIT")])
> +(define_mode_iterator Q [(SI "!TARGET_POWERPC64") (DI "TARGET_POWERPC64")])

The P mode iterator performs like Pmode, so it's named as P, but here the
proposed Q looks random?  It's a bad name IMHO, since you want this mode
iterator to perform like word_mode, I'd suggest WORD instead of Q.

> 
>  ; Iterator to add PTImode along with TImode (TImode can go in VSX registers,
>  ; PTImode is GPR only)
> @@ -7879,9 +7880,9 @@ (define_split
> 
>  (define_insn "*mov<mode>_internal2"
>    [(set (match_operand:CC 2 "cc_reg_operand" "=y,x,?y")
> -	(compare:CC (match_operand:P 1 "gpc_reg_operand" "0,r,r")
> +	(compare:CC (match_operand:Q 1 "gpc_reg_operand" "0,r,r")
>  		    (const_int 0)))
> -   (set (match_operand:P 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))]
> +   (set (match_operand:Q 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))]
>    ""
>    "@
>     cmp<wd>i %2,%0,0
> @@ -7891,11 +7892,41 @@ (define_insn "*mov<mode>_internal2"
>     (set_attr "dot" "yes")
>     (set_attr "length" "4,4,8")])
> 
> +(define_peephole2
> +  [(set (match_operand:CC 2 "cc_reg_operand" "")
> +	(compare:CC (match_operand:Q 1 "int_reg_operand" "")
> +		    (const_int 0)))
> +   (set (match_operand:Q 0 "int_reg_operand" "")
> +	(match_dup 1))]
> +  "!cc_reg_not_cr0_operand (operands[2], CCmode)"
> +  [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x")
> +		   (compare:CC (match_operand:Q 1 "int_reg_operand" "r")
> +		   (const_int 0)))
> +	      (set (match_operand:Q 0 "int_reg_operand" "=r")
> +		   (match_dup 1))])]
> +  ""
> +)
> +
> +(define_peephole2
> +  [(set (match_operand:Q 0 "int_reg_operand" "")
> +	(match_operand:Q 1 "int_reg_operand" ""))
> +   (set (match_operand:CC 2 "cc_reg_operand" "")
> +	(compare:CC (match_dup 1)
> +		    (const_int 0)))]
> +  "!cc_reg_not_cr0_operand (operands[2], CCmode)"
> +  [(parallel [(set (match_operand:CC 2 "cc_reg_operand" "=x")
> +		   (compare:CC (match_operand:GPR 1 "int_reg_operand" "r")
> +		   (const_int 0)))
> +	      (set (match_operand:Q 0 "int_reg_operand" "=r")
> +		   (match_dup 1))])]
> +  ""
> +)
> +
>  (define_split
>    [(set (match_operand:CC 2 "cc_reg_not_cr0_operand")
> -	(compare:CC (match_operand:P 1 "gpc_reg_operand")
> +	(compare:CC (match_operand:Q 1 "gpc_reg_operand")
>  		    (const_int 0)))
> -   (set (match_operand:P 0 "gpc_reg_operand") (match_dup 1))]
> +   (set (match_operand:Q 0 "gpc_reg_operand") (match_dup 1))]
>    "reload_completed"
>    [(set (match_dup 0) (match_dup 1))
>     (set (match_dup 2)
> diff --git a/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c
> new file mode 100644
> index 00000000000..29234dea7c7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_32.c
> @@ -0,0 +1,60 @@
> +/* { dg-do compile { target powerpc*-*-* } } */
> +/* { dg-skip-if "" { has_arch_ppc64 } } */
> +/* { dg-options "-O2 -mregnames" } */
> +
> +/* Following instruction sequence is found in assembly of
> +   Perl_block_start, which is a function of op.c in SPEC2017
> +   perlbench.  It can be never combined to a move and compare
> +   instruction in combine pass.  A peephole pattern is needed to
> +   converted the sequence to a "mr." instruction.
> +
> +	cmpdi 0,9,0
> +	mr 12,9
> +
> +   This test case is an analogue of the source code and verifies
> +   if the peephole2 patterns work.
> +*/

Thanks for adding this.  The others look good to me!

BR,
Kewen

> +
> +int __RTL (startwith ("peephole2")) compare_move_peephole ()
> +{
> +(function "compare_move_peephole"
> +  (insn-chain
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 8 (set (reg:CC %cr0)
> +                    (compare:CC (reg:SI %r3)
> +                        (const_int 0))))
> +      (cinsn 2 (set (reg:SI %r4)
> +                    (reg:SI %r3)))
> +      ;; Extra insn to avoid the above being deleted by DCE.
> +      (cinsn 18 (use (reg:SI %r4)))
> +      (cinsn 19 (use (reg:CC %cr0)))
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +) ;; function "main"
> +}
> +
> +int __RTL (startwith ("peephole2")) move_compare_peephole ()
> +{
> +(function "move_compare_peephole"
> +  (insn-chain
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 2 (set (reg:SI %r4)
> +                    (reg:SI %r3)))
> +      (cinsn 8 (set (reg:CC %cr0)
> +                    (compare:CC (reg:SI %r3)
> +                        (const_int 0))))
> +      ;; Extra insn to avoid the above being deleted by DCE.
> +      (cinsn 18 (use (reg:SI %r4)))
> +      (cinsn 19 (use (reg:CC %cr0)))
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +) ;; function "main"
> +}
> +
> +/* { dg-final { scan-assembler-times {\mmr\.} 2 } } */
> diff --git a/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_64.c b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_64.c
> new file mode 100644
> index 00000000000..dd360033dbd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/rtl/powerpc/move_compare_peephole_64.c
> @@ -0,0 +1,60 @@
> +/* { dg-do compile { target powerpc*-*-* } } */
> +/* { dg-options "-O2 -mregnames" } */
> +/* { dg-require-effective-target has_arch_ppc64 } */
> +
> +/* Following instruction sequence is found in assembly of
> +   Perl_block_start, which is a function of op.c in SPEC2017
> +   perlbench.  It can be never combined to a move and compare
> +   instruction in combine pass.  A peephole pattern is needed to
> +   converted the sequence to a "mr." instruction.
> +
> +	cmpdi 0,9,0
> +	mr 12,9
> +
> +   This test case is an analogue of the source code and verifies
> +   if the peephole2 patterns work.
> +*/
> +
> +int __RTL (startwith ("peephole2")) compare_move_peephole ()
> +{
> +(function "compare_move_peephole"
> +  (insn-chain
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 8 (set (reg:CC %cr0)
> +                    (compare:CC (reg:DI %r3)
> +                        (const_int 0))))
> +      (cinsn 2 (set (reg:DI %r4)
> +                    (reg:DI %r3)))
> +      ;; Extra insn to avoid the above being deleted by DCE.
> +      (cinsn 18 (use (reg:DI %r4)))
> +      (cinsn 19 (use (reg:CC %cr0)))
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +) ;; function "main"
> +}
> +
> +int __RTL (startwith ("peephole2")) move_compare_peephole ()
> +{
> +(function "move_compare_peephole"
> +  (insn-chain
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 2 (set (reg:DI %r4)
> +                    (reg:DI %r3)))
> +      (cinsn 8 (set (reg:CC %cr0)
> +                    (compare:CC (reg:DI %r3)
> +                        (const_int 0))))
> +      ;; Extra insn to avoid the above being deleted by DCE.
> +      (cinsn 18 (use (reg:DI %r4)))
> +      (cinsn 19 (use (reg:CC %cr0)))
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +) ;; function "main"
> +}
> +
> +/* { dg-final { scan-assembler-times {\mmr\.} 2 } } */


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-06-13  2:18 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-12  2:34 [PATCHv2, rs6000] Add two peephole2 patterns for mr. insn HAO CHEN GUI
2023-06-13  2:17 ` Kewen.Lin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).