public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH-4, rs6000] Optimize single cc bit reverse implementation
@ 2024-04-30  7:20 HAO CHEN GUI
  0 siblings, 0 replies; only message in thread
From: HAO CHEN GUI @ 2024-04-30  7:20 UTC (permalink / raw)
  To: gcc-patches; +Cc: Segher Boessenkool, David, Kewen.Lin, Peter Bergner

Hi,
  This is the fourth patch in a series of patches optimizing CC modes on
rs6000.

  A single CC bit reverse can be implemented with setbcr on Power10, isel
on Power9, or mfcr on Power8 and below. Originally CCFP was not supported
for isel and setbcr because bcd insns used CCFP and their bit reverse is
not the same as that of normal CCFP mode. The previous patches added new
CC modes according to the usage of CC bits, so single CC bit reverse can
now be supported on all CC modes with a uniform pattern.

  This patch removes unordered and ordered from the code list of CCFP when
finite_math_only is set. These two are not needed as bcd insns now use a
separate CC mode. reverse_condition is replaced with rs6000_reverse_condition
as all CC modes can be reversed. A new isel version of the single CC bit
reverse pattern is added. The fp and bcd CC reverse patterns are removed and
a uniform single CC bit reverse pattern, which is the mfcr version, is added.

  The new test cases illustrate the different implementations of single cc
bit reverse.

  Bootstrapped and tested on powerpc64-linux BE and LE with no
regressions. Is it OK for the trunk?

Thanks
Gui Haochen

ChangeLog
rs6000: Optimize single cc bit reverse implementation

This patch implements single cc bit reverse by mfcr (on Power8 and below)
or isel (on Power9) or setbcr (on Power10) with all CC modes.

gcc/
	* config/rs6000/predicates.md (branch_comparison_operator): Remove
	unordered and ordered from CCFP with finite_math_only.
	(scc_rev_comparison_operator): Add unle and unge.
	* config/rs6000/rs6000.md (CCANY): Add CCFP, CCBCD, CCLTEQ and CCEQ.
	(*isel_reversed_<CCANY:mode>_<GPR:mode>): Replace reverse_condition
	with rs6000_reverse_condition.
	(*set<GPR:mode><CCANY:mode>_rev): New insn_and_split pattern for
	single cc bit reverse P9 version.
	(fp_rev, ccbcd_rev): Remove.
	(*<code><mode>_cc): Remove the pattern for CCFP and CCBCD.  Merge
	them to...
	(*set<GPR:mode><CCANY:mode>_rev): ...this, the new insn_and_split
	pattern for single cc bit reverse P8 and below version.

gcc/testsuite/
	* gcc.target/powerpc/cc_rev.h: New.
	* gcc.target/powerpc/cc_rev_1.c: New.
	* gcc.target/powerpc/cc_rev_2.c: New.
	* gcc.target/powerpc/cc_rev_3.c: New.

patch.diff
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 322e7639fd4..ddb46799bff 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1348,7 +1348,7 @@ (define_predicate "branch_comparison_operator"
 	(match_test "GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC")
 	(if_then_else (match_test "GET_MODE (XEXP (op, 0)) == CCFPmode")
 	  (if_then_else (match_test "flag_finite_math_only")
-	    (match_code "lt,le,gt,ge,eq,ne,unordered,ordered")
+	    (match_code "lt,le,gt,ge,eq,ne")
 	    (match_code "lt,gt,eq,unordered,unge,unle,ne,ordered"))
 	  (if_then_else (match_test "GET_MODE (XEXP (op, 0)) == CCBCDmode")
 	    (match_code "lt,le,gt,ge,eq,ne,unordered,ordered")
@@ -1397,7 +1397,7 @@ (define_predicate "scc_comparison_operator"
 ;; an SCC insn.
 (define_predicate "scc_rev_comparison_operator"
   (and (match_operand 0 "branch_comparison_operator")
-       (match_code "ne,le,ge,leu,geu,ordered")))
+       (match_code "ne,le,ge,leu,geu,ordered,unle,unge")))

 ;; Return 1 if OP is a comparison operator suitable for floating point
 ;; vector/scalar comparisons that generate a -1/0 mask.
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 2c6255395d1..ccf392b6409 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -5509,7 +5509,7 @@ (define_expand "mov<mode>cc"
 ;; leave out the mode in operand 4 and use one pattern, but reload can
 ;; change the mode underneath our feet and then gets confused trying
 ;; to reload the value.
-(define_mode_iterator CCANY [CC CCUNS])
+(define_mode_iterator CCANY [CC CCUNS CCFP CCBCD CCLTEQ CCEQ])
 (define_insn "isel_<CCANY:mode>_<GPR:mode>"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r")
 	(if_then_else:GPR
@@ -5536,7 +5536,8 @@ (define_insn "*isel_reversed_<CCANY:mode>_<GPR:mode>"
 	 (match_operand:GPR 3 "reg_or_zero_operand" "O,b")))]
   "TARGET_ISEL"
 {
-  PUT_CODE (operands[1], reverse_condition (GET_CODE (operands[1])));
+  PUT_CODE (operands[1], rs6000_reverse_condition (<CCANY:MODE>mode,
+						   GET_CODE (operands[1])));
   return "isel %0,%3,%2,%j1";
 }
   [(set_attr "type" "isel")])
@@ -12764,6 +12765,27 @@ (define_insn "set<mode>_cc"
 	(const_string "mfcr")))
    (set_attr "length" "8")])

+(define_insn_and_split "*set<GPR:mode><CCANY:mode>_rev"
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+	(match_operator:GPR 1 "scc_rev_comparison_operator"
+			[(match_operand:CCANY 2 "cc_reg_operand" "y")
+			 (const_int 0)]))]
+  "TARGET_ISEL
+   && !TARGET_POWER10"
+  "#"
+  "&& 1"
+  [(set (match_dup 2)
+	(const_int 1))
+   (set (match_dup 0)
+	(if_then_else:GPR
+	  (match_dup 1)
+	  (match_dup 2)
+	  (const_int 0)))]
+{
+  operands[2] = can_create_pseudo_p ()
+		? gen_reg_rtx (<GPR:MODE>mode) : operands[0];
+}
+  [(set_attr "type" "isel")])

 (define_code_iterator cmp [eq ne lt ltu gt gtu le leu ge geu])
 (define_code_attr UNS [(eq "CC")
@@ -13243,42 +13265,25 @@ (define_insn_and_split "*nesi3_ext<mode>"
 		      (const_string "16")))])


-(define_code_iterator fp_rev [ordered ne unle unge])
 (define_code_iterator fp_two [ltgt le ge unlt ungt uneq])
-(define_code_iterator ccbcd_rev [ordered ne le ge])

-(define_insn_and_split "*<code><mode>_cc"
+(define_insn_and_split "*set<GPR:mode><CCANY:mode>_rev"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
-	(fp_rev:GPR (match_operand:CCFP 1 "cc_reg_operand" "y")
-		    (const_int 0)))]
-  "!flag_finite_math_only"
-  "#"
-  "&& 1"
-  [(pc)]
-{
-  rtx_code revcode = reverse_condition_maybe_unordered (<CODE>);
-  rtx eq = gen_rtx_fmt_ee (revcode, <MODE>mode, operands[1], const0_rtx);
-  rtx tmp = gen_reg_rtx (<MODE>mode);
-  emit_move_insn (tmp, eq);
-  emit_insn (gen_xor<mode>3 (operands[0], tmp, const1_rtx));
-  DONE;
-}
-  [(set_attr "length" "12")])
-
-(define_insn_and_split "*<code><mode>_cc"
-  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
-	(ccbcd_rev:GPR (match_operand:CCBCD 1 "cc_reg_operand" "y")
-		    (const_int 0)))]
-  ""
+	(match_operator:GPR 1 "scc_rev_comparison_operator"
+			[(match_operand:CCANY 2 "cc_reg_operand" "y")
+			 (const_int 0)]))]
+  "!TARGET_ISEL
+   && !TARGET_POWER10"
   "#"
   "&& 1"
   [(pc)]
 {
-  rtx_code revcode = reverse_condition (<CODE>);
-  rtx eq = gen_rtx_fmt_ee (revcode, <MODE>mode, operands[1], const0_rtx);
-  rtx tmp = gen_reg_rtx (<MODE>mode);
+  rtx_code revcode = rs6000_reverse_condition (<CCANY:MODE>mode,
+					       GET_CODE (operands[1]));
+  rtx eq = gen_rtx_fmt_ee (revcode, <GPR:MODE>mode, operands[2], const0_rtx);
+  rtx tmp = gen_reg_rtx (<GPR:MODE>mode);
   emit_move_insn (tmp, eq);
-  emit_insn (gen_xor<mode>3 (operands[0], tmp, const1_rtx));
+  emit_insn (gen_xor<GPR:mode>3 (operands[0], tmp, const1_rtx));
   DONE;
 }
   [(set_attr "length" "12")])
diff --git a/gcc/testsuite/gcc.target/powerpc/cc_rev.h b/gcc/testsuite/gcc.target/powerpc/cc_rev.h
new file mode 100644
index 00000000000..777718a8546
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/cc_rev.h
@@ -0,0 +1,11 @@
+#include <altivec.h>
+
+int test1 (vector unsigned char a, vector unsigned char b)
+{
+  return __builtin_bcdcmpge (a, b);
+}
+
+int test2 (vector unsigned char a, vector unsigned char b)
+{
+  return __builtin_bcdcmple (a, b);
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/cc_rev_1.c b/gcc/testsuite/gcc.target/powerpc/cc_rev_1.c
new file mode 100644
index 00000000000..b1125531e9d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/cc_rev_1.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+/* { dg-final { scan-assembler-times {\mmfcr\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxori\M} 2 } } */
+
+#include "cc_rev.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/cc_rev_2.c b/gcc/testsuite/gcc.target/powerpc/cc_rev_2.c
new file mode 100644
index 00000000000..5938cedf230
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/cc_rev_2.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-final { scan-assembler-times {\misel\M} 2 } } */
+
+#include "cc_rev.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/cc_rev_3.c b/gcc/testsuite/gcc.target/powerpc/cc_rev_3.c
new file mode 100644
index 00000000000..954ecf0586f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/cc_rev_3.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+/* { dg-final { scan-assembler-times {\msetbcr\M} 2 } } */
+
+#include "cc_rev.h"

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2024-04-30  7:20 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-30  7:20 [PATCH-4, rs6000] Optimize single cc bit reverse implementation HAO CHEN GUI

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).