public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v2, rs6000] Use CC for BCD operations [PR100736]
@ 2022-06-22  8:26 HAO CHEN GUI
  2022-07-04  6:33 ` HAO CHEN GUI
  0 siblings, 1 reply; 4+ messages in thread
From: HAO CHEN GUI @ 2022-06-22  8:26 UTC (permalink / raw)
  To: gcc-patches; +Cc: Segher Boessenkool, David, Kewen.Lin, Peter Bergner

Hi,
  This patch uses CC instead of CCFP for all BCD operations, so the
finite-math flag (-ffinite-math-only) no longer has any impact on BCD
operations. To support BCD overflow and invalid coding, an UNSPEC is defined
to move the overflow bit to a general register. Patterns for conditional
branch and return on the overflow bit are also defined, so that the UNSPEC
and the branch/return can be combined into one jump insn. A split pattern for
the sign extension of the overflow bit is defined for optimization.

  This patch also replaces bcdadd with bcdsub in the expander for the BCD
invalid-coding check.

ChangeLog
2022-06-22 Haochen Gui <guihaoc@linux.ibm.com>

gcc/
	PR target/100736
	* config/rs6000/altivec.md (BCD_TEST): Remove unordered.
	(bcd<bcd_add_sub>_<mode>): Replace CCFP with CC.
	(*bcd<bcd_add_sub>_test_<mode>): Replace CCFP with CC.  Generate
	condition insn with CC mode.
	(bcd<bcd_add_sub>_overflow_<mode>): New.
	(*bcdoverflow_<mode>): New.
	(*bcdinvalid_<mode>): Remove.
	(bcdinvalid_<mode>): Implement by UNSPEC_BCDSUB and UNSPEC_BCD_OVERFLOW.
	(nuun): New.
	(*overflow_cbranch): New.
	(*overflow_creturn): New.
	(*overflow_extendsidi): New.
	(bcdshift_v16qi): Replace CCFP with CC.
	(bcdmul10_v16qi): Likewise.
	(bcddiv10_v16qi): Likewise.
	(peephole for bcd_add/sub): Likewise.
	* config/rs6000/rs6000-builtins.def (__builtin_bcdadd_ov_v1ti): Set
	pattern to bcdadd_overflow_v1ti.
	(__builtin_bcdadd_ov_v16qi): Set pattern to bcdadd_overflow_v16qi.
	(__builtin_bcdsub_ov_v1ti): Set pattern to bcdsub_overflow_v1ti.
	(__builtin_bcdsub_ov_v16qi): Set pattern to bcdsub_overflow_v16qi.

gcc/testsuite/
	PR target/100736
	* gcc.target/powerpc/bcd-4.c: Adjust number of bcdadd and bcdsub.
	Scan no cror insns.

patch.diff
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index efc8ae35c2e..26f131e61ea 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -4370,7 +4370,7 @@ (define_int_iterator UNSPEC_BCD_ADD_SUB [UNSPEC_BCDADD UNSPEC_BCDSUB])
 (define_int_attr bcd_add_sub [(UNSPEC_BCDADD "add")
 			      (UNSPEC_BCDSUB "sub")])

-(define_code_iterator BCD_TEST [eq lt le gt ge unordered])
+(define_code_iterator BCD_TEST [eq lt le gt ge])
 (define_mode_iterator VBCD [V1TI V16QI])

 (define_insn "bcd<bcd_add_sub>_<mode>"
@@ -4379,7 +4379,7 @@ (define_insn "bcd<bcd_add_sub>_<mode>"
 		      (match_operand:VBCD 2 "register_operand" "v")
 		      (match_operand:QI 3 "const_0_to_1_operand" "n")]
 		     UNSPEC_BCD_ADD_SUB))
-   (clobber (reg:CCFP CR6_REGNO))]
+   (clobber (reg:CC CR6_REGNO))]
   "TARGET_P8_VECTOR"
   "bcd<bcd_add_sub>. %0,%1,%2,%3"
   [(set_attr "type" "vecsimple")])
@@ -4389,9 +4389,9 @@ (define_insn "bcd<bcd_add_sub>_<mode>"
 ;; UNORDERED test on an integer type (like V1TImode) is not defined.  The type
 ;; probably should be one that can go in the VMX (Altivec) registers, so we
 ;; can't use DDmode or DFmode.
-(define_insn "*bcd<bcd_add_sub>_test_<mode>"
-  [(set (reg:CCFP CR6_REGNO)
-	(compare:CCFP
+(define_insn "bcd<bcd_add_sub>_test_<mode>"
+  [(set (reg:CC CR6_REGNO)
+	(compare:CC
 	 (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v")
 		       (match_operand:VBCD 2 "register_operand" "v")
 		       (match_operand:QI 3 "const_0_to_1_operand" "i")]
@@ -4408,8 +4408,8 @@ (define_insn "*bcd<bcd_add_sub>_test2_<mode>"
 		      (match_operand:VBCD 2 "register_operand" "v")
 		      (match_operand:QI 3 "const_0_to_1_operand" "i")]
 		     UNSPEC_BCD_ADD_SUB))
-   (set (reg:CCFP CR6_REGNO)
-	(compare:CCFP
+   (set (reg:CC CR6_REGNO)
+	(compare:CC
 	 (unspec:V2DF [(match_dup 1)
 		       (match_dup 2)
 		       (match_dup 3)]
@@ -4502,8 +4502,8 @@ (define_insn "vclrrb"
    [(set_attr "type" "vecsimple")])

 (define_expand "bcd<bcd_add_sub>_<code>_<mode>"
-  [(parallel [(set (reg:CCFP CR6_REGNO)
-		   (compare:CCFP
+  [(parallel [(set (reg:CC CR6_REGNO)
+		   (compare:CC
 		    (unspec:V2DF [(match_operand:VBCD 1 "register_operand")
 				  (match_operand:VBCD 2 "register_operand")
 				  (match_operand:QI 3 "const_0_to_1_operand")]
@@ -4511,46 +4511,138 @@ (define_expand "bcd<bcd_add_sub>_<code>_<mode>"
 		    (match_dup 4)))
 	      (clobber (match_scratch:VBCD 5))])
    (set (match_operand:SI 0 "register_operand")
-	(BCD_TEST:SI (reg:CCFP CR6_REGNO)
+	(BCD_TEST:SI (reg:CC CR6_REGNO)
 		     (const_int 0)))]
   "TARGET_P8_VECTOR"
 {
   operands[4] = CONST0_RTX (V2DFmode);
+  emit_insn (gen_bcd<bcd_add_sub>_test_<mode> (operands[0], operands[1],
+					       operands[2], operands[3],
+					       operands[4]));
+
+  rtx cr6 = gen_rtx_REG (CCmode, CR6_REGNO);
+  rtx condition_rtx = gen_rtx_<CODE> (SImode, cr6, const0_rtx);
+
+  if (<CODE> == GE || <CODE> == LE)
+    {
+      rtx not_result = gen_reg_rtx (CCEQmode);
+      rtx not_op, rev_cond_rtx;
+      rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (SImode, <CODE>),
+				     SImode, XEXP (condition_rtx, 0),
+				     const0_rtx);
+      not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
+      emit_insn (gen_rtx_SET (not_result, not_op));
+      condition_rtx = gen_rtx_EQ (SImode, not_result, const0_rtx);
+    }
+
+  emit_insn (gen_rtx_SET (operands[0], condition_rtx));
+  DONE;
 })

-(define_insn "*bcdinvalid_<mode>"
-  [(set (reg:CCFP CR6_REGNO)
-	(compare:CCFP
-	 (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v")]
-		      UNSPEC_BCDADD)
-	 (match_operand:V2DF 2 "zero_constant" "j")))
-   (clobber (match_scratch:VBCD 0 "=v"))]
+(define_expand "bcd<bcd_add_sub>_overflow_<mode>"
+  [(parallel [(set (reg:CC CR6_REGNO)
+		   (compare:CC
+		    (unspec:V2DF [(match_operand:VBCD 1 "register_operand")
+				  (match_operand:VBCD 2 "register_operand")
+				  (match_operand:QI 3 "const_0_to_1_operand")]
+				 UNSPEC_BCD_ADD_SUB)
+		    (match_dup 4)))
+	      (clobber (match_scratch:VBCD 5))])
+   (set (match_operand:SI 0 "register_operand")
+	(unspec:SI [(reg:CC CR6_REGNO)
+		    (const_int 0)]
+		   UNSPEC_BCD_OVERFLOW))]
   "TARGET_P8_VECTOR"
-  "bcdadd. %0,%1,%1,0"
+{
+  operands[4] = CONST0_RTX (V2DFmode);
+})
+
+(define_insn "*bcdoverflow_<mode>"
+  [(set (match_operand:SDI 0 "register_operand" "=r")
+	(unspec:SDI [(reg:CC CR6_REGNO)
+		     (const_int 0)]
+		    UNSPEC_BCD_OVERFLOW))]
+  "TARGET_P8_VECTOR"
+  "mfcr %0,2\;rlwinm %0,%0,28,1"
   [(set_attr "type" "vecsimple")])

 (define_expand "bcdinvalid_<mode>"
-  [(parallel [(set (reg:CCFP CR6_REGNO)
-		   (compare:CCFP
-		    (unspec:V2DF [(match_operand:VBCD 1 "register_operand")]
-				 UNSPEC_BCDADD)
+  [(parallel [(set (reg:CC CR6_REGNO)
+		   (compare:CC
+		    (unspec:V2DF [(match_operand:VBCD 1 "register_operand")
+				  (match_dup 1)
+				  (const_int 0)]
+				 UNSPEC_BCDSUB)
 		    (match_dup 2)))
 	      (clobber (match_scratch:VBCD 3))])
    (set (match_operand:SI 0 "register_operand")
-	(unordered:SI (reg:CCFP CR6_REGNO)
-		      (const_int 0)))]
+	(unspec:SI [(reg:CC CR6_REGNO)
+		    (const_int 0)]
+		   UNSPEC_BCD_OVERFLOW))]
   "TARGET_P8_VECTOR"
 {
   operands[2] = CONST0_RTX (V2DFmode);
 })

+(define_code_attr nuun [(eq "nu")
+			(ne "un")])
+
+(define_insn "*overflow_cbranch"
+  [(set (pc)
+	(if_then_else (eqne
+		       (unspec:SI [(reg:CC CR6_REGNO)
+				   (const_int 0)]
+				  UNSPEC_BCD_OVERFLOW)
+		       (const_int 0))
+		      (label_ref (match_operand 0))
+		      (pc)))]
+  "TARGET_P8_VECTOR"
+  "b<nuun> 6,%l0"
+  [(set_attr "type" "branch")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 0) (pc))
+			       (const_int -32768))
+			   (lt (minus (match_dup 0) (pc))
+			       (const_int 32764)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+(define_insn "*overflow_creturn"
+  [(set (pc)
+	(if_then_else (eqne
+		       (unspec:SI [(reg:CC CR6_REGNO)
+				   (const_int 0)]
+				  UNSPEC_BCD_OVERFLOW)
+		       (const_int 0))
+		      (simple_return)
+		      (pc)))]
+  "TARGET_P8_VECTOR"
+  "b<nuun>lr 6"
+  [(set_attr "type" "jmpreg")])
+
+(define_insn_and_split "*overflow_extendsidi"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+	(sign_extend:DI
+	 (unspec:SI [(reg:CC CR6_REGNO)
+		     (const_int 0)]
+		    UNSPEC_BCD_OVERFLOW)))]
+  "TARGET_P8_VECTOR"
+  "#"
+  "&& 1"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+	(unspec:DI [(reg:CC CR6_REGNO)
+		    (const_int 0)]
+		   UNSPEC_BCD_OVERFLOW))]
+  ""
+  [(set_attr "type" "vecsimple")])
+
 (define_insn "bcdshift_v16qi"
   [(set (match_operand:V16QI 0 "register_operand" "=v")
 	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
 		       (match_operand:V16QI 2 "register_operand" "v")
 		       (match_operand:QI 3 "const_0_to_1_operand" "n")]
 		     UNSPEC_BCDSHIFT))
-   (clobber (reg:CCFP CR6_REGNO))]
+   (clobber (reg:CC CR6_REGNO))]
   "TARGET_P8_VECTOR"
   "bcds. %0,%1,%2,%3"
   [(set_attr "type" "vecsimple")])
@@ -4559,7 +4651,7 @@ (define_expand "bcdmul10_v16qi"
   [(set (match_operand:V16QI 0 "register_operand")
 	(unspec:V16QI [(match_operand:V16QI 1 "register_operand")]
 		      UNSPEC_BCDSHIFT))
-   (clobber (reg:CCFP CR6_REGNO))]
+   (clobber (reg:CC CR6_REGNO))]
   "TARGET_P9_VECTOR"
 {
   rtx one = gen_reg_rtx (V16QImode);
@@ -4574,7 +4666,7 @@ (define_expand "bcddiv10_v16qi"
   [(set (match_operand:V16QI 0 "register_operand")
 	(unspec:V16QI [(match_operand:V16QI 1 "register_operand")]
 		      UNSPEC_BCDSHIFT))
-   (clobber (reg:CCFP CR6_REGNO))]
+   (clobber (reg:CC CR6_REGNO))]
   "TARGET_P9_VECTOR"
 {
   rtx one = gen_reg_rtx (V16QImode);
@@ -4598,9 +4690,9 @@ (define_peephole2
 				 (match_operand:V1TI 2 "register_operand")
 				 (match_operand:QI 3 "const_0_to_1_operand")]
 				UNSPEC_BCD_ADD_SUB))
-	      (clobber (reg:CCFP CR6_REGNO))])
-   (parallel [(set (reg:CCFP CR6_REGNO)
-		   (compare:CCFP
+	      (clobber (reg:CC CR6_REGNO))])
+   (parallel [(set (reg:CC CR6_REGNO)
+		   (compare:CC
 		    (unspec:V2DF [(match_dup 1)
 				  (match_dup 2)
 				  (match_dup 3)]
@@ -4613,8 +4705,8 @@ (define_peephole2
 				 (match_dup 2)
 				 (match_dup 3)]
 				UNSPEC_BCD_ADD_SUB))
-	      (set (reg:CCFP CR6_REGNO)
-		   (compare:CCFP
+	      (set (reg:CC CR6_REGNO)
+		   (compare:CC
 		    (unspec:V2DF [(match_dup 1)
 				  (match_dup 2)
 				  (match_dup 3)]
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index f4a9f24bcc5..8e94fe5c438 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2371,10 +2371,10 @@
     BCDADD_LT_V16QI bcdadd_lt_v16qi {}

   const signed int __builtin_bcdadd_ov_v1ti (vsq, vsq, const int<1>);
-    BCDADD_OV_V1TI bcdadd_unordered_v1ti {}
+    BCDADD_OV_V1TI bcdadd_overflow_v1ti {}

   const signed int __builtin_bcdadd_ov_v16qi (vsc, vsc, const int<1>);
-    BCDADD_OV_V16QI bcdadd_unordered_v16qi {}
+    BCDADD_OV_V16QI bcdadd_overflow_v16qi {}

   const signed int __builtin_bcdinvalid_v1ti (vsq);
     BCDINVALID_V1TI bcdinvalid_v1ti {}
@@ -2419,10 +2419,10 @@
     BCDSUB_LT_V16QI bcdsub_lt_v16qi {}

   const signed int __builtin_bcdsub_ov_v1ti (vsq, vsq, const int<1>);
-    BCDSUB_OV_V1TI bcdsub_unordered_v1ti {}
+    BCDSUB_OV_V1TI bcdsub_overflow_v1ti {}

   const signed int __builtin_bcdsub_ov_v16qi (vsc, vsc, const int<1>);
-    BCDSUB_OV_V16QI bcdsub_unordered_v16qi {}
+    BCDSUB_OV_V16QI bcdsub_overflow_v16qi {}

   const vuc __builtin_crypto_vpermxor_v16qi (vuc, vuc, vuc);
     VPERMXOR_V16QI crypto_vpermxor_v16qi {}
diff --git a/gcc/testsuite/gcc.target/powerpc/bcd-4.c b/gcc/testsuite/gcc.target/powerpc/bcd-4.c
index 2c8554dfe82..3c25ed60e17 100644
--- a/gcc/testsuite/gcc.target/powerpc/bcd-4.c
+++ b/gcc/testsuite/gcc.target/powerpc/bcd-4.c
@@ -2,10 +2,11 @@
 /* { dg-require-effective-target int128 } */
 /* { dg-require-effective-target power10_hw } */
 /* { dg-options "-mdejagnu-cpu=power10 -O2 -save-temps" } */
-/* { dg-final { scan-assembler-times {\mbcdadd\M} 7 } } */
-/* { dg-final { scan-assembler-times {\mbcdsub\M} 18 } } */
+/* { dg-final { scan-assembler-times {\mbcdadd\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mbcdsub\M} 20 } } */
 /* { dg-final { scan-assembler-times {\mbcds\M} 2 } } */
 /* { dg-final { scan-assembler-times {\mdenbcdq\M} 1 } } */
+/* { dg-final { scan-assembler-not {\mcror\M} 1 } } */

 #include <altivec.h>


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-09-21  5:25 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-22  8:26 [PATCH v2, rs6000] Use CC for BCD operations [PR100736] HAO CHEN GUI
2022-07-04  6:33 ` HAO CHEN GUI
2022-08-01  2:02   ` Ping^2 " HAO CHEN GUI
2022-09-21  5:25     ` Ping^3 " HAO CHEN GUI

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).