[PATCH v2, rs6000] Use CC for BCD operations [PR100736]

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

From: HAO CHEN GUI <guihaoc@linux.ibm.com>
To: gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Segher Boessenkool <segher@kernel.crashing.org>,
	David <dje.gcc@gmail.com>, "Kewen.Lin" <linkw@linux.ibm.com>,
	Peter Bergner <bergner@linux.ibm.com>
Subject: [PATCH v2, rs6000] Use CC for BCD operations [PR100736]
Date: Wed, 22 Jun 2022 16:26:15 +0800	[thread overview]
Message-ID: <85f7e36e-4a24-0e9b-ad8e-56f85cabf5b5@linux.ibm.com> (raw)

Hi,
  This patch uses CC instead of CCFP for all BCD operations, so the
finite-math flag (-ffinite-math-only) no longer has any impact on them.  To
support BCD overflow and invalid coding, an UNSPEC is defined to move the
overflow bit to a general register.  Conditional branch and return patterns
on the overflow bit are defined so that the UNSPEC and the branch/return can
be combined into one jump insn.  A split pattern for sign extension of the
overflow bit is defined for optimization.

  This patch also replaces bcdadd with bcdsub in the expander for the BCD
invalid-coding check.

ChangeLog
2022-06-22 Haochen Gui <guihaoc@linux.ibm.com>

gcc/
	PR target/100736
	* config/rs6000/altivec.md (BCD_TEST): Remove unordered.
	(bcd<bcd_add_sub>_<mode>): Replace CCFP with CC.
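	(*bcd<bcd_add_sub>_test_<mode>): Rename to...
	(bcd<bcd_add_sub>_test_<mode>): ...this.  Replace CCFP with CC.
	(*bcd<bcd_add_sub>_test2_<mode>): Replace CCFP with CC.
	(bcd<bcd_add_sub>_<code>_<mode>): Replace CCFP with CC.  Generate
	condition insn with CC mode.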
	(bcd<bcd_add_sub>_overflow_<mode>): New.
	(*bcdoverflow_<mode>): New.
	(*bcdinvalid_<mode>): Remove.
	(bcdinvalid_<mode>): Implement by UNSPEC_BCDSUB and UNSPEC_BCD_OVERFLOW.
	(nuun): New.
	(*overflow_cbranch): New.
	(*overflow_creturn): New.
	(*overflow_extendsidi): New.
	(bcdshift_v16qi): Replace CCFP with CC.
	(bcdmul10_v16qi): Likewise.
	(bcddiv10_v16qi): Likewise.
	(peephole for bcd_add/sub): Likewise.
	* config/rs6000/rs6000-builtins.def (__builtin_bcdadd_ov_v1ti): Set
	pattern to bcdadd_overflow_v1ti.
	(__builtin_bcdadd_ov_v16qi): Set pattern to bcdadd_overflow_v16qi.
	(__builtin_bcdsub_ov_v1ti): Set pattern to bcdsub_overflow_v1ti.
	(__builtin_bcdsub_ov_v16qi): Set pattern to bcdsub_overflow_v16qi.

gcc/testsuite/
	PR target/100736
	* gcc.target/powerpc/bcd-4.c: Adjust number of bcdadd and bcdsub.
	Scan no cror insns.

patch.diff
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index efc8ae35c2e..26f131e61ea 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -4370,7 +4370,7 @@ (define_int_iterator UNSPEC_BCD_ADD_SUB [UNSPEC_BCDADD UNSPEC_BCDSUB])
 (define_int_attr bcd_add_sub [(UNSPEC_BCDADD "add")
 			      (UNSPEC_BCDSUB "sub")])

-(define_code_iterator BCD_TEST [eq lt le gt ge unordered])
+(define_code_iterator BCD_TEST [eq lt le gt ge])
 (define_mode_iterator VBCD [V1TI V16QI])

 (define_insn "bcd<bcd_add_sub>_<mode>"
@@ -4379,7 +4379,7 @@ (define_insn "bcd<bcd_add_sub>_<mode>"
 		      (match_operand:VBCD 2 "register_operand" "v")
 		      (match_operand:QI 3 "const_0_to_1_operand" "n")]
 		     UNSPEC_BCD_ADD_SUB))
-   (clobber (reg:CCFP CR6_REGNO))]
+   (clobber (reg:CC CR6_REGNO))]
   "TARGET_P8_VECTOR"
   "bcd<bcd_add_sub>. %0,%1,%2,%3"
   [(set_attr "type" "vecsimple")])
@@ -4389,9 +4389,9 @@ (define_insn "bcd<bcd_add_sub>_<mode>"
 ;; UNORDERED test on an integer type (like V1TImode) is not defined.  The type
 ;; probably should be one that can go in the VMX (Altivec) registers, so we
 ;; can't use DDmode or DFmode.
-(define_insn "*bcd<bcd_add_sub>_test_<mode>"
-  [(set (reg:CCFP CR6_REGNO)
-	(compare:CCFP
+(define_insn "bcd<bcd_add_sub>_test_<mode>"
+  [(set (reg:CC CR6_REGNO)
+	(compare:CC
 	 (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v")
 		       (match_operand:VBCD 2 "register_operand" "v")
 		       (match_operand:QI 3 "const_0_to_1_operand" "i")]
@@ -4408,8 +4408,8 @@ (define_insn "*bcd<bcd_add_sub>_test2_<mode>"
 		      (match_operand:VBCD 2 "register_operand" "v")
 		      (match_operand:QI 3 "const_0_to_1_operand" "i")]
 		     UNSPEC_BCD_ADD_SUB))
-   (set (reg:CCFP CR6_REGNO)
-	(compare:CCFP
+   (set (reg:CC CR6_REGNO)
+	(compare:CC
 	 (unspec:V2DF [(match_dup 1)
 		       (match_dup 2)
 		       (match_dup 3)]
@@ -4502,8 +4502,8 @@ (define_insn "vclrrb"
    [(set_attr "type" "vecsimple")])

 (define_expand "bcd<bcd_add_sub>_<code>_<mode>"
-  [(parallel [(set (reg:CCFP CR6_REGNO)
-		   (compare:CCFP
+  [(parallel [(set (reg:CC CR6_REGNO)
+		   (compare:CC
 		    (unspec:V2DF [(match_operand:VBCD 1 "register_operand")
 				  (match_operand:VBCD 2 "register_operand")
 				  (match_operand:QI 3 "const_0_to_1_operand")]
@@ -4511,46 +4511,138 @@ (define_expand "bcd<bcd_add_sub>_<code>_<mode>"
 		    (match_dup 4)))
 	      (clobber (match_scratch:VBCD 5))])
    (set (match_operand:SI 0 "register_operand")
-	(BCD_TEST:SI (reg:CCFP CR6_REGNO)
+	(BCD_TEST:SI (reg:CC CR6_REGNO)
 		     (const_int 0)))]
   "TARGET_P8_VECTOR"
 {
   operands[4] = CONST0_RTX (V2DFmode);
+  emit_insn (gen_bcd<bcd_add_sub>_test_<mode> (operands[0], operands[1],
+					       operands[2], operands[3],
+					       operands[4]));
+
+  rtx cr6 = gen_rtx_REG (CCmode, CR6_REGNO);
+  rtx condition_rtx = gen_rtx_<CODE> (SImode, cr6, const0_rtx);
+
+  if (<CODE> == GE || <CODE> == LE)
+    {
+      rtx not_result = gen_reg_rtx (CCEQmode);
+      rtx not_op, rev_cond_rtx;
+      rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (SImode, <CODE>),
+				     SImode, XEXP (condition_rtx, 0),
+				     const0_rtx);
+      not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
+      emit_insn (gen_rtx_SET (not_result, not_op));
+      condition_rtx = gen_rtx_EQ (SImode, not_result, const0_rtx);
+    }
+
+  emit_insn (gen_rtx_SET (operands[0], condition_rtx));
+  DONE;
 })

-(define_insn "*bcdinvalid_<mode>"
-  [(set (reg:CCFP CR6_REGNO)
-	(compare:CCFP
-	 (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v")]
-		      UNSPEC_BCDADD)
-	 (match_operand:V2DF 2 "zero_constant" "j")))
-   (clobber (match_scratch:VBCD 0 "=v"))]
+(define_expand "bcd<bcd_add_sub>_overflow_<mode>"
+  [(parallel [(set (reg:CC CR6_REGNO)
+		   (compare:CC
+		    (unspec:V2DF [(match_operand:VBCD 1 "register_operand")
+				  (match_operand:VBCD 2 "register_operand")
+				  (match_operand:QI 3 "const_0_to_1_operand")]
+				 UNSPEC_BCD_ADD_SUB)
+		    (match_dup 4)))
+	      (clobber (match_scratch:VBCD 5))])
+   (set (match_operand:SI 0 "register_operand")
+	(unspec:SI [(reg:CC CR6_REGNO)
+		    (const_int 0)]
+		   UNSPEC_BCD_OVERFLOW))]
   "TARGET_P8_VECTOR"
-  "bcdadd. %0,%1,%1,0"
+{
+  operands[4] = CONST0_RTX (V2DFmode);
+})
+
+(define_insn "*bcdoverflow_<mode>"
+  [(set (match_operand:SDI 0 "register_operand" "=r")
+	(unspec:SDI [(reg:CC CR6_REGNO)
+		     (const_int 0)]
+		    UNSPEC_BCD_OVERFLOW))]
+  "TARGET_P8_VECTOR"
+  "mfcr %0,2\;rlwinm %0,%0,28,1"
   [(set_attr "type" "vecsimple")])

 (define_expand "bcdinvalid_<mode>"
-  [(parallel [(set (reg:CCFP CR6_REGNO)
-		   (compare:CCFP
-		    (unspec:V2DF [(match_operand:VBCD 1 "register_operand")]
-				 UNSPEC_BCDADD)
+  [(parallel [(set (reg:CC CR6_REGNO)
+		   (compare:CC
+		    (unspec:V2DF [(match_operand:VBCD 1 "register_operand")
+				  (match_dup 1)
+				  (const_int 0)]
+				 UNSPEC_BCDSUB)
 		    (match_dup 2)))
 	      (clobber (match_scratch:VBCD 3))])
    (set (match_operand:SI 0 "register_operand")
-	(unordered:SI (reg:CCFP CR6_REGNO)
-		      (const_int 0)))]
+	(unspec:SI [(reg:CC CR6_REGNO)
+		    (const_int 0)]
+		   UNSPEC_BCD_OVERFLOW))]
   "TARGET_P8_VECTOR"
 {
   operands[2] = CONST0_RTX (V2DFmode);
 })

+(define_code_attr nuun [(eq "nu")
+			(ne "un")])
+
+(define_insn "*overflow_cbranch"
+  [(set (pc)
+	(if_then_else (eqne
+		       (unspec:SI [(reg:CC CR6_REGNO)
+				   (const_int 0)]
+				  UNSPEC_BCD_OVERFLOW)
+		       (const_int 0))
+		      (label_ref (match_operand 0))
+		      (pc)))]
+  "TARGET_P8_VECTOR"
+  "b<nuun> 6,%l0"
+  [(set_attr "type" "branch")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 0) (pc))
+			       (const_int -32768))
+			   (lt (minus (match_dup 0) (pc))
+			       (const_int 32764)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+(define_insn "*overflow_creturn"
+  [(set (pc)
+	(if_then_else (eqne
+		       (unspec:SI [(reg:CC CR6_REGNO)
+				   (const_int 0)]
+				  UNSPEC_BCD_OVERFLOW)
+		       (const_int 0))
+		      (simple_return)
+		      (pc)))]
+  "TARGET_P8_VECTOR"
+  "b<nuun>lr 6"
+  [(set_attr "type" "jmpreg")])
+
+(define_insn_and_split "*overflow_extendsidi"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+	(sign_extend:DI
+	 (unspec:SI [(reg:CC CR6_REGNO)
+		     (const_int 0)]
+		    UNSPEC_BCD_OVERFLOW)))]
+  "TARGET_P8_VECTOR"
+  "#"
+  "&& 1"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+	(unspec:DI [(reg:CC CR6_REGNO)
+		    (const_int 0)]
+		   UNSPEC_BCD_OVERFLOW))]
+  ""
+  [(set_attr "type" "vecsimple")])
+
 (define_insn "bcdshift_v16qi"
   [(set (match_operand:V16QI 0 "register_operand" "=v")
 	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
 		       (match_operand:V16QI 2 "register_operand" "v")
 		       (match_operand:QI 3 "const_0_to_1_operand" "n")]
 		     UNSPEC_BCDSHIFT))
-   (clobber (reg:CCFP CR6_REGNO))]
+   (clobber (reg:CC CR6_REGNO))]
   "TARGET_P8_VECTOR"
   "bcds. %0,%1,%2,%3"
   [(set_attr "type" "vecsimple")])
@@ -4559,7 +4651,7 @@ (define_expand "bcdmul10_v16qi"
   [(set (match_operand:V16QI 0 "register_operand")
 	(unspec:V16QI [(match_operand:V16QI 1 "register_operand")]
 		      UNSPEC_BCDSHIFT))
-   (clobber (reg:CCFP CR6_REGNO))]
+   (clobber (reg:CC CR6_REGNO))]
   "TARGET_P9_VECTOR"
 {
   rtx one = gen_reg_rtx (V16QImode);
@@ -4574,7 +4666,7 @@ (define_expand "bcddiv10_v16qi"
   [(set (match_operand:V16QI 0 "register_operand")
 	(unspec:V16QI [(match_operand:V16QI 1 "register_operand")]
 		      UNSPEC_BCDSHIFT))
-   (clobber (reg:CCFP CR6_REGNO))]
+   (clobber (reg:CC CR6_REGNO))]
   "TARGET_P9_VECTOR"
 {
   rtx one = gen_reg_rtx (V16QImode);
@@ -4598,9 +4690,9 @@ (define_peephole2
 				 (match_operand:V1TI 2 "register_operand")
 				 (match_operand:QI 3 "const_0_to_1_operand")]
 				UNSPEC_BCD_ADD_SUB))
-	      (clobber (reg:CCFP CR6_REGNO))])
-   (parallel [(set (reg:CCFP CR6_REGNO)
-		   (compare:CCFP
+	      (clobber (reg:CC CR6_REGNO))])
+   (parallel [(set (reg:CC CR6_REGNO)
+		   (compare:CC
 		    (unspec:V2DF [(match_dup 1)
 				  (match_dup 2)
 				  (match_dup 3)]
@@ -4613,8 +4705,8 @@ (define_peephole2
 				 (match_dup 2)
 				 (match_dup 3)]
 				UNSPEC_BCD_ADD_SUB))
-	      (set (reg:CCFP CR6_REGNO)
-		   (compare:CCFP
+	      (set (reg:CC CR6_REGNO)
+		   (compare:CC
 		    (unspec:V2DF [(match_dup 1)
 				  (match_dup 2)
 				  (match_dup 3)]
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index f4a9f24bcc5..8e94fe5c438 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2371,10 +2371,10 @@
     BCDADD_LT_V16QI bcdadd_lt_v16qi {}

   const signed int __builtin_bcdadd_ov_v1ti (vsq, vsq, const int<1>);
-    BCDADD_OV_V1TI bcdadd_unordered_v1ti {}
+    BCDADD_OV_V1TI bcdadd_overflow_v1ti {}

   const signed int __builtin_bcdadd_ov_v16qi (vsc, vsc, const int<1>);
-    BCDADD_OV_V16QI bcdadd_unordered_v16qi {}
+    BCDADD_OV_V16QI bcdadd_overflow_v16qi {}

   const signed int __builtin_bcdinvalid_v1ti (vsq);
     BCDINVALID_V1TI bcdinvalid_v1ti {}
@@ -2419,10 +2419,10 @@
     BCDSUB_LT_V16QI bcdsub_lt_v16qi {}

   const signed int __builtin_bcdsub_ov_v1ti (vsq, vsq, const int<1>);
-    BCDSUB_OV_V1TI bcdsub_unordered_v1ti {}
+    BCDSUB_OV_V1TI bcdsub_overflow_v1ti {}

   const signed int __builtin_bcdsub_ov_v16qi (vsc, vsc, const int<1>);
-    BCDSUB_OV_V16QI bcdsub_unordered_v16qi {}
+    BCDSUB_OV_V16QI bcdsub_overflow_v16qi {}

   const vuc __builtin_crypto_vpermxor_v16qi (vuc, vuc, vuc);
     VPERMXOR_V16QI crypto_vpermxor_v16qi {}
diff --git a/gcc/testsuite/gcc.target/powerpc/bcd-4.c b/gcc/testsuite/gcc.target/powerpc/bcd-4.c
index 2c8554dfe82..3c25ed60e17 100644
--- a/gcc/testsuite/gcc.target/powerpc/bcd-4.c
+++ b/gcc/testsuite/gcc.target/powerpc/bcd-4.c
@@ -2,10 +2,11 @@
 /* { dg-require-effective-target int128 } */
 /* { dg-require-effective-target power10_hw } */
 /* { dg-options "-mdejagnu-cpu=power10 -O2 -save-temps" } */
-/* { dg-final { scan-assembler-times {\mbcdadd\M} 7 } } */
-/* { dg-final { scan-assembler-times {\mbcdsub\M} 18 } } */
+/* { dg-final { scan-assembler-times {\mbcdadd\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mbcdsub\M} 20 } } */
 /* { dg-final { scan-assembler-times {\mbcds\M} 2 } } */
 /* { dg-final { scan-assembler-times {\mdenbcdq\M} 1 } } */
+/* { dg-final { scan-assembler-not {\mcror\M} } } */

 #include <altivec.h>

next             reply	other threads:[~2022-06-22  8:26 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-22  8:26 HAO CHEN GUI [this message]
2022-07-04  6:33 ` HAO CHEN GUI
2022-08-01  2:02   ` Ping^2 " HAO CHEN GUI
2022-09-21  5:25     ` Ping^3 " HAO CHEN GUI

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=85f7e36e-4a24-0e9b-ad8e-56f85cabf5b5@linux.ibm.com \
    --to=guihaoc@linux.ibm.com \
    --cc=bergner@linux.ibm.com \
    --cc=dje.gcc@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=linkw@linux.ibm.com \
    --cc=segher@kernel.crashing.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).