public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* rs6000:  RFC/Update support for addg6s instruction.  PR100693
@ 2022-03-16 17:20 will schmidt
  2022-03-16 18:12 ` Segher Boessenkool
  0 siblings, 1 reply; 4+ messages in thread
From: will schmidt @ 2022-03-16 17:20 UTC (permalink / raw)
  To: gcc-patches; +Cc: Segher Boessenkool, David Edelsohn, Peter Bergner

Hi,

RFC/Update support for addg6s instruction.  PR100693

For PR100693, we currently provide an addg6s builtin using unsigned
int arguments, but we are missing an unsigned long long argument
equivalent.  This patch adds an overload to provide the long long
version of the builtin.

unsigned long long __builtin_addg6s (unsigned long long, unsigned long long);

RFC/concerns: This patch works, but looking briefly at intermediate stages
is not behaving quite as I expected.   Looking at the intermediate dumps, I
see in pr100693.original that calls I expect to be routed to the internal
__builtin_addg6s_si() that uses (unsigned int) arguments are instead being
handled by __builtin_addg6s_di() with casts that convert the arguments to
(unsigned long long).
i.e.
 return (unsigned int) __builtin_addg6s_di
                 ((long long unsigned int) a, (long long unsigned int) b);

As a test, I see if I swap the order of the builtins in rs6000-overload.def
I end up with code casting the ULL values to UI, which provides truncated
results, and is similar to what occurs today without this patch.

All that said, this patch seems to work.  OK for next stage 1?
Tested on power8BE as well as LE power8,power9,power10.

2022-03-15  Will Schmidt  <will_schmidt@vnet.ibm.com>

gcc/
	PR target/100693
	* config/rs6000/rs600-builtins.def: Remove entry for __builtin_addgs()
	  and add entries for __builtin_addg6s_di() and __builtin_addg6s_si().
	* config/rs6000/rs6000-overload.def: Add overloaded entries allowing
	  __builtin_addg6s() to map to either of the __builtin_addg6s_{di,si}
	  builtins.
	* config/rs6000/rs6000.md: Add UNSPEC_ADDG6S_SI and UNSPEC_ADDG6S_DI
	  unspecs.   Add define_insn entries for addg6s_si and addg6s_di based
	  on those unspecs.
	* doc/extend.texi:  Add entry for ULL __builtin_addg6s (ULL, ULL);

testsuite/
	PR target/100693
	* gcc.target/powerpc/pr100693.c:  New test.

diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index ae2760c33389..4c23cac26932 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -1993,12 +1993,16 @@
     XXSPLTD_V2DI vsx_xxspltd_v2di {}
 
 
 ; Power7 builtins (ISA 2.06).
 [power7]
-  const unsigned int __builtin_addg6s (unsigned int, unsigned int);
-    ADDG6S addg6s {}
+  const unsigned long long __builtin_addg6s_di (unsigned long long, \
+						unsigned long long);
+    ADDG6S_DI addg6s_di {}
+
+  const unsigned int __builtin_addg6s_si (unsigned int, unsigned int);
+    ADDG6S_SI addg6s_si {}
 
   const signed long __builtin_bpermd (signed long, signed long);
     BPERMD bpermd_di {32bit}
 
   const unsigned int __builtin_cbcdtd (unsigned int);
diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def
index 44e2945aaa0e..931f85b738c5 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -76,10 +76,15 @@
 ; Blank lines may be used as desired in this file between the lines as
 ; defined above; that is, you can introduce as many extra newlines as you
 ; like after a required newline, but nowhere else.  Lines beginning with
 ; a semicolon are also treated as blank lines.
 
+[ADDG6S, __builtin_i_addg6s, __builtin_addg6s]
+  unsigned long long __builtin_addg6s_di (signed long long, unsigned long long);
+    ADDG6S_DI
+  unsigned int __builtin_addg6s_si (unsigned int, unsigned int);
+    ADDG6S_SI
 
 [BCDADD, __builtin_bcdadd, __builtin_vec_bcdadd]
   vsq __builtin_vec_bcdadd (vsq, vsq, const int);
     BCDADD_V1TI
   vuc __builtin_vec_bcdadd (vuc, vuc, const int);
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index fdfbc6566a5c..d040f127eb55 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -122,11 +122,12 @@ (define_c_enum "unspec"
    UNSPEC_P8V_MTVSRWZ
    UNSPEC_P8V_RELOAD_FROM_GPR
    UNSPEC_P8V_MTVSRD
    UNSPEC_P8V_XXPERMDI
    UNSPEC_P8V_RELOAD_FROM_VSX
-   UNSPEC_ADDG6S
+   UNSPEC_ADDG6S_SI
+   UNSPEC_ADDG6S_DI
    UNSPEC_CDTBCD
    UNSPEC_CBCDTD
    UNSPEC_DIVE
    UNSPEC_DIVEU
    UNSPEC_UNPACK_128BIT
@@ -14495,15 +14496,24 @@ (define_peephole2
   operands[5] = change_address (mem, <ALTIVEC_DFORM:MODE>mode, new_addr);
 })
    
 \f
 ;; Miscellaneous ISA 2.06 (power7) instructions
-(define_insn "addg6s"
+(define_insn "addg6s_si"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
 		    (match_operand:SI 2 "register_operand" "r")]
-		   UNSPEC_ADDG6S))]
+		   UNSPEC_ADDG6S_SI))]
+  "TARGET_POPCNTD"
+  "addg6s %0,%1,%2"
+  [(set_attr "type" "integer")])
+
+(define_insn "addg6s_di"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "register_operand" "r")
+		    (match_operand:DI 2 "register_operand" "r")]
+		   UNSPEC_ADDG6S_DI))]
   "TARGET_POPCNTD"
   "addg6s %0,%1,%2"
   [(set_attr "type" "integer")])
 
 (define_insn "cdtbcd"
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 0dc752e8aadd..9eeb962f7363 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -18072,10 +18072,11 @@ addition to the @option{-maltivec}, @option{-mpopcntd}, and
 @option{-mvsx} options.
 
 The following basic built-in functions require @option{-mpopcntd}:
 @smallexample
 unsigned int __builtin_addg6s (unsigned int, unsigned int);
+unsigned long long __builtin_addg6s (unsigned long long, unsigned long long);
 long long __builtin_bpermd (long long, long long);
 unsigned int __builtin_cbcdtd (unsigned int);
 unsigned int __builtin_cdtbcd (unsigned int);
 long long __builtin_divde (long long, long long);
 unsigned long long __builtin_divdeu (unsigned long long, unsigned long long);
diff --git a/gcc/testsuite/gcc.target/powerpc/pr100693.c b/gcc/testsuite/gcc.target/powerpc/pr100693.c
new file mode 100644
index 000000000000..31fd118ee0d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr100693.c
@@ -0,0 +1,68 @@
+/* { dg-do compile { target { powerpc*-*-linux* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-mdejagnu-cpu=power7 -O2" } */
+/* { dg-final { scan-assembler-times {\maddg6s\M} 4 } } */
+/* { dg-final { scan-assembler-not    "bl __builtin" } } */
+
+/* Test case for the addg6s builtin, exercising both
+ * unsigned int and unsigned long long arguments.  See also bcd-1.c.  */
+
+#include <stdio.h>
+
+unsigned int test1 (unsigned int a, unsigned int b)
+{
+    return __builtin_addg6s (a, b);
+}
+
+unsigned long long test2 (unsigned long long a, unsigned long long b)
+{
+    return __builtin_addg6s (a, b);
+}
+
+/* Expected values, Not a full pattern, these are tuned
+ *  to match the sparse iterations as seen below.  */
+unsigned int exp_int[] = {
+0x00000000,
+0x00000006,
+0x00000666,
+0x00006666,
+0x00666666,
+0x06666666,
+0x77777777
+};
+
+unsigned long long exp_longlong[] = {
+0x0000000000000000,
+0x0000000000000006,
+0x0000000000000666,
+0x0000000000006666,
+0x0000000000666666,
+0x0000000006666666,
+0x0000000666666666,
+0x0000006666666666,
+0x0000666666666666, 
+0x0006666666666666, 
+0x0666666666666666, 
+0x7777777777777777
+};
+
+int main() {
+	unsigned long long z;
+	unsigned int ux;
+	unsigned long long uxl;
+	int idx;
+
+	for (z=0,idx=0 ; z<=31; z+=6,idx++) {
+		ux = test1(0x01<<z,0xffffffff);
+		if (exp_int[idx]!=ux)
+			printf("%08lx i:%d %08lx \n",ux,idx,exp_int[idx]);
+	}
+
+	for (z=0,idx=0 ; z<=63; z+=6,idx++) {
+		uxl = test2(0x01LL<<z,0xffffffffffffffff);
+		if (exp_longlong[idx]!=uxl)
+			printf("%016llx i:%d %016llx \n",uxl,idx,exp_longlong[idx]);
+	}
+}
+


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-03-16 21:00 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-16 17:20 rs6000: RFC/Update support for addg6s instruction. PR100693 will schmidt
2022-03-16 18:12 ` Segher Boessenkool
2022-03-16 20:06   ` will schmidt
2022-03-16 20:59     ` Segher Boessenkool

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).