public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] rs6000: Fix vector parity support [PR108699]
@ 2023-02-16  9:23 Kewen.Lin
  2023-02-16 11:14 ` Segher Boessenkool
  0 siblings, 1 reply; 7+ messages in thread
From: Kewen.Lin @ 2023-02-16  9:23 UTC (permalink / raw)
  To: GCC Patches
  Cc: Segher Boessenkool, David Edelsohn, Peter Bergner, Michael Meissner

Hi,

The failures on the original failed case builtin-bitops-1.c
and the associated test case pr108699.c here show that the
current support of parity vector mode is wrong on Power.
The hardware insns vprtyb[wdq] which operate on the least
significant bit of each byte per element, they doesn't match
what RTL opcode parity needs, but the current implementation
expands it with them wrongly.

This patch is to fix the handling with one more pre-insn
vpopcntb.  It also fixes an oversight having V8HI in VEC_IP,
replaces VParity with VEC_IP, and adjusts the existing
UNSPEC_PARITY to a more meaningful name UNSPEC_PARITYB.

I also noticed that we can make use of vpopcnt[bhwd] on
Power8 (AND with 1 on each element), but it's next stage1
content, I plan to support it with one subsequent patch
and make this patch focus on bug fixing.

Bootstrapped and regtested on powerpc64-linux-gnu P{7,8,9}
and powerpc64le-linux-gnu P10.

Is it ok for trunk?

BR,
Kewen
-----
	PR target/108699

gcc/ChangeLog:

	* config/rs6000/altivec.md (*p9v_parity<mode>2): Rename to ...
	(p9v_parityb<mode>2): ... this.  Adjust pattern with UNSPEC_PARITYB,
	and replace mode_iterator VParity with VEC_IP.
	(mode_iterator VParity): Remove.
	* config/rs6000/rs6000-builtins.def (VPRTYBD): Replace parityv2di2 with
	p9v_paritybv2di2.
	(VPRTYBW): Replace parityv4si2 with p9v_paritybv4si2.
	(VPRTYBQ): Replace parityv1ti2 with p9v_paritybv1ti2.
	* config/rs6000/rs6000.cc (rs6000_emit_parity): Replace
	gen_paritysi2_cmpb with gen_paritybsi2, and replace gen_paritydi2_cmpb
	with gen_paritybdi2
	* config/rs6000/rs6000.md (parity<mode>2_cmpb): Rename to ...
	(parityb<mode>2): ... this.
	(UNSPEC_PARITY): Rename to ...
	(UNSPEC_PARITYB): ... this.
	* config/rs6000/vector.md (mode_iterator VEC_IP): Remove V8HI.
	(parity<mode>2 with VEC_IP): Expand with popcountv16qi2 and the
	corresponding vector parity byte p9v_parityb<mode>2.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/p9-vparity.c: Add scan-assembler-not for vpopcntb
	to distinguish parity byte from parity.
	* gcc.target/powerpc/pr108699.c: New test.
---
 gcc/config/rs6000/altivec.md                  | 15 +++----
 gcc/config/rs6000/rs6000-builtins.def         |  6 +--
 gcc/config/rs6000/rs6000.cc                   |  4 +-
 gcc/config/rs6000/rs6000.md                   |  7 ++--
 gcc/config/rs6000/vector.md                   | 14 +++++--
 gcc/testsuite/gcc.target/powerpc/p9-vparity.c |  1 +
 gcc/testsuite/gcc.target/powerpc/pr108699.c   | 42 +++++++++++++++++++
 7 files changed, 68 insertions(+), 21 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108699.c

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 30606b8ab21..87053aa69b5 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -215,13 +215,6 @@ (define_mode_iterator VM2 [V4SI
 ;; versus floating point
 (define_mode_attr VS_sxwsp [(V4SI "sxw") (V4SF "sp")])

-;; Specific iterator for parity which does not have a byte/half-word form, but
-;; does have a quad word form
-(define_mode_iterator VParity [V4SI
-			       V2DI
-			       V1TI
-			       TI])
-
 (define_mode_attr VI_char [(V2DI "d") (V4SI "w") (V8HI "h") (V16QI "b")])
 (define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") (V16QI "QI")])
 (define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)")
@@ -4195,9 +4188,11 @@ (define_insn "*p8v_popcount<mode>2"
   [(set_attr "type" "vecsimple")])

 ;; Vector parity
-(define_insn "*p9v_parity<mode>2"
-  [(set (match_operand:VParity 0 "register_operand" "=v")
-        (parity:VParity (match_operand:VParity 1 "register_operand" "v")))]
+(define_insn "p9v_parityb<mode>2"
+  [(set (match_operand:VEC_IP 0 "register_operand" "=v")
+        (unspec:VEC_IP
+          [(match_operand:VEC_IP 1 "register_operand" "v")]
+          UNSPEC_PARITYB))]
   "TARGET_P9_VECTOR"
   "vprtyb<wd> %0,%1"
   [(set_attr "type" "vecsimple")])
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index e0d9f5adc97..182e3fc5bdc 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2666,13 +2666,13 @@
     VMSUMUDM altivec_vmsumudm {}

   const vsll __builtin_altivec_vprtybd (vsll);
-    VPRTYBD parityv2di2 {}
+    VPRTYBD p9v_paritybv2di2 {}

   const vsq __builtin_altivec_vprtybq (vsq);
-    VPRTYBQ parityv1ti2 {}
+    VPRTYBQ p9v_paritybv1ti2 {}

   const vsi __builtin_altivec_vprtybw (vsi);
-    VPRTYBW parityv4si2 {}
+    VPRTYBW p9v_paritybv4si2 {}

   const vsll __builtin_altivec_vrldmi (vsll, vsll, vsll);
     VRLDMI altivec_vrldmi {}
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 16ca3a31757..bfa1060e55a 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -22973,12 +22973,12 @@ rs6000_emit_parity (rtx dst, rtx src)
       if (mode == SImode)
 	{
 	  emit_insn (gen_popcntbsi2 (tmp, src));
-	  emit_insn (gen_paritysi2_cmpb (dst, tmp));
+	  emit_insn (gen_paritybsi2 (dst, tmp));
 	}
       else
 	{
 	  emit_insn (gen_popcntbdi2 (tmp, src));
-	  emit_insn (gen_paritydi2_cmpb (dst, tmp));
+	  emit_insn (gen_paritybdi2 (dst, tmp));
 	}
       return;
     }
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4a7812fa592..100ea115c5a 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -109,7 +109,7 @@ (define_c_enum "unspec"
    UNSPEC_MACHOPIC_OFFSET
    UNSPEC_BPERM
    UNSPEC_COPYSIGN
-   UNSPEC_PARITY
+   UNSPEC_PARITYB
    UNSPEC_CMPB
    UNSPEC_FCTIW
    UNSPEC_FCTID
@@ -2501,9 +2501,10 @@ (define_expand "parity<mode>2"
   DONE;
 })

-(define_insn "parity<mode>2_cmpb"
+(define_insn "parityb<mode>2"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
-	(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] UNSPEC_PARITY))]
+	(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")]
+		    UNSPEC_PARITYB))]
   "TARGET_CMPB && TARGET_POPCNTB"
   "prty<wd> %0,%1"
   [(set_attr "type" "popcnt")])
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index 12fd5f976ed..a70c9a2043d 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -33,8 +33,7 @@ (define_mode_iterator VEC_IC [V16QI V8HI V4SI V2DI (V1TI "TARGET_POWER10")])
 (define_mode_iterator VEC_TI [V1TI TI])

 ;; Vector int modes for parity
-(define_mode_iterator VEC_IP [V8HI
-			      V4SI
+(define_mode_iterator VEC_IP [V4SI
 			      V2DI
 			      V1TI
 			      TI])
@@ -1226,7 +1225,16 @@ (define_expand "popcount<mode>2"
 (define_expand "parity<mode>2"
   [(set (match_operand:VEC_IP 0 "register_operand")
 	(parity:VEC_IP (match_operand:VEC_IP 1 "register_operand")))]
-  "TARGET_P9_VECTOR")
+  "TARGET_P9_VECTOR"
+{
+  rtx op1 = gen_lowpart (V16QImode, operands[1]);
+  rtx res = gen_reg_rtx (V16QImode);
+  emit_insn (gen_popcountv16qi2 (res, op1));
+  emit_insn (gen_p9v_parityb<mode>2 (operands[0],
+				    gen_lowpart (<MODE>mode, res)));
+
+  DONE;
+})



 ;; Same size conversions
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vparity.c b/gcc/testsuite/gcc.target/powerpc/p9-vparity.c
index f4aba1567cd..8f6f1239f7a 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-vparity.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-vparity.c
@@ -105,3 +105,4 @@ parity_ti_4u (__uint128_t a)
 /* { dg-final { scan-assembler "vprtybd" } } */
 /* { dg-final { scan-assembler "vprtybq" } } */
 /* { dg-final { scan-assembler "vprtybw" } } */
+/* { dg-final { scan-assembler-not "vpopcntb" } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr108699.c b/gcc/testsuite/gcc.target/powerpc/pr108699.c
new file mode 100644
index 00000000000..f02bac130cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr108699.c
@@ -0,0 +1,42 @@
+/* { dg-run } */
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+#define N 16
+
+unsigned long long vals[N];
+unsigned int res[N];
+unsigned int expects[N] = {0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+unsigned long long inputs[N]
+  = {0x0000000000000000ULL, 0x0000000000000001ULL, 0x8000000000000000ULL,
+     0x0000000000000002ULL, 0x4000000000000000ULL, 0x0000000100000000ULL,
+     0x0000000080000000ULL, 0xa5a5a5a5a5a5a5a5ULL, 0x5a5a5a5a5a5a5a5aULL,
+     0xcafecafe00000000ULL, 0x0000cafecafe0000ULL, 0x00000000cafecafeULL,
+     0x8070600000000000ULL, 0xffffffffffffffffULL};
+
+__attribute__ ((noipa)) void
+init ()
+{
+  for (int i = 0; i < N; i++)
+    vals[i] = inputs[i];
+}
+
+__attribute__ ((noipa)) void
+do_parity ()
+{
+  for (int i = 0; i < N; i++)
+    res[i] = __builtin_parityll (vals[i]);
+}
+
+int
+main (void)
+{
+  init ();
+  do_parity ();
+  for (int i = 0; i < N; i++)
+    if (res[i] != expects[i])
+      __builtin_abort();
+
+  return 0;
+}
+
--
2.27.0

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-02-20  9:48 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-16  9:23 [PATCH] rs6000: Fix vector parity support [PR108699] Kewen.Lin
2023-02-16 11:14 ` Segher Boessenkool
2023-02-16 12:06   ` Kewen.Lin
2023-02-16 15:10     ` Segher Boessenkool
2023-02-17  3:33       ` Kewen.Lin
2023-02-19 12:12         ` Segher Boessenkool
2023-02-20  2:01           ` Kewen.Lin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).