From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <meissner@sourceware.org>
Received: by sourceware.org (Postfix, from userid 1005)
	id 3BC433858281; Tue, 23 Jan 2024 07:10:54 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 3BC433858281
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org;
	s=default; t=1705993854;
	bh=iDixY0M8s4uMqCftCUTco4Ado7z2bBVNMbx7je524xY=;
	h=From:To:Subject:Date:From;
	b=tXAfWH8fQDxyy+93b/AzZl5VuasMdy7THXJ8o6qyi/+xeW0CiQQZ2PVhccYtThJik
	 tMow1BdYO1KJa4URRdcS9YVc5jjteGDLhaxakj4QLu+JPdTmk8Fx+GXCuaoWFMOjUN
	 5eM4CBuVLTmfHzHsG+P5Bx5eR68yq+l7apUGc0U4=
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
From: Michael Meissner <meissner@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc(refs/users/meissner/heads/work154-vpair)] Add vector pair init
 and splat.
X-Act-Checkin: gcc
X-Git-Author: Michael Meissner <meissner@linux.ibm.com>
X-Git-Refname: refs/users/meissner/heads/work154-vpair
X-Git-Oldrev: d6c48ffd5b8e43023e3efbdd1aa20b7830b82fb8
X-Git-Newrev: 0ef492d559dd4505072f9511510f6bba3faeb995
Message-Id: <20240123071054.3BC433858281@sourceware.org>
Date: Tue, 23 Jan 2024 07:10:54 +0000 (GMT)
List-Id: <gcc-cvs.sourceware.org>

https://gcc.gnu.org/g:0ef492d559dd4505072f9511510f6bba3faeb995

commit 0ef492d559dd4505072f9511510f6bba3faeb995
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Jan 23 02:09:44 2024 -0500

    Add vector pair init and splat.
    
    2024-01-23  Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/rs6000-builtins.def (__builtin_vpair_zero): New
            built-in function.
            (__builtin_vpair_f32_splat): Likewise.
            (__builtin_vpair_f64_splat): Likewise.
            * config/rs6000/vector-pair.md (UNSPEC_VPAIR_ZERO): New unspec.
            (UNSPEC_VPAIR_SPLAT): Likewise.
            (VPAIR_SPLAT_VMODE): New mode iterator.
            (VPAIR_SPLAT_ELEMENT_TO_VMODE): New mode attribute.
            (vpair_splat_name): Likewise.
            (vpair_zero): New insn.
            (vpair_splat_<vpair_splat_name>): New define_expand.
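            (vpair_splat_<vpair_splat_name>_internal): New insns.
            * doc/extend.texi: Document __builtin_vpair_zero,
            __builtin_vpair_f32_splat, and __builtin_vpair_f64_splat.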
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vector-pair-5.c: New test.
            * gcc.target/powerpc/vector-pair-6.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def            |  10 +++
 gcc/config/rs6000/vector-pair.md                 | 102 ++++++++++++++++++++++-
 gcc/doc/extend.texi                              |   9 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-5.c |  56 +++++++++++++
 gcc/testsuite/gcc.target/powerpc/vector-pair-6.c |  56 +++++++++++++
 5 files changed, 232 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index 4362cbb8fc7..b757a8630ff 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -4132,6 +4132,10 @@
   void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
     STXVP nothing {mma,pair}
 
+;; Vector pair built-in functions.
+  v256 __builtin_vpair_zero ();
+    VPAIR_ZERO vpair_zero {mma}
+
 ;; Vector pair built-in functions with float elements
   v256 __builtin_vpair_f32_abs (v256);
     VPAIR_F32_ABS vpair_abs_v8sf2 {mma}
@@ -4169,6 +4173,9 @@
   v256 __builtin_vpair_f32_nfms (v256, v256, v256);
     VPAIR_F32_NFMS vpair_nfms_v8sf4 {mma}
 
+  v256 __builtin_vpair_f32_splat (float);
+    VPAIR_F32_SPLAT vpair_splat_v8sf {mma}
+
   v256 __builtin_vpair_f32_sub (v256, v256);
     VPAIR_F32_SUB vpair_sub_v8sf3 {mma}
 
@@ -4209,5 +4216,8 @@
   v256 __builtin_vpair_f64_nfms (v256, v256, v256);
     VPAIR_F64_NFMS vpair_nfms_v4df4 {mma}
 
+  v256 __builtin_vpair_f64_splat (double);
+    VPAIR_F64_SPLAT vpair_splat_v4df {mma}
+
   v256 __builtin_vpair_f64_sub (v256, v256);
     VPAIR_F64_SUB vpair_sub_v4df3 {mma}
diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md
index 73ae46e6d40..39b419c6814 100644
--- a/gcc/config/rs6000/vector-pair.md
+++ b/gcc/config/rs6000/vector-pair.md
@@ -38,7 +38,9 @@
    UNSPEC_VPAIR_NEG
    UNSPEC_VPAIR_PLUS
    UNSPEC_VPAIR_SMAX
-   UNSPEC_VPAIR_SMIN])
+   UNSPEC_VPAIR_SMIN
+   UNSPEC_VPAIR_ZERO
+   UNSPEC_VPAIR_SPLAT])
 
 ;; Vector pair element ID that defines the scaler element within the vector pair.
 (define_c_enum "vpair_element"
@@ -98,6 +100,104 @@
 ;; Map the scalar element ID into the appropriate insn type for divide.
 (define_int_attr vpair_divtype [(VPAIR_ELEMENT_FLOAT  "vecfdiv")
 				(VPAIR_ELEMENT_DOUBLE "vecdiv")])
+
+;; Mode iterator for the vector modes that we provide splat operations for.
+(define_mode_iterator VPAIR_SPLAT_VMODE [V4SF V2DF])
+
+;; Map element mode to 128-bit vector mode for splat operations
+(define_mode_attr VPAIR_SPLAT_ELEMENT_TO_VMODE [(SF "V4SF")
+						(DF "V2DF")])
+
+;; Map either element mode or vector mode into the name for the splat insn.
+(define_mode_attr vpair_splat_name [(SF   "v8sf")
+				    (DF   "v4df")
+				    (V4SF "v8sf")
+				    (V2DF "v4df")])
+
+;; Initialize a vector pair to 0
+(define_insn_and_split "vpair_zero"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
+	(unspec:OO [(const_int 0)] UNSPEC_VPAIR_ZERO))]
+  "TARGET_MMA"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 1) (match_dup 3))
+   (set (match_dup 2) (match_dup 3))]
+{
+  rtx op0 = operands[0];
+
+  operands[1] = simplify_gen_subreg (V2DFmode, op0, OOmode, 0);
+  operands[2] = simplify_gen_subreg (V2DFmode, op0, OOmode, 16);
+  operands[3] = CONST0_RTX (V2DFmode);
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "vecperm")])
+
+;; Create a vector pair with a value splat'ed (duplicated) to all of the
+;; elements.
+(define_expand "vpair_splat_<vpair_splat_name>"
+  [(use (match_operand:OO 0 "vsx_register_operand"))
+   (use (match_operand:SFDF 1 "input_operand"))]
+  "TARGET_MMA"
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  machine_mode element_mode = <MODE>mode;
+
+  if (op1 == CONST0_RTX (element_mode))
+    {
+      emit_insn (gen_vpair_zero (op0));
+      DONE;
+    }
+
+  machine_mode vector_mode = <VPAIR_SPLAT_ELEMENT_TO_VMODE>mode;
+  rtx vec = gen_reg_rtx (vector_mode);
+  unsigned num_elements = GET_MODE_NUNITS (vector_mode);
+  rtvec elements = rtvec_alloc (num_elements);
+  for (size_t i = 0; i < num_elements; i++)
+    RTVEC_ELT (elements, i) = copy_rtx (op1);
+
+  rs6000_expand_vector_init (vec, gen_rtx_PARALLEL (vector_mode, elements));
+  emit_insn (gen_vpair_splat_<vpair_splat_name>_internal (op0, vec));
+  DONE;
+})
+
+;; Inner splat support.  Operand 1 is the vector splat created by the expander
+;; above.  Allow operand 1 to overlap with one of the output registers to
+;; eliminate one move instruction.
+(define_insn_and_split "vpair_splat_<vpair_splat_name>_internal"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+	(unspec:OO
+	 [(match_operand:VPAIR_SPLAT_VMODE 1 "vsx_register_operand" "0,wa")]
+	 UNSPEC_VPAIR_SPLAT))]
+  "TARGET_MMA"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx op0 = operands[0];
+  rtx op0_a = simplify_gen_subreg (<MODE>mode, op0, OOmode, 0);
+  rtx op0_b = simplify_gen_subreg (<MODE>mode, op0, OOmode, 16);
+  rtx op1 = operands[1];
+  unsigned op1_regno = reg_or_subregno (op1);
+
+  /* Check if the input is one of the output registers.  */
+  if (op1_regno == reg_or_subregno (op0_a))
+    emit_move_insn (op0_b, op1);
+
+  else if (op1_regno == reg_or_subregno (op0_b))
+    emit_move_insn (op0_a, op1);
+
+  else
+    {
+      emit_move_insn (op0_a, op1);
+      emit_move_insn (op0_b, op1);
+    }
+
+  DONE;
+}
+  [(set_attr "length" "*,8")
+   (set_attr "type" "vecmove")])
 
 ;; Vector pair unary operations.  The last argument in the UNSPEC is a
 ;; CONST_INT which identifies what the scalar element is.
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 08d977515dc..d455d0c5624 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -23887,6 +23887,13 @@ The @code{nfma} built-in is a combination of @code{neg} of the
 The @code{nfms} built-in is a combination of @code{neg} of the
 @code{fms} built-in.
 
+The following built-in function is independent of the type of the
+underlying vector:
+
+@smallexample
+__vector_pair __builtin_vpair_zero ();
+@end smallexample
+
 The following built-in functions operate on pairs of
 @code{vector float} values:
 
@@ -23907,6 +23914,7 @@ __vector_pair __builtin_vpair_f32_nfma (__vector_pair, __vector_pair,
                                        __vector_pair);
 __vector_pair __builtin_vpair_f32_nfms (__vector_pair, __vector_pair,
                                        __vector_pair);
+__vector_pair __builtin_vpair_f32_splat (float);
 __vector_pair __builtin_vpair_f32_sub (__vector_pair, __vector_pair);
 @end smallexample
 
@@ -23930,6 +23938,7 @@ __vector_pair __builtin_vpair_f64_nfma (__vector_pair, __vector_pair,
                                        __vector_pair);
 __vector_pair __builtin_vpair_f64_nfms (__vector_pair, __vector_pair,
                                        __vector_pair);
+__vector_pair __builtin_vpair_f64_splat (double);
 __vector_pair __builtin_vpair_f64_sub (__vector_pair, __vector_pair);
 @end smallexample
 
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-5.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-5.c
new file mode 100644
index 00000000000..9b645e626e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-5.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test whether the vector pair zero and splat built-in functions generate
+   the expected instructions when the vector pair contains double
+   elements.  */
+
+void
+test_zero (__vector_pair *p)
+{
+  /* 2 xxspltib/xxlxor.  */
+  *p = __builtin_vpair_zero ();
+}
+
+void
+test_splat_zero (__vector_pair *p)
+{
+  /* 2 xxspltib/xxlxor.  */
+  *p = __builtin_vpair_f64_splat (0.0);
+}
+
+void
+test_splat_one (__vector_pair *p)
+{
+  /* xxspltidp, xxlor.  */
+  *p = __builtin_vpair_f64_splat (1.0);
+}
+
+void
+test_splat_pi (__vector_pair *p)
+{
+  /* plxv, xxlor (note, we cannot use xxspltidp).  */
+  *p = __builtin_vpair_f64_splat (3.1415926535);
+}
+
+void
+test_splat_arg (__vector_pair *p, double x)
+{
+  /* xxpermdi, xxlor.  */
+  *p = __builtin_vpair_f64_splat (x);
+}
+
+void
+test_splat_mem (__vector_pair *p, double *q)
+{
+  /* lxvdsx, xxlor.  */
+  *p = __builtin_vpair_f64_splat (*q);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvdsx\M}              1 } } */
+/* { dg-final { scan-assembler-times {\mp?lxvx?\M}             1 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M}               6 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M}            1 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M|\mxxlxor\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}           1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-6.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-6.c
new file mode 100644
index 00000000000..5ec53d4bfc3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-6.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test whether the vector pair zero and splat built-in functions generate
+   the expected instructions when the vector pair contains float
+   elements.  */
+
+void
+test_zero (__vector_pair *p)
+{
+  /* 2 xxspltib/xxlxor.  */
+  *p = __builtin_vpair_zero ();
+}
+
+void
+test_splat_zero (__vector_pair *p)
+{
+  /* 2 xxspltib/xxlxor.  */
+  *p = __builtin_vpair_f32_splat (0.0f);
+}
+
+void
+test_splat_one (__vector_pair *p)
+{
+  /* xxspltiw, xxlor.  */
+  *p = __builtin_vpair_f32_splat (1.0f);
+}
+
+void
+test_splat_pi (__vector_pair *p)
+{
+  /* xxspltiw, xxlor.  */
+  *p = __builtin_vpair_f32_splat (3.1415926535f);
+}
+
+void
+test_splat_arg (__vector_pair *p, float x)
+{
+  /* xscvdpspn, xxspltw, xxlor.  */
+  *p = __builtin_vpair_f32_splat (x);
+}
+
+void
+test_splat_mem (__vector_pair *p, float *q)
+{
+  /* lxvwsx, xxlor.  */
+  *p = __builtin_vpair_f32_splat (*q);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvwsx\M}              1 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M}               6 } } */
+/* { dg-final { scan-assembler-times {\mxscvdpspn\M}           1 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M|\mxxlxor\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxxspltiw\M}            2 } } */
+/* { dg-final { scan-assembler-times {\mxxspltw\M}             1 } } */