public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work146-vpair)] Add support for doing a horizontal add on vector pair elements.
@ 2023-11-17 20:45 Michael Meissner
0 siblings, 0 replies; only message in thread
From: Michael Meissner @ 2023-11-17 20:45 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:10825586fc933a60c741e1990aa14055f24011c1
commit 10825586fc933a60c741e1990aa14055f24011c1
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Fri Nov 17 15:43:52 2023 -0500
Add support for doing a horizontal add on vector pair elements.
This patch adds a series of built-in functions to allow users to write code to
do a number of simple operations where the loop is done using the __vector_pair
type. The __vector_pair type is an opaque type. These built-in functions keep
the two 128-bit vectors within the __vector_pair together, and split the
operation after register allocation.
This patch provides vector pair built-in functions to do a horizontal add on
vector pair elements. Only floating point and 64-bit horizontal adds are
provided in this patch.
I have built and tested these patches on:
* A little endian power10 server using --with-cpu=power10
* A little endian power9 server using --with-cpu=power9
* A big endian power9 server using --with-cpu=power9.
Can I check this patch into the master branch after the preceding patches have
been checked in?
2023-11-17 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/rs6000-builtins.def (__builtin_vpair_f32_add_elements):
New built-in function.
(__builtin_vpair_f64_add_elements): Likewise.
(__builtin_vpair_i64_add_elements): Likewise.
(__builtin_vpair_i64u_add_elements): Likewise.
* config/rs6000/vector-pair.md (UNSPEC_VPAIR_REDUCE_PLUS_F32): New
unspec.
(UNSPEC_VPAIR_REDUCE_PLUS_F64): Likewise.
(UNSPEC_VPAIR_REDUCE_PLUS_I64): Likewise.
(vpair_reduc_plus_scale_v8sf): New insn.
(vpair_reduc_plus_scale_v4df): Likewise.
(vpair_reduc_plus_scale_v4di): Likewise.
* doc/extend.texi (__builtin_vpair_f32_add_elements): Document.
(__builtin_vpair_f64_add_elements): Likewise.
(__builtin_vpair_i64_add_elements): Likewise.
gcc/testsuite/
* gcc.target/powerpc/vector-pair-16.c: New test.
Diff:
---
gcc/config/rs6000/rs6000-builtins.def | 12 +++
gcc/config/rs6000/vector-pair.md | 93 +++++++++++++++++++++++
gcc/doc/extend.texi | 3 +
gcc/testsuite/gcc.target/powerpc/vector-pair-16.c | 45 +++++++++++
4 files changed, 153 insertions(+)
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index fbd416ceb87..b9a16c01420 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -4145,6 +4145,9 @@
v256 __builtin_vpair_f32_add (v256, v256);
VPAIR_F32_ADD vpair_add_v8sf3 {mma,pair}
+ float __builtin_vpair_f32_add_elements (v256);
+ VPAIR_F32_ADD_ELEMENTS vpair_reduc_plus_scale_v8sf {mma,pair}
+
v256 __builtin_vpair_f32_assemble (vf, vf);
VPAIR_F32_ASSEMBLE vpair_assemble_v8sf {mma,pair}
@@ -4180,6 +4183,9 @@
v256 __builtin_vpair_f64_add (v256, v256);
VPAIR_F64_ADD vpair_add_v4df3 {mma,pair}
+ double __builtin_vpair_f64_add_elements (v256);
+ VPAIR_F64_ADD_ELEMENTS vpair_reduc_plus_scale_v4df {mma,pair}
+
v256 __builtin_vpair_f64_assemble (vd, vd);
VPAIR_F64_ASSEMBLE vpair_assemble_v4df {mma,pair}
@@ -4375,6 +4381,9 @@ v256 __builtin_vpair_f64_assemble (vd, vd);
v256 __builtin_vpair_i64_add (v256, v256);
VPAIR_I64_ADD vpair_add_v4di3 {mma,pair}
+ long long __builtin_vpair_i64_add_elements (v256);
+ VPAIR_I64_ADD_ELEMENTS vpair_reduc_plus_scale_v4di {mma,pair,no32bit}
+
v256 __builtin_vpair_i64_and (v256, v256);
VPAIR_I64_AND vpair_and_v4di3 {mma,pair}
@@ -4408,6 +4417,9 @@ v256 __builtin_vpair_f64_assemble (vd, vd);
v256 __builtin_vpair_i64_xor (v256, v256);
VPAIR_I64_XOR vpair_xor_v4di3 {mma,pair}
+ unsigned long long __builtin_vpair_i64u_add_elements (v256);
+ VPAIR_I64U_ADD_ELEMENTS vpair_reduc_plus_scale_v4di {mma,pair,no32bit}
+
v256 __builtin_vpair_i64u_assemble (vull, vull);
VPAIR_I64U_ASSEMBLE vpair_assemble_v4di {mma,pair}
diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md
index f6d0b2a39fc..b5e9330e71f 100644
--- a/gcc/config/rs6000/vector-pair.md
+++ b/gcc/config/rs6000/vector-pair.md
@@ -35,6 +35,9 @@
UNSPEC_VPAIR_V4DI
UNSPEC_VPAIR_ZERO
UNSPEC_VPAIR_SPLAT
+ UNSPEC_VPAIR_REDUCE_PLUS_F32
+ UNSPEC_VPAIR_REDUCE_PLUS_F64
+ UNSPEC_VPAIR_REDUCE_PLUS_I64
])
;; Iterator doing unary/binary arithmetic on vector pairs
@@ -577,6 +580,66 @@
}
[(set_attr "length" "8")])
+\f
+;; Add all elements in a pair of V4SF vectors.
+(define_insn_and_split "vpair_reduc_plus_scale_v8sf"
+ [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
+ (unspec:SF [(match_operand:OO 1 "vsx_register_operand" "v")]
+ UNSPEC_VPAIR_REDUCE_PLUS_F32))
+ (clobber (match_scratch:V4SF 2 "=&v"))
+ (clobber (match_scratch:V4SF 3 "=&v"))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(pc)]
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx tmp1 = operands[2];
+ rtx tmp2 = operands[3];
+ unsigned r = reg_or_subregno (op1);
+ rtx op1_hi = gen_rtx_REG (V4SFmode, r);
+ rtx op1_lo = gen_rtx_REG (V4SFmode, r + 1);
+
+ emit_insn (gen_addv4sf3 (tmp1, op1_hi, op1_lo));
+ emit_insn (gen_altivec_vsldoi_v4sf (tmp2, tmp1, tmp1, GEN_INT (8)));
+ emit_insn (gen_addv4sf3 (tmp2, tmp1, tmp2));
+ emit_insn (gen_altivec_vsldoi_v4sf (tmp1, tmp2, tmp2, GEN_INT (4)));
+ emit_insn (gen_addv4sf3 (tmp2, tmp1, tmp2));
+ emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp2));
+ DONE;
+}
+ [(set_attr "length" "24")])
+
+;; Add all elements in a pair of V2DF vectors
+(define_insn_and_split "vpair_reduc_plus_scale_v4df"
+ [(set (match_operand:DF 0 "vsx_register_operand" "=&wa")
+ (unspec:DF [(match_operand:OO 1 "vsx_register_operand" "wa")]
+ UNSPEC_VPAIR_REDUCE_PLUS_F64))
+ (clobber (match_scratch:DF 2 "=&wa"))
+ (clobber (match_scratch:V2DF 3 "=&wa"))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 3)
+ (plus:V2DF (match_dup 4)
+ (match_dup 5)))
+ (set (match_dup 2)
+ (vec_select:DF (match_dup 3)
+ (parallel [(match_dup 6)])))
+ (set (match_dup 0)
+ (plus:DF (match_dup 7)
+ (match_dup 2)))]
+{
+ unsigned reg1 = reg_or_subregno (operands[1]);
+ unsigned reg3 = reg_or_subregno (operands[3]);
+
+ operands[4] = gen_rtx_REG (V2DFmode, reg1);
+ operands[5] = gen_rtx_REG (V2DFmode, reg1 + 1);
+ operands[6] = GEN_INT (BYTES_BIG_ENDIAN ? 1 : 0);
+ operands[7] = gen_rtx_REG (DFmode, reg3);
+})
+
\f
;; Vector pair integer negate support.
(define_insn_and_split "vpair_neg_<vp_pmode>2"
@@ -786,3 +849,33 @@
DONE;
}
[(set_attr "length" "8")])
+
+;; Add all elements in a pair of V2DI vectors
+(define_insn_and_split "vpair_reduc_plus_scale_v4di"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=&r")
+ (unspec:DI [(match_operand:OO 1 "altivec_register_operand" "v")]
+ UNSPEC_VPAIR_REDUCE_PLUS_I64))
+ (clobber (match_scratch:V2DI 2 "=&v"))
+ (clobber (match_scratch:DI 3 "=&r"))]
+ "TARGET_MMA && TARGET_POWERPC64"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2)
+ (plus:V2DI (match_dup 4)
+ (match_dup 5)))
+ (set (match_dup 3)
+ (vec_select:DI (match_dup 2)
+ (parallel [(const_int 0)])))
+ (set (match_dup 0)
+ (vec_select:DI (match_dup 2)
+ (parallel [(const_int 1)])))
+ (set (match_dup 0)
+ (plus:DI (match_dup 0)
+ (match_dup 3)))]
+{
+ unsigned reg1 = reg_or_subregno (operands[1]);
+
+ operands[4] = gen_rtx_REG (V2DImode, reg1);
+ operands[5] = gen_rtx_REG (V2DImode, reg1 + 1);
+}
+ [(set_attr "length" "16")])
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index ee84a17e3b2..f43c0933e31 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -21498,6 +21498,7 @@ The following built-in functions operate on pairs of
@smallexample
__vector_pair __builtin_vpair_f32_abs (__vector_pair);
__vector_pair __builtin_vpair_f32_add (__vector_pair, __vector_pair);
+float __builtin_vpair_f32_add_elements (__vector_pair);
__vector_pair __builtin_vpair_f32_assemble (vector float, vector float);
vector float __builtin_vpair_f32_extract_vector (__vector_pair, int);
__vector_pair __builtin_vpair_f32_fma (__vector_pair, __vector_pair, __vector_pair);
@@ -21515,6 +21516,7 @@ The following built-in functions operate on pairs of
@smallexample
__vector_pair __builtin_vpair_f64_abs (__vector_pair);
__vector_pair __builtin_vpair_f64_add (__vector_pair, __vector_pair);
+double __builtin_vpair_f64_add_elements (__vector_pair);
__vector_pair __builtin_vpair_f64_assemble (vector double, vector double);
vector double __builtin_vpair_f64_extract_vector (__vector_pair, int);
__vector_pair __builtin_vpair_f64_fma (__vector_pair, __vector_pair, __vector_pair);
@@ -21531,6 +21533,7 @@ The following built-in functions operate on pairs of
@smallexample
__vector_pair __builtin_vpair_i64_add (__vector_pair, __vector_pair);
+long long __builtin_vpair_i64_add_elements (__vector_pair);
__vector_pair __builtin_vpair_i64_and (__vector_pair, __vector_pair);
__vector_pair __builtin_vpair_i64_assemble (vector long long,
vector long long);
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-16.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-16.c
new file mode 100644
index 00000000000..a8c206c4093
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-16.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test vector pair built-in functions to do a horizontal add of the
+ elements. */
+
+float
+f32_add_elements (__vector_pair *p)
+{
+ /* 1 lxvp, 1 xvaddsp, 2 vsldoi, 2 xvaddsp, 1 xscvspdp. */
+ return __builtin_vpair_f32_add_elements (*p);
+}
+
+double
+f64_add_elements (__vector_pair *p)
+{
+ /* 1 lxvp, 1 xvadddp, 1 xxpermdi, 1 fadd/xsadddp. */
+ return __builtin_vpair_f64_add_elements (*p);
+}
+
+long long
+i64_add_elements (__vector_pair *p)
+{
+ /* 1 lxvp, 1 vaddudm, 1 mfvsrld, 1 mfvsrd, 1 add. */
+ return __builtin_vpair_i64_add_elements (*p);
+}
+
+unsigned long long
+i64u_add_elements (__vector_pair *p)
+{
+ /* 1 lxvp, 1 vaddudm, 1 mfvsrld, 1 mfvsrd, 1 add. */
+ return __builtin_vpair_i64u_add_elements (*p);
+}
+
+/* { dg-final { scan-assembler-times {\mfadd\M|\mxsadddp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mmfvsrd\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mmfvsrld\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvaddudm\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvsldoi\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxscvspdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvadddp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvaddsp\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-11-17 20:45 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-17 20:45 [gcc(refs/users/meissner/heads/work146-vpair)] Add support for doing a horizontal add on vector pair elements Michael Meissner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).