public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work161-vpair)] Add vector pair optimizations.
@ 2024-02-29 20:31 Michael Meissner
0 siblings, 0 replies; only message in thread
From: Michael Meissner @ 2024-02-29 20:31 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:c8384bfe4fe45667cb07d5c2c785ca1df6f83782
commit c8384bfe4fe45667cb07d5c2c785ca1df6f83782
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Thu Feb 29 15:30:38 2024 -0500
Add vector pair optimizations.
2024-02-29 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/vector-pair.md (vpair_add_neg_<vpair_modename>3): New
combiner insn to convert vector plus/neg into a minus operation.
(vpair_fma_<vpair_modename>_merge): Optimize multiply, add/subtract, and
negation into fma operations if the user specifies to create fmas.
(vpair_fma_<vpair_modename>_merge): Likewise.
(vpair_fma_<vpair_modename>_merge2): Likewise.
(vpair_nfma_<vpair_modename>_merge): Likewise.
(vpair_nfms_<vpair_modename>_merge): Likewise.
(vpair_nfms_<vpair_modename>_merge2): Likewise.
gcc/testsuite/
* gcc.target/powerpc/vector-pair-7.c: New test.
* gcc.target/powerpc/vector-pair-8.c: Likewise.
* gcc.target/powerpc/vector-pair-9.c: Likewise.
* gcc.target/powerpc/vector-pair-10.c: Likewise.
* gcc.target/powerpc/vector-pair-11.c: Likewise.
* gcc.target/powerpc/vector-pair-12.c: Likewise.
Diff:
---
gcc/config/rs6000/vector-pair.md | 224 ++++++++++++++++++++++
gcc/testsuite/gcc.target/powerpc/vector-pair-10.c | 61 ++++++
gcc/testsuite/gcc.target/powerpc/vector-pair-11.c | 65 +++++++
gcc/testsuite/gcc.target/powerpc/vector-pair-12.c | 65 +++++++
gcc/testsuite/gcc.target/powerpc/vector-pair-7.c | 18 ++
gcc/testsuite/gcc.target/powerpc/vector-pair-8.c | 18 ++
gcc/testsuite/gcc.target/powerpc/vector-pair-9.c | 61 ++++++
7 files changed, 512 insertions(+)
diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md
index 39b419c6814..7a81acbdc05 100644
--- a/gcc/config/rs6000/vector-pair.md
+++ b/gcc/config/rs6000/vector-pair.md
@@ -261,6 +261,31 @@
(set (attr "type") (if_then_else (match_test "<VPAIR_OP> == DIV")
(const_string "<vpair_divtype>")
(const_string "<vpair_type>")))])
+
+;; Optimize vector pair add of a negated value, a + (-b), into a subtract.
+(define_insn_and_split "*vpair_add_neg_<vpair_modename>3"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
+ (unspec:OO
+ [(match_operand:OO 1 "vsx_register_operand" "wa")
+ (unspec:OO
+ [(match_operand:OO 2 "vsx_register_operand" "wa")
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_NEG)
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_PLUS))]  ; PLUS only: e.g. a * (-b) must not become a - b.
+ "TARGET_MMA"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:OO
+ [(match_dup 1)
+ (match_dup 2)
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_MINUS))]
+{
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "<vpair_type>")])
\f
;; Vector pair fused-multiply (FMA) operations. The last argument in the
;; UNSPEC is a CONST_INT which identifies what the scalar element is.
@@ -354,3 +379,202 @@
}
[(set_attr "length" "8")
(set_attr "type" "<vpair_type>")])
+\f
+;; Merge a vector pair multiply feeding a vector pair add, (a * b) + c, into a
+;; single vector pair fma.  This is only done when fp-contract mode is
+;; FP_CONTRACT_FAST.  The split condition is "&& 1", so unlike most of the
+;; define_insn_and_splits, this can be done before register allocation.
+(define_insn_and_split "*vpair_fma_<vpair_modename>_merge"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+ (unspec:OO
+ [(unspec:OO
+ [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+ (match_operand:OO 2 "vsx_register_operand" "wa,0")
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_MULT)
+ (match_operand:OO 3 "vsx_register_operand" "0,wa")
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_PLUS))]
+ "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:OO
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_FMA))]
+{
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "<vpair_type>")])
+
+;; Merge (a * b) - c into an fma.  NOTE(review): name duplicates the add pattern.
+(define_insn_and_split "*vpair_fma_<vpair_modename>_merge"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+ (unspec:OO
+ [(unspec:OO
+ [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+ (match_operand:OO 2 "vsx_register_operand" "wa,0")
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_MULT)
+ (match_operand:OO 3 "vsx_register_operand" "0,wa")
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_MINUS))]
+ "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:OO
+ [(match_dup 1)
+ (match_dup 2)
+ (unspec:OO
+ [(match_dup 3)
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_NEG)
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_FMA))]  ; fma (a, b, -c)
+{
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "<vpair_type>")])
+
+(define_insn_and_split "*vpair_fma_<vpair_modename>_merge2"  ; merges (a * b) + (-c)
+ [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+ (unspec:OO
+ [(unspec:OO
+ [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+ (match_operand:OO 2 "vsx_register_operand" "wa,0")
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_MULT)
+ (unspec:OO
+ [(match_operand:OO 3 "vsx_register_operand" "0,wa")
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_NEG)
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_PLUS))]
+ "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:OO
+ [(match_dup 1)
+ (match_dup 2)
+ (unspec:OO
+ [(match_dup 3)
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_NEG)
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_FMA))]  ; fma (a, b, -c)
+{
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "<vpair_type>")])
+
+;; Merge negate, multiply, and add: -((a * b) + c) becomes -fma (a, b, c).
+(define_insn_and_split "*vpair_nfma_<vpair_modename>_merge"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+ (unspec:OO
+ [(unspec:OO
+ [(unspec:OO
+ [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+ (match_operand:OO 2 "vsx_register_operand" "wa,0")
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_MULT)
+ (match_operand:OO 3 "vsx_register_operand" "0,wa")
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_PLUS)
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_NEG))]
+ "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:OO
+ [(unspec:OO
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_FMA)
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_NEG))]
+{
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "<vpair_type>")])
+
+;; Merge negate, multiply, and subtract: -((a * b) - c) becomes -fma (a, b, -c).
+(define_insn_and_split "*vpair_nfms_<vpair_modename>_merge"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+ (unspec:OO
+ [(unspec:OO
+ [(unspec:OO
+ [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+ (match_operand:OO 2 "vsx_register_operand" "wa,0")
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_MULT)
+ (match_operand:OO 3 "vsx_register_operand" "0,wa")
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_MINUS)
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_NEG))]
+ "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:OO
+ [(unspec:OO
+ [(match_dup 1)
+ (match_dup 2)
+ (unspec:OO
+ [(match_dup 3)
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_NEG)
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_FMA)
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_NEG))]
+{
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "<vpair_type>")])
+
+(define_insn_and_split "*vpair_nfms_<vpair_modename>_merge2"  ; merges -((a * b) + (-c))
+ [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+ (unspec:OO
+ [(unspec:OO
+ [(unspec:OO
+ [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+ (match_operand:OO 2 "vsx_register_operand" "wa,0")
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_MULT)
+ (unspec:OO
+ [(match_operand:OO 3 "vsx_register_operand" "0,wa")
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_NEG)
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_PLUS)
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_NEG))]
+ "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:OO
+ [(unspec:OO
+ [(match_dup 1)
+ (match_dup 2)
+ (unspec:OO
+ [(match_dup 3)
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_NEG)
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_FMA)
+ (const_int VPAIR_FP_ELEMENT)]
+ UNSPEC_VPAIR_NEG))]  ; -fma (a, b, -c)
+{
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "<vpair_type>")])
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-10.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-10.c
new file mode 100644
index 00000000000..d2ee4dd0dd9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-10.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -Ofast -ffp-contract=fast" } */
+
+/* Test whether the vector builtin code merges multiply, add/subtract, and
+ negate into fma operations. */
+
+void
+test_fma (__vector_pair *p,
+ __vector_pair *q,
+ __vector_pair *r,
+ __vector_pair *s)
+{
+ /* (q * r) + s: 3 lxvp, 2 xvmadd{a,m}sp, 1 stxvp. */
+ __vector_pair mul = __builtin_vpair_f32_mul (*q, *r);
+ *p = __builtin_vpair_f32_add (mul, *s);
+}
+
+void
+test_fms (__vector_pair *p,
+ __vector_pair *q,
+ __vector_pair *r,
+ __vector_pair *s)
+{
+ /* (q * r) + (-s): 3 lxvp, 2 xvmsub{a,m}sp, 1 stxvp. */
+ __vector_pair mul = __builtin_vpair_f32_mul (*q, *r);
+ __vector_pair neg = __builtin_vpair_f32_neg (*s);
+ *p = __builtin_vpair_f32_add (mul, neg);
+}
+
+void
+test_nfma (__vector_pair *p,
+ __vector_pair *q,
+ __vector_pair *r,
+ __vector_pair *s)
+{
+ /* -((q * r) + s): 3 lxvp, 2 xvnmadd{a,m}sp, 1 stxvp. */
+ __vector_pair mul = __builtin_vpair_f32_mul (*q, *r);
+ __vector_pair muladd = __builtin_vpair_f32_add (mul, *s);
+ *p = __builtin_vpair_f32_neg (muladd);
+}
+
+void
+test_nfms (__vector_pair *p,
+ __vector_pair *q,
+ __vector_pair *r,
+ __vector_pair *s)
+{
+ /* -((q * r) + (-s)): 3 lxvp, 2 xvnmsub{a,m}sp, 1 stxvp. */
+ __vector_pair mul = __builtin_vpair_f32_mul (*q, *r);
+ __vector_pair neg = __builtin_vpair_f32_neg (*s);
+ __vector_pair muladd = __builtin_vpair_f32_add (mul, neg);
+ *p = __builtin_vpair_f32_neg (muladd);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd.sp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub.sp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd.sp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub.sp\M} 2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-11.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-11.c
new file mode 100644
index 00000000000..e635b599aed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-11.c
@@ -0,0 +1,65 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ffp-contract=off" } */
+
+/* Test whether the vector builtin code does not merge multiply, add/subtract,
+ and negate into fma operations if -ffp-contract is off. */
+
+void
+test_fma (__vector_pair *p,
+ __vector_pair *q,
+ __vector_pair *r,
+ __vector_pair *s)
+{
+ /* (q * r) + s, unfused: 3 lxvp, 2 xvmuldp, 2 xvadddp, 1 stxvp. */
+ __vector_pair mul = __builtin_vpair_f64_mul (*q, *r);
+ *p = __builtin_vpair_f64_add (mul, *s);
+}
+
+void
+test_fms (__vector_pair *p,
+ __vector_pair *q,
+ __vector_pair *r,
+ __vector_pair *s)
+{
+ /* (q * r) + (-s), unfused: 3 lxvp, 2 xvmuldp, 2 xvsubdp, 1 stxvp. */
+ __vector_pair mul = __builtin_vpair_f64_mul (*q, *r);
+ __vector_pair neg = __builtin_vpair_f64_neg (*s);
+ *p = __builtin_vpair_f64_add (mul, neg);
+}
+
+void
+test_nfma (__vector_pair *p,
+ __vector_pair *q,
+ __vector_pair *r,
+ __vector_pair *s)
+{
+ /* -((q * r) + s): 3 lxvp, 2 xvmuldp, 2 xvadddp, 2 xvnegdp, 1 stxvp. */
+ __vector_pair mul = __builtin_vpair_f64_mul (*q, *r);
+ __vector_pair muladd = __builtin_vpair_f64_add (mul, *s);
+ *p = __builtin_vpair_f64_neg (muladd);
+}
+
+void
+test_nfms (__vector_pair *p,
+ __vector_pair *q,
+ __vector_pair *r,
+ __vector_pair *s)
+{
+ /* -((q * r) + (-s)): 3 lxvp, 2 xvmuldp, 2 xvsubdp, 2 xvnegdp, 1 stxvp. */
+ __vector_pair mul = __builtin_vpair_f64_mul (*q, *r);
+ __vector_pair neg = __builtin_vpair_f64_neg (*s);
+ __vector_pair muladd = __builtin_vpair_f64_add (mul, neg);
+ *p = __builtin_vpair_f64_neg (muladd);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxvadddp\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxvmuldp\M} 8 } } */
+/* { dg-final { scan-assembler-times {\mxvnegdp\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxvsubdp\M} 4 } } */
+/* { dg-final { scan-assembler-not {\mxvmadd.dp\M} } } */
+/* { dg-final { scan-assembler-not {\mxvmsub.dp\M} } } */
+/* { dg-final { scan-assembler-not {\mxvnmadd.dp\M} } } */
+/* { dg-final { scan-assembler-not {\mxvnmsub.dp\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-12.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-12.c
new file mode 100644
index 00000000000..4997279473e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-12.c
@@ -0,0 +1,65 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ffp-contract=off" } */
+
+/* Test whether the vector builtin code does not merge multiply, add/subtract,
+ and negate into fma operations if -ffp-contract is off. */
+
+void
+test_fma (__vector_pair *p,
+ __vector_pair *q,
+ __vector_pair *r,
+ __vector_pair *s)
+{
+ /* (q * r) + s, unfused: 3 lxvp, 2 xvmulsp, 2 xvaddsp, 1 stxvp. */
+ __vector_pair mul = __builtin_vpair_f32_mul (*q, *r);
+ *p = __builtin_vpair_f32_add (mul, *s);
+}
+
+void
+test_fms (__vector_pair *p,
+ __vector_pair *q,
+ __vector_pair *r,
+ __vector_pair *s)
+{
+ /* (q * r) + (-s), unfused: 3 lxvp, 2 xvmulsp, 2 xvsubsp, 1 stxvp. */
+ __vector_pair mul = __builtin_vpair_f32_mul (*q, *r);
+ __vector_pair neg = __builtin_vpair_f32_neg (*s);
+ *p = __builtin_vpair_f32_add (mul, neg);
+}
+
+void
+test_nfma (__vector_pair *p,
+ __vector_pair *q,
+ __vector_pair *r,
+ __vector_pair *s)
+{
+ /* -((q * r) + s): 3 lxvp, 2 xvmulsp, 2 xvaddsp, 2 xvnegsp, 1 stxvp. */
+ __vector_pair mul = __builtin_vpair_f32_mul (*q, *r);
+ __vector_pair muladd = __builtin_vpair_f32_add (mul, *s);
+ *p = __builtin_vpair_f32_neg (muladd);
+}
+
+void
+test_nfms (__vector_pair *p,
+ __vector_pair *q,
+ __vector_pair *r,
+ __vector_pair *s)
+{
+ /* -((q * r) + (-s)): 3 lxvp, 2 xvmulsp, 2 xvsubsp, 2 xvnegsp, 1 stxvp. */
+ __vector_pair mul = __builtin_vpair_f32_mul (*q, *r);
+ __vector_pair neg = __builtin_vpair_f32_neg (*s);
+ __vector_pair muladd = __builtin_vpair_f32_add (mul, neg);
+ *p = __builtin_vpair_f32_neg (muladd);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxvaddsp\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxvmulsp\M} 8 } } */
+/* { dg-final { scan-assembler-times {\mxvnegsp\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxvsubsp\M} 4 } } */
+/* { dg-final { scan-assembler-not {\mxvmadd.sp\M} } } */
+/* { dg-final { scan-assembler-not {\mxvmsub.sp\M} } } */
+/* { dg-final { scan-assembler-not {\mxvnmadd.sp\M} } } */
+/* { dg-final { scan-assembler-not {\mxvnmsub.sp\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-7.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-7.c
new file mode 100644
index 00000000000..51a400cb4b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-7.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test that a vector pair add of a negated vector pair, q + (-r), is merged
+ into one minus operation: 2 xvsubdp and no xvadddp or xvnegdp. */
+
+void
+test_minus (__vector_pair *p, __vector_pair *q, __vector_pair *r)
+{
+ *p = __builtin_vpair_f64_add (*q, __builtin_vpair_f64_neg (*r));
+}
+
+/* { dg-final { scan-assembler-times {\mlxvp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvsubdp\M} 2 } } */
+/* { dg-final { scan-assembler-not {\mxvadddp\M} } } */
+/* { dg-final { scan-assembler-not {\mxvnegdp\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-8.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-8.c
new file mode 100644
index 00000000000..67957e3bdea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-8.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test that a vector pair add of a negated vector pair, q + (-r), is merged
+ into one minus operation: 2 xvsubsp and no xvaddsp or xvnegsp. */
+
+void
+test_minus (__vector_pair *p, __vector_pair *q, __vector_pair *r)
+{
+ *p = __builtin_vpair_f32_add (*q, __builtin_vpair_f32_neg (*r));
+}
+
+/* { dg-final { scan-assembler-times {\mlxvp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvsubsp\M} 2 } } */
+/* { dg-final { scan-assembler-not {\mxvaddsp\M} } } */
+/* { dg-final { scan-assembler-not {\mxvnegsp\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-9.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-9.c
new file mode 100644
index 00000000000..eacf8dae9d8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-9.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -Ofast -ffp-contract=fast" } */
+
+/* Test whether the vector builtin code merges multiply, add/subtract, and
+ negate into fma operations. */
+
+void
+test_fma (__vector_pair *p,
+ __vector_pair *q,
+ __vector_pair *r,
+ __vector_pair *s)
+{
+ /* (q * r) + s: 3 lxvp, 2 xvmadd{a,m}dp, 1 stxvp. */
+ __vector_pair mul = __builtin_vpair_f64_mul (*q, *r);
+ *p = __builtin_vpair_f64_add (mul, *s);
+}
+
+void
+test_fms (__vector_pair *p,
+ __vector_pair *q,
+ __vector_pair *r,
+ __vector_pair *s)
+{
+ /* (q * r) + (-s): 3 lxvp, 2 xvmsub{a,m}dp, 1 stxvp. */
+ __vector_pair mul = __builtin_vpair_f64_mul (*q, *r);
+ __vector_pair neg = __builtin_vpair_f64_neg (*s);
+ *p = __builtin_vpair_f64_add (mul, neg);
+}
+
+void
+test_nfma (__vector_pair *p,
+ __vector_pair *q,
+ __vector_pair *r,
+ __vector_pair *s)
+{
+ /* -((q * r) + s): 3 lxvp, 2 xvnmadd{a,m}dp, 1 stxvp. */
+ __vector_pair mul = __builtin_vpair_f64_mul (*q, *r);
+ __vector_pair muladd = __builtin_vpair_f64_add (mul, *s);
+ *p = __builtin_vpair_f64_neg (muladd);
+}
+
+void
+test_nfms (__vector_pair *p,
+ __vector_pair *q,
+ __vector_pair *r,
+ __vector_pair *s)
+{
+ /* -((q * r) + (-s)): 3 lxvp, 2 xvnmsub{a,m}dp, 1 stxvp. */
+ __vector_pair mul = __builtin_vpair_f64_mul (*q, *r);
+ __vector_pair neg = __builtin_vpair_f64_neg (*s);
+ __vector_pair muladd = __builtin_vpair_f64_add (mul, neg);
+ *p = __builtin_vpair_f64_neg (muladd);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd.dp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub.dp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd.dp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub.dp\M} 2 } } */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2024-02-29 20:31 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-29 20:31 [gcc(refs/users/meissner/heads/work161-vpair)] Add vector pair optimizations Michael Meissner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).