[gcc(refs/users/meissner/heads/work161-vpair)] Add vector pair optimizations.

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc(refs/users/meissner/heads/work161-vpair)] Add vector pair optimizations.
@ 2024-02-29 20:31 Michael Meissner
  0 siblings, 0 replies; only message in thread
From: Michael Meissner @ 2024-02-29 20:31 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:c8384bfe4fe45667cb07d5c2c785ca1df6f83782

commit c8384bfe4fe45667cb07d5c2c785ca1df6f83782
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Thu Feb 29 15:30:38 2024 -0500

    Add vector pair optimizations.
    
    2024-02-29  Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vector-pair.md (vpair_add_neg_<vpair_modename>3): New
            combiner insn to convert vector plus/neg into a minus operation.
            (vpair_fma_<vpair_modename>_merge): Optimize multiply, add/subtract, and
            negation into fma operations if the user specifies to create fmas.
            (vpair_fma_<vpair_modename>_merge): Likewise.
            (vpair_fma_<vpair_modename>_merge2): Likewise.
            (vpair_nfma_<vpair_modename>_merge): Likewise.
            (vpair_nfms_<vpair_modename>_merge): Likewise.
            (vpair_nfms_<vpair_modename>_merge2): Likewise.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vector-pair-7.c: New test.
            * gcc.target/powerpc/vector-pair-8.c: Likewise.
            * gcc.target/powerpc/vector-pair-9.c: Likewise.
            * gcc.target/powerpc/vector-pair-10.c: Likewise.
            * gcc.target/powerpc/vector-pair-11.c: Likewise.
            * gcc.target/powerpc/vector-pair-12.c: Likewise.

Diff:
---
 gcc/config/rs6000/vector-pair.md                  | 224 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/vector-pair-10.c |  61 ++++++
 gcc/testsuite/gcc.target/powerpc/vector-pair-11.c |  65 +++++++
 gcc/testsuite/gcc.target/powerpc/vector-pair-12.c |  65 +++++++
 gcc/testsuite/gcc.target/powerpc/vector-pair-7.c  |  18 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-8.c  |  18 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-9.c  |  61 ++++++
 7 files changed, 512 insertions(+)

diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md
index 39b419c6814..7a81acbdc05 100644
--- a/gcc/config/rs6000/vector-pair.md
+++ b/gcc/config/rs6000/vector-pair.md
@@ -261,6 +261,31 @@
    (set (attr "type") (if_then_else (match_test "<VPAIR_OP> == DIV")
 				    (const_string "<vpair_divtype>")
 				    (const_string "<vpair_type>")))])
+
+;; Optimize vector pair add of a negated value, a + (-b), into a subtract.
+(define_insn_and_split "*vpair_add_neg_<vpair_modename>3"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
+	(unspec:OO
+	 [(match_operand:OO 1 "vsx_register_operand" "wa")
+	  (unspec:OO
+	   [(match_operand:OO 2 "vsx_register_operand" "wa")
+	    (const_int VPAIR_FP_ELEMENT)]
+	   UNSPEC_VPAIR_NEG)
+	  (const_int VPAIR_FP_ELEMENT)]
+	 UNSPEC_VPAIR_PLUS))]
+  "TARGET_MMA"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:OO
+	 [(match_dup 1)
+	  (match_dup 2)
+	  (const_int VPAIR_FP_ELEMENT)]
+	 UNSPEC_VPAIR_MINUS))]
+{
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "<vpair_type>")])
 \f
 ;; Vector pair fused-multiply (FMA) operations.  The last argument in the
 ;; UNSPEC is a CONST_INT which identifies what the scalar element is.
@@ -354,3 +379,202 @@
 }
   [(set_attr "length" "8")
    (set_attr "type" "<vpair_type>")])
+\f
+;; Merge a vector pair multiply that feeds a vector pair add into a vector
+;; pair fma.  This is only done if the user allows floating point contraction
+;; (-ffp-contract=fast).  Unlike most of the define_insn_and_splits, the split
+;; is unconditional ("&& 1"), so it can be done before register allocation.
+(define_insn_and_split "*vpair_fma_<vpair_modename>_merge"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+	(unspec:OO
+	 [(unspec:OO
+	   [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+	    (match_operand:OO 2 "vsx_register_operand" "wa,0")
+	    (const_int VPAIR_FP_ELEMENT)]
+	   UNSPEC_VPAIR_MULT)
+	  (match_operand:OO 3 "vsx_register_operand" "0,wa")
+	  (const_int VPAIR_FP_ELEMENT)]
+	 UNSPEC_VPAIR_PLUS))]
+  "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:OO
+	 [(match_dup 1)
+	  (match_dup 2)
+	  (match_dup 3)
+	  (const_int VPAIR_FP_ELEMENT)]
+	 UNSPEC_VPAIR_FMA))]
+{
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "<vpair_type>")])
+
+;; Merge multiply and subtract, written as a MINUS or as a PLUS of a NEG.
+(define_insn_and_split "*vpair_fms_<vpair_modename>_merge"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+	(unspec:OO
+	 [(unspec:OO
+	   [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+	    (match_operand:OO 2 "vsx_register_operand" "wa,0")
+	    (const_int VPAIR_FP_ELEMENT)]
+	   UNSPEC_VPAIR_MULT)
+	  (match_operand:OO 3 "vsx_register_operand" "0,wa")
+	  (const_int VPAIR_FP_ELEMENT)]
+	 UNSPEC_VPAIR_MINUS))]
+  "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:OO
+	 [(match_dup 1)
+	  (match_dup 2)
+	  (unspec:OO
+	   [(match_dup 3)
+	    (const_int VPAIR_FP_ELEMENT)]
+	   UNSPEC_VPAIR_NEG)
+	  (const_int VPAIR_FP_ELEMENT)]
+	 UNSPEC_VPAIR_FMA))]
+{
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "<vpair_type>")])
+
+(define_insn_and_split "*vpair_fms_<vpair_modename>_merge2"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+	(unspec:OO
+	 [(unspec:OO
+	   [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+	    (match_operand:OO 2 "vsx_register_operand" "wa,0")
+	    (const_int VPAIR_FP_ELEMENT)]
+	   UNSPEC_VPAIR_MULT)
+	  (unspec:OO
+	   [(match_operand:OO 3 "vsx_register_operand" "0,wa")
+	    (const_int VPAIR_FP_ELEMENT)]
+	   UNSPEC_VPAIR_NEG)
+	  (const_int VPAIR_FP_ELEMENT)]
+	 UNSPEC_VPAIR_PLUS))]
+  "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:OO
+	 [(match_dup 1)
+	  (match_dup 2)
+	  (unspec:OO
+	   [(match_dup 3)
+	    (const_int VPAIR_FP_ELEMENT)]
+	   UNSPEC_VPAIR_NEG)
+	  (const_int VPAIR_FP_ELEMENT)]
+	 UNSPEC_VPAIR_FMA))]
+{
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "<vpair_type>")])
+
+;; Merge negate, multiply, and add: -((a * b) + c) becomes a negated fma.
+(define_insn_and_split "*vpair_nfma_<vpair_modename>_merge"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+	(unspec:OO
+	 [(unspec:OO
+	   [(unspec:OO
+	     [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+	      (match_operand:OO 2 "vsx_register_operand" "wa,0")
+	      (const_int VPAIR_FP_ELEMENT)]
+	     UNSPEC_VPAIR_MULT)
+	    (match_operand:OO 3 "vsx_register_operand" "0,wa")
+	    (const_int VPAIR_FP_ELEMENT)]
+	   UNSPEC_VPAIR_PLUS)
+	  (const_int VPAIR_FP_ELEMENT)]
+	 UNSPEC_VPAIR_NEG))]
+  "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:OO
+	 [(unspec:OO
+	   [(match_dup 1)
+	    (match_dup 2)
+	    (match_dup 3)
+	    (const_int VPAIR_FP_ELEMENT)]
+	   UNSPEC_VPAIR_FMA)
+	  (const_int VPAIR_FP_ELEMENT)]
+	 UNSPEC_VPAIR_NEG))]
+{
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "<vpair_type>")])
+
+;; Merge negate, multiply, and subtract (as a MINUS or as a PLUS of a NEG).
+(define_insn_and_split "*vpair_nfms_<vpair_modename>_merge"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+	(unspec:OO
+	 [(unspec:OO
+	   [(unspec:OO
+	     [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+	      (match_operand:OO 2 "vsx_register_operand" "wa,0")
+	      (const_int VPAIR_FP_ELEMENT)]
+	     UNSPEC_VPAIR_MULT)
+	    (match_operand:OO 3 "vsx_register_operand" "0,wa")
+	    (const_int VPAIR_FP_ELEMENT)]
+	   UNSPEC_VPAIR_MINUS)
+	  (const_int VPAIR_FP_ELEMENT)]
+	 UNSPEC_VPAIR_NEG))]
+  "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:OO
+	 [(unspec:OO
+	   [(match_dup 1)
+	    (match_dup 2)
+	    (unspec:OO
+	     [(match_dup 3)
+	      (const_int VPAIR_FP_ELEMENT)]
+	     UNSPEC_VPAIR_NEG)
+	    (const_int VPAIR_FP_ELEMENT)]
+	   UNSPEC_VPAIR_FMA)
+	  (const_int VPAIR_FP_ELEMENT)]
+	 UNSPEC_VPAIR_NEG))]
+{
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "<vpair_type>")])
+
+(define_insn_and_split "*vpair_nfms_<vpair_modename>_merge2"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+	(unspec:OO
+	 [(unspec:OO
+	   [(unspec:OO
+	     [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+	      (match_operand:OO 2 "vsx_register_operand" "wa,0")
+	      (const_int VPAIR_FP_ELEMENT)]
+	     UNSPEC_VPAIR_MULT)
+	    (unspec:OO
+	     [(match_operand:OO 3 "vsx_register_operand" "0,wa")
+	      (const_int VPAIR_FP_ELEMENT)]
+	     UNSPEC_VPAIR_NEG)
+	    (const_int VPAIR_FP_ELEMENT)]
+	   UNSPEC_VPAIR_PLUS)
+	  (const_int VPAIR_FP_ELEMENT)]
+	 UNSPEC_VPAIR_NEG))]
+  "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:OO
+	 [(unspec:OO
+	   [(match_dup 1)
+	    (match_dup 2)
+	    (unspec:OO
+	     [(match_dup 3)
+	      (const_int VPAIR_FP_ELEMENT)]
+	     UNSPEC_VPAIR_NEG)
+	    (const_int VPAIR_FP_ELEMENT)]
+	   UNSPEC_VPAIR_FMA)
+	  (const_int VPAIR_FP_ELEMENT)]
+	 UNSPEC_VPAIR_NEG))]
+{
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "<vpair_type>")])
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-10.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-10.c
new file mode 100644
index 00000000000..d2ee4dd0dd9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-10.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -Ofast -ffp-contract=fast" } */
+
+/* Test whether the vector builtin code merges multiply, add/subtract, and
+   negate into fma operations.  */
+
+void
+test_fma (__vector_pair *p,
+	  __vector_pair *q,
+	  __vector_pair *r,
+	  __vector_pair *s)
+{
+  /* lxvp, 2 xvmadd{a,m}sp, stxvp.  */
+  __vector_pair mul = __builtin_vpair_f32_mul (*q, *r);
+  *p = __builtin_vpair_f32_add (mul, *s);
+}
+
+void
+test_fms (__vector_pair *p,
+	  __vector_pair *q,
+	  __vector_pair *r,
+	  __vector_pair *s)
+{
+  /* lxvp, 2 xvmsub{a,m}sp, stxvp.  */
+  __vector_pair mul = __builtin_vpair_f32_mul (*q, *r);
+  __vector_pair neg = __builtin_vpair_f32_neg (*s);
+  *p = __builtin_vpair_f32_add (mul, neg);
+}
+
+void
+test_nfma (__vector_pair *p,
+	   __vector_pair *q,
+	   __vector_pair *r,
+	   __vector_pair *s)
+{
+  /* lxvp, 2 xvnmadd{a,m}sp, stxvp.  */
+  __vector_pair mul = __builtin_vpair_f32_mul (*q, *r);
+  __vector_pair muladd = __builtin_vpair_f32_add (mul, *s);
+  *p = __builtin_vpair_f32_neg (muladd);
+}
+
+void
+test_nfms (__vector_pair *p,
+	   __vector_pair *q,
+	   __vector_pair *r,
+	   __vector_pair *s)
+{
+  /* lxvp, 2 xvnmsub{a,m}sp, stxvp.  */
+  __vector_pair mul = __builtin_vpair_f32_mul (*q, *r);
+  __vector_pair neg = __builtin_vpair_f32_neg (*s);
+  __vector_pair muladd = __builtin_vpair_f32_add (mul, neg);
+  *p = __builtin_vpair_f32_neg (muladd);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvp\M}       12 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M}       4 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd.sp\M}   2 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub.sp\M}   2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd.sp\M}  2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub.sp\M}  2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-11.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-11.c
new file mode 100644
index 00000000000..e635b599aed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-11.c
@@ -0,0 +1,65 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ffp-contract=off" } */
+
+/* Test whether the vector builtin code does not merge multiply, add/subtract,
+   and negate into fma operations if -ffp-contract is off.  */
+
+void
+test_fma (__vector_pair *p,
+	  __vector_pair *q,
+	  __vector_pair *r,
+	  __vector_pair *s)
+{
+  /* lxvp, 2 xvmuldp, 2 xvadddp, stxvp.  */
+  __vector_pair mul = __builtin_vpair_f64_mul (*q, *r);
+  *p = __builtin_vpair_f64_add (mul, *s);
+}
+
+void
+test_fms (__vector_pair *p,
+	  __vector_pair *q,
+	  __vector_pair *r,
+	  __vector_pair *s)
+{
+  /* lxvp, 2 xvmuldp, 2 xvsubdp, stxvp.  */
+  __vector_pair mul = __builtin_vpair_f64_mul (*q, *r);
+  __vector_pair neg = __builtin_vpair_f64_neg (*s);
+  *p = __builtin_vpair_f64_add (mul, neg);
+}
+
+void
+test_nfma (__vector_pair *p,
+	   __vector_pair *q,
+	   __vector_pair *r,
+	   __vector_pair *s)
+{
+  /* lxvp, 2 xvmuldp, 2 xvadddp, 2 xvnegdp, stxvp.  */
+  __vector_pair mul = __builtin_vpair_f64_mul (*q, *r);
+  __vector_pair muladd = __builtin_vpair_f64_add (mul, *s);
+  *p = __builtin_vpair_f64_neg (muladd);
+}
+
+void
+test_nfms (__vector_pair *p,
+	   __vector_pair *q,
+	   __vector_pair *r,
+	   __vector_pair *s)
+{
+  /* lxvp, 2 xvmuldp, 2 xvsubdp, 2 xvnegdp, stxvp.  */
+  __vector_pair mul = __builtin_vpair_f64_mul (*q, *r);
+  __vector_pair neg = __builtin_vpair_f64_neg (*s);
+  __vector_pair muladd = __builtin_vpair_f64_add (mul, neg);
+  *p = __builtin_vpair_f64_neg (muladd);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvp\M}       12 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M}       4 } } */
+/* { dg-final { scan-assembler-times {\mxvadddp\M}     4 } } */
+/* { dg-final { scan-assembler-times {\mxvmuldp\M}     8 } } */
+/* { dg-final { scan-assembler-times {\mxvnegdp\M}     4 } } */
+/* { dg-final { scan-assembler-times {\mxvsubdp\M}     4 } } */
+/* { dg-final { scan-assembler-not   {\mxvmadd.dp\M}     } } */
+/* { dg-final { scan-assembler-not   {\mxvmsub.dp\M}     } } */
+/* { dg-final { scan-assembler-not   {\mxvnmadd.dp\M}    } } */
+/* { dg-final { scan-assembler-not   {\mxvnmsub.dp\M}    } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-12.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-12.c
new file mode 100644
index 00000000000..4997279473e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-12.c
@@ -0,0 +1,65 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ffp-contract=off" } */
+
+/* Test whether the vector builtin code does not merge multiply, add/subtract,
+   and negate into fma operations if -ffp-contract is off.  */
+
+void
+test_fma (__vector_pair *p,
+	  __vector_pair *q,
+	  __vector_pair *r,
+	  __vector_pair *s)
+{
+  /* lxvp, 2 xvmulsp, 2 xvaddsp, stxvp.  */
+  __vector_pair mul = __builtin_vpair_f32_mul (*q, *r);
+  *p = __builtin_vpair_f32_add (mul, *s);
+}
+
+void
+test_fms (__vector_pair *p,
+	  __vector_pair *q,
+	  __vector_pair *r,
+	  __vector_pair *s)
+{
+  /* lxvp, 2 xvmulsp, 2 xvsubsp, stxvp.  */
+  __vector_pair mul = __builtin_vpair_f32_mul (*q, *r);
+  __vector_pair neg = __builtin_vpair_f32_neg (*s);
+  *p = __builtin_vpair_f32_add (mul, neg);
+}
+
+void
+test_nfma (__vector_pair *p,
+	   __vector_pair *q,
+	   __vector_pair *r,
+	   __vector_pair *s)
+{
+  /* lxvp, 2 xvmulsp, 2 xvaddsp, 2 xvnegsp, stxvp.  */
+  __vector_pair mul = __builtin_vpair_f32_mul (*q, *r);
+  __vector_pair muladd = __builtin_vpair_f32_add (mul, *s);
+  *p = __builtin_vpair_f32_neg (muladd);
+}
+
+void
+test_nfms (__vector_pair *p,
+	   __vector_pair *q,
+	   __vector_pair *r,
+	   __vector_pair *s)
+{
+  /* lxvp, 2 xvmulsp, 2 xvsubsp, 2 xvnegsp, stxvp.  */
+  __vector_pair mul = __builtin_vpair_f32_mul (*q, *r);
+  __vector_pair neg = __builtin_vpair_f32_neg (*s);
+  __vector_pair muladd = __builtin_vpair_f32_add (mul, neg);
+  *p = __builtin_vpair_f32_neg (muladd);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvp\M}       12 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M}       4 } } */
+/* { dg-final { scan-assembler-times {\mxvaddsp\M}     4 } } */
+/* { dg-final { scan-assembler-times {\mxvmulsp\M}     8 } } */
+/* { dg-final { scan-assembler-times {\mxvnegsp\M}     4 } } */
+/* { dg-final { scan-assembler-times {\mxvsubsp\M}     4 } } */
+/* { dg-final { scan-assembler-not   {\mxvmadd.sp\M}     } } */
+/* { dg-final { scan-assembler-not   {\mxvmsub.sp\M}     } } */
+/* { dg-final { scan-assembler-not   {\mxvnmadd.sp\M}    } } */
+/* { dg-final { scan-assembler-not   {\mxvnmsub.sp\M}    } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-7.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-7.c
new file mode 100644
index 00000000000..51a400cb4b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-7.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test whether the vector builtin code merges plus and neg into a minus
+   operation.  */
+
+void
+test_minus (__vector_pair *p, __vector_pair *q, __vector_pair *r)
+{
+  *p = __builtin_vpair_f64_add (*q, __builtin_vpair_f64_neg (*r));
+}
+
+/* { dg-final { scan-assembler-times {\mlxvp\M}     2 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M}    1 } } */
+/* { dg-final { scan-assembler-times {\mxvsubdp\M}  2 } } */
+/* { dg-final { scan-assembler-not   {\mxvadddp\M}    } } */
+/* { dg-final { scan-assembler-not   {\mxvnegdp\M}    } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-8.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-8.c
new file mode 100644
index 00000000000..67957e3bdea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-8.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test whether the vector builtin code merges plus and neg into a minus
+   operation.  */
+
+void
+test_minus (__vector_pair *p, __vector_pair *q, __vector_pair *r)
+{
+  *p = __builtin_vpair_f32_add (*q, __builtin_vpair_f32_neg (*r));
+}
+
+/* { dg-final { scan-assembler-times {\mlxvp\M}     2 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M}    1 } } */
+/* { dg-final { scan-assembler-times {\mxvsubsp\M}  2 } } */
+/* { dg-final { scan-assembler-not   {\mxvaddsp\M}    } } */
+/* { dg-final { scan-assembler-not   {\mxvnegsp\M}    } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-9.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-9.c
new file mode 100644
index 00000000000..eacf8dae9d8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-9.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -Ofast -ffp-contract=fast" } */
+
+/* Test whether the vector builtin code merges multiply, add/subtract, and
+   negate into fma operations.  */
+
+void
+test_fma (__vector_pair *p,
+	  __vector_pair *q,
+	  __vector_pair *r,
+	  __vector_pair *s)
+{
+  /* lxvp, 2 xvmadd{a,m}dp, stxvp.  */
+  __vector_pair mul = __builtin_vpair_f64_mul (*q, *r);
+  *p = __builtin_vpair_f64_add (mul, *s);
+}
+
+void
+test_fms (__vector_pair *p,
+	  __vector_pair *q,
+	  __vector_pair *r,
+	  __vector_pair *s)
+{
+  /* lxvp, 2 xvmsub{a,m}dp, stxvp.  */
+  __vector_pair mul = __builtin_vpair_f64_mul (*q, *r);
+  __vector_pair neg = __builtin_vpair_f64_neg (*s);
+  *p = __builtin_vpair_f64_add (mul, neg);
+}
+
+void
+test_nfma (__vector_pair *p,
+	   __vector_pair *q,
+	   __vector_pair *r,
+	   __vector_pair *s)
+{
+  /* lxvp, 2 xvnmadd{a,m}dp, stxvp.  */
+  __vector_pair mul = __builtin_vpair_f64_mul (*q, *r);
+  __vector_pair muladd = __builtin_vpair_f64_add (mul, *s);
+  *p = __builtin_vpair_f64_neg (muladd);
+}
+
+void
+test_nfms (__vector_pair *p,
+	   __vector_pair *q,
+	   __vector_pair *r,
+	   __vector_pair *s)
+{
+  /* lxvp, 2 xvnmsub{a,m}dp, stxvp.  */
+  __vector_pair mul = __builtin_vpair_f64_mul (*q, *r);
+  __vector_pair neg = __builtin_vpair_f64_neg (*s);
+  __vector_pair muladd = __builtin_vpair_f64_add (mul, neg);
+  *p = __builtin_vpair_f64_neg (muladd);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvp\M}       12 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M}       4 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd.dp\M}   2 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub.dp\M}   2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd.dp\M}  2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub.dp\M}  2 } } */

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2024-02-29 20:31 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-29 20:31 [gcc(refs/users/meissner/heads/work161-vpair)] Add vector pair optimizations Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).