From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id 081B8385840E; Tue, 23 Jan 2024 07:10:43 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 081B8385840E DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1705993844; bh=XS6DAgRAV5Qo5JhlyxnhGeeRwHI5G9jHLGkvnz0UUvk=; h=From:To:Subject:Date:From; b=PJnrjaSQnV9Oup0Cj0sG6gXleATRL9aihOd3SAdv4jV5DdK7edzkUkA3lrtAtY0Bm LTxncrEHZDFIrqTS/mkCMIFQuJa2glVRXdGJ3xJVrD0A7Quhv20kuM6l3asv3w2a4M z2GnZw0z1hDIVv/565juAV8dxWvETJaFh4DPBEDQ= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work154-vpair)] Add support for vector pair unary and binary operations. X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/work154-vpair X-Git-Oldrev: bb9c92ae679abea62fdc29372b374d2711e008a3 X-Git-Newrev: 656c1538a142c9ddbf2e836dde7609569602e15a Message-Id: <20240123071044.081B8385840E@sourceware.org> Date: Tue, 23 Jan 2024 07:10:43 +0000 (GMT) List-Id: https://gcc.gnu.org/g:656c1538a142c9ddbf2e836dde7609569602e15a commit 656c1538a142c9ddbf2e836dde7609569602e15a Author: Michael Meissner Date: Tue Jan 23 02:08:51 2024 -0500 Add support for vector pair unary and binary operations. 2024-01-23 Michael Meissner gcc/ * config/rs6000/rs6000-builtins.def (__builtin_vpair_*): Add new built-in functions for vector pair support. * config/rs6000/rs6000-protos.h (enum vpair_split_unary): New enumeration. (vpair_split_unary): New declaration. (vpair_split_binary): Likewise. * config/rs6000/rs6000.cc (vpair_split_unary): New function to split vector pair operations. (vpair_split_binary): Likewise. * config/rs6000/rs6000.md (toplevel): Include vector-pair.md. * config/rs6000/t-rs6000 (MD_INCLUDES): Add vector-pair.md. * config/rs6000/vector-pair.md: New file. 
* doc/extend.texi (PowerPC Vector Pair Built-in Functions): Add documentation for the new vector pair built-in functions. gcc/testsuite/ * gcc.target/powerpc/vector-pair-1.c: New test. * gcc.target/powerpc/vector-pair-2.c: Likewise. Diff: --- gcc/config/rs6000/rs6000-builtins.def | 56 ++++++++ gcc/config/rs6000/rs6000-protos.h | 12 ++ gcc/config/rs6000/rs6000.cc | 67 ++++++++++ gcc/config/rs6000/rs6000.md | 1 + gcc/config/rs6000/t-rs6000 | 1 + gcc/config/rs6000/vector-pair.md | 160 +++++++++++++++++++++++ gcc/doc/extend.texi | 51 ++++++++ gcc/testsuite/gcc.target/powerpc/vector-pair-1.c | 87 ++++++++++++ gcc/testsuite/gcc.target/powerpc/vector-pair-2.c | 86 ++++++++++++ 9 files changed, 521 insertions(+) diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index 3bc7fed6956..83e7206e989 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -4131,3 +4131,59 @@ void __builtin_vsx_stxvp (v256, unsigned long, const v256 *); STXVP nothing {mma,pair} + +;; Vector pair built-in functions with float elements + v256 __builtin_vpair_f32_abs (v256); + VPAIR_F32_ABS vpair_abs_v8sf2 {mma} + + v256 __builtin_vpair_f32_add (v256, v256); + VPAIR_F32_ADD vpair_add_v8sf3 {mma} + + v256 __builtin_vpair_f32_div (v256, v256); + VPAIR_F32_DIV vpair_div_v8sf3 {mma} + + v256 __builtin_vpair_f32_max (v256, v256); + VPAIR_F32_MAX vpair_smax_v8sf3 {mma} + + v256 __builtin_vpair_f32_min (v256, v256); + VPAIR_F32_MIN vpair_smin_v8sf3 {mma} + + v256 __builtin_vpair_f32_mul (v256, v256); + VPAIR_F32_MUL vpair_mul_v8sf3 {mma} + + v256 __builtin_vpair_f32_nabs (v256); + VPAIR_F32_NABS vpair_nabs_v8sf2 {mma} + + v256 __builtin_vpair_f32_neg (v256); + VPAIR_F32_NEG vpair_neg_v8sf2 {mma} + + v256 __builtin_vpair_f32_sub (v256, v256); + VPAIR_F32_SUB vpair_sub_v8sf3 {mma} + +;; Vector pair built-in functions with double elements + v256 __builtin_vpair_f64_abs (v256); + VPAIR_F64_ABS vpair_abs_v4df2 {mma} + + v256 
__builtin_vpair_f64_add (v256, v256); + VPAIR_F64_ADD vpair_add_v4df3 {mma} + + v256 __builtin_vpair_f64_div (v256, v256); + VPAIR_F64_DIV vpair_div_v4df3 {mma} + + v256 __builtin_vpair_f64_max (v256, v256); + VPAIR_F64_MAX vpair_smax_v4df3 {mma} + + v256 __builtin_vpair_f64_min (v256, v256); + VPAIR_F64_MIN vpair_smin_v4df3 {mma} + + v256 __builtin_vpair_f64_mul (v256, v256); + VPAIR_F64_MUL vpair_mul_v4df3 {mma} + + v256 __builtin_vpair_f64_nabs (v256); + VPAIR_F64_NABS vpair_nabs_v4df2 {mma} + + v256 __builtin_vpair_f64_neg (v256); + VPAIR_F64_NEG vpair_neg_v4df2 {mma} + + v256 __builtin_vpair_f64_sub (v256, v256); + VPAIR_F64_SUB vpair_sub_v4df3 {mma} diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 09a57a806fa..4d6ecc83436 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -162,6 +162,18 @@ extern bool rs6000_pcrel_p (void); extern bool rs6000_fndecl_pcrel_p (const_tree); extern void rs6000_output_addr_vec_elt (FILE *, int); +/* If we are splitting a vector pair unary operator into two separate vector + operations, we need to generate a NEG if this is NABS. */ + +enum vpair_split_unary { + VPAIR_SPLIT_NORMAL, /* No extra processing is needed. */ + VPAIR_SPLIT_NEGATE /* Wrap operation with a NEG. */ +}; + +extern void vpair_split_unary (rtx [], machine_mode, enum rtx_code, + enum vpair_split_unary); +extern void vpair_split_binary (rtx [], machine_mode, enum rtx_code); + /* Different PowerPC instruction formats that are used by GCC. There are various other instruction formats used by the PowerPC hardware, but these formats are not currently used by GCC. */ diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 68a14c6f88a..e15669b72fb 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -29388,7 +29388,74 @@ rs6000_opaque_type_invalid_use_p (gimple *stmt) return false; } + +/* Split vector pair unary operations. 
*/ + +void +vpair_split_unary (rtx operands[], /* Dest, input. */ + machine_mode vmode, /* Vector mode. */ + enum rtx_code code, /* Operator code. */ + enum vpair_split_unary action) /* Action to take. */ +{ + rtx op0 = operands[0]; + machine_mode mode0 = GET_MODE (op0); + gcc_assert (GET_MODE_SIZE (mode0) == 32); + rtx op0_a = simplify_gen_subreg (vmode, op0, mode0, 0); + rtx op0_b = simplify_gen_subreg (vmode, op0, mode0, 16); + + rtx op1 = operands[1]; + machine_mode mode1 = GET_MODE (op1); + gcc_assert (GET_MODE_SIZE (mode1) == 32); + rtx op1_a = simplify_gen_subreg (vmode, op1, mode1, 0); + rtx op1_b = simplify_gen_subreg (vmode, op1, mode1, 16); + + rtx operation_a = gen_rtx_fmt_e (code, vmode, op1_a); + rtx operation_b = gen_rtx_fmt_e (code, vmode, op1_b); + + if (action == VPAIR_SPLIT_NEGATE) + { + operation_a = gen_rtx_NEG (vmode, operation_a); + operation_b = gen_rtx_NEG (vmode, operation_b); + } + emit_insn (gen_rtx_SET (op0_a, operation_a)); + emit_insn (gen_rtx_SET (op0_b, operation_b)); + return; +} + +/* Split vector pair binary operations. */ + +void +vpair_split_binary (rtx operands[], /* Dest, 2 inputs. */ + machine_mode vmode, /* Vector mode. */ + enum rtx_code code) /* Operator code. 
*/ +{ + rtx op0 = operands[0]; + machine_mode mode0 = GET_MODE (op0); + gcc_assert (GET_MODE_SIZE (mode0) == 32); + rtx op0_a = simplify_gen_subreg (vmode, op0, mode0, 0); + rtx op0_b = simplify_gen_subreg (vmode, op0, mode0, 16); + + rtx op1 = operands[1]; + machine_mode mode1 = GET_MODE (op1); + gcc_assert (GET_MODE_SIZE (mode1) == 32); + rtx op1_a = simplify_gen_subreg (vmode, op1, mode1, 0); + rtx op1_b = simplify_gen_subreg (vmode, op1, mode1, 16); + + rtx op2 = operands[2]; + machine_mode mode2 = GET_MODE (op2); + gcc_assert (GET_MODE_SIZE (mode2) == 32); + rtx op2_a = simplify_gen_subreg (vmode, op2, mode2, 0); + rtx op2_b = simplify_gen_subreg (vmode, op2, mode2, 16); + + rtx operation_a = gen_rtx_fmt_ee (code, vmode, op1_a, op2_a); + rtx operation_b = gen_rtx_fmt_ee (code, vmode, op1_b, op2_b); + + emit_insn (gen_rtx_SET (op0_a, operation_a)); + emit_insn (gen_rtx_SET (op0_b, operation_b)); + return; +} + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-rs6000.h" diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 4acb4031ae0..129e1ce74e2 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -15834,6 +15834,7 @@ (include "vsx.md") (include "altivec.md") (include "mma.md") +(include "vector-pair.md") (include "dfp.md") (include "crypto.md") (include "htm.md") diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000 index b3ce09d523b..64655ef38b8 100644 --- a/gcc/config/rs6000/t-rs6000 +++ b/gcc/config/rs6000/t-rs6000 @@ -128,6 +128,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs64.md \ $(srcdir)/config/rs6000/vsx.md \ $(srcdir)/config/rs6000/altivec.md \ $(srcdir)/config/rs6000/mma.md \ + $(srcdir)/config/rs6000/vector-pair.md \ $(srcdir)/config/rs6000/crypto.md \ $(srcdir)/config/rs6000/htm.md \ $(srcdir)/config/rs6000/dfp.md \ diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md new file mode 100644 index 00000000000..4b7a8db0d48 --- /dev/null +++ 
b/gcc/config/rs6000/vector-pair.md @@ -0,0 +1,160 @@ +;; Vector pair arithmetic support. +;; Copyright (C) 2020-2023 Free Software Foundation, Inc. +;; Contributed by Peter Bergner and +;; Michael Meissner +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. +;; +;; This file adds support for doing vector operations on pairs of vector +;; registers. Most of the instructions use vector pair instructions to load +;; and possibly store registers, but split the operation after register +;; allocation to do 2 separate operations. The second scheduler pass can +;; interleave other instructions between these pairs of instructions if +;; possible. + +;; We use UNSPEC to identify the representation for the operation rather than +;; SUBREG, because SUBREG tends to generate extra moves. + +(define_c_enum "unspec" + [UNSPEC_VPAIR_ABS + UNSPEC_VPAIR_DIV + UNSPEC_VPAIR_MINUS + UNSPEC_VPAIR_MULT + UNSPEC_VPAIR_NEG + UNSPEC_VPAIR_PLUS + UNSPEC_VPAIR_SMAX + UNSPEC_VPAIR_SMIN]) + +;; Vector pair element ID that defines the scalar element within the vector pair. +(define_c_enum "vpair_element" + [VPAIR_ELEMENT_FLOAT + VPAIR_ELEMENT_DOUBLE]) + +(define_int_iterator VPAIR_FP_ELEMENT [VPAIR_ELEMENT_FLOAT + VPAIR_ELEMENT_DOUBLE]) + +;; Map vector pair element ID to the vector mode after the vector pair has been +;; split. 
+(define_int_attr VPAIR_VMODE [(VPAIR_ELEMENT_FLOAT "V4SF") + (VPAIR_ELEMENT_DOUBLE "V2DF")]) + +;; Map vector pair element ID to the name used on the define_insn (in lower +;; case). +(define_int_attr vpair_modename [(VPAIR_ELEMENT_FLOAT "v8sf") + (VPAIR_ELEMENT_DOUBLE "v4df")]) + +;; Unary/binary arithmetic iterator on vector pairs. +(define_int_iterator VPAIR_FP_UNARY [UNSPEC_VPAIR_ABS + UNSPEC_VPAIR_NEG]) + +(define_int_iterator VPAIR_FP_BINARY [UNSPEC_VPAIR_DIV + UNSPEC_VPAIR_MINUS + UNSPEC_VPAIR_MULT + UNSPEC_VPAIR_PLUS + UNSPEC_VPAIR_SMAX + UNSPEC_VPAIR_SMIN]) + +;; Map the vpair operator unspec number to the standard name. +(define_int_attr vpair_stdname [(UNSPEC_VPAIR_ABS "abs") + (UNSPEC_VPAIR_DIV "div") + (UNSPEC_VPAIR_MINUS "sub") + (UNSPEC_VPAIR_MULT "mul") + (UNSPEC_VPAIR_NEG "neg") + (UNSPEC_VPAIR_PLUS "add") + (UNSPEC_VPAIR_SMAX "smax") + (UNSPEC_VPAIR_SMIN "smin")]) + +;; Map the vpair operator unspec number to the RTL operator. +(define_int_attr VPAIR_OP [(UNSPEC_VPAIR_ABS "ABS") + (UNSPEC_VPAIR_DIV "DIV") + (UNSPEC_VPAIR_MINUS "MINUS") + (UNSPEC_VPAIR_MULT "MULT") + (UNSPEC_VPAIR_NEG "NEG") + (UNSPEC_VPAIR_PLUS "PLUS") + (UNSPEC_VPAIR_SMAX "SMAX") + (UNSPEC_VPAIR_SMIN "SMIN")]) + +;; Map the scalar element ID into the appropriate insn type. +(define_int_attr vpair_type [(VPAIR_ELEMENT_FLOAT "vecfloat") + (VPAIR_ELEMENT_DOUBLE "vecdouble")]) + +;; Map the scalar element ID into the appropriate insn type for divide. +(define_int_attr vpair_divtype [(VPAIR_ELEMENT_FLOAT "vecfdiv") + (VPAIR_ELEMENT_DOUBLE "vecdiv")]) + +;; Vector pair unary operations. The last argument in the UNSPEC is a +;; CONST_INT which identifies what the scalar element is. 
+(define_insn_and_split "vpair_<vpair_stdname>_<vpair_modename>2" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "wa") + (const_int VPAIR_FP_ELEMENT)] + VPAIR_FP_UNARY))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(const_int 0)] +{ + vpair_split_unary (operands, <VPAIR_VMODE>mode, <VPAIR_OP>, + VPAIR_SPLIT_NORMAL); + DONE; +} + [(set_attr "length" "8") + (set_attr "type" "<vpair_type>")]) + +;; Optimize vector pair (neg (abs)). +(define_insn_and_split "vpair_nabs_<vpair_modename>2" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO + [(unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "wa") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_ABS) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(const_int 0)] +{ + vpair_split_unary (operands, <VPAIR_VMODE>mode, ABS, VPAIR_SPLIT_NEGATE); + DONE; +} + [(set_attr "length" "8") + (set_attr "type" "<vpair_type>")]) + +;; Vector pair binary operations. The last argument in the UNSPEC is a +;; CONST_INT which identifies what the scalar element is. +(define_insn_and_split "vpair_<vpair_stdname>_<vpair_modename>3" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "wa") + (match_operand:OO 2 "vsx_register_operand" "wa") + (const_int VPAIR_FP_ELEMENT)] + VPAIR_FP_BINARY))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(const_int 0)] +{ + vpair_split_binary (operands, <VPAIR_VMODE>mode, <VPAIR_OP>); + DONE; +} + [(set_attr "length" "8") + (set (attr "type") (if_then_else (match_test "<VPAIR_OP> == DIV") + (const_string "<vpair_divtype>") + (const_string "<vpair_type>")))]) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 0bc586d120e..e519b71877a 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -15827,6 +15827,7 @@ instructions, but allow the compiler to schedule those calls. 
* NDS32 Built-in Functions:: * Nvidia PTX Built-in Functions:: * Basic PowerPC Built-in Functions:: +* PowerPC Vector Pair Built-in Functions:: * PowerPC AltiVec/VSX Built-in Functions:: * PowerPC Hardware Transactional Memory Built-in Functions:: * PowerPC Atomic Memory Operation Functions:: @@ -23857,6 +23858,56 @@ int vec_any_le (vector unsigned __int128, vector unsigned __int128); @end smallexample +@node PowerPC Vector Pair Built-in Functions +@subsection PowerPC Vector Pair Built-in Functions + +GCC provides functions to speed up processing by using the type +@code{__vector_pair} to hold two 128-bit vectors on processors that +support ISA 3.1 (power10). The @code{__vector_pair} type and the +vector pair built-in functions require the MMA instruction set +(@option{-mmma}) to be enabled, which is on by default for +@option{-mcpu=power10}. + +By default, @code{__vector_pair} types are loaded into vectors with a +single load vector pair instruction. The processing for the built-in +function is done as two separate vector instructions on each of the +two 128-bit vectors stored in the vector pair. The +@code{__vector_pair} type is usually stored with a single vector pair +store instruction. + +The @code{nabs} built-in is a combination of @code{neg} and +@code{abs}. 
+ +The following built-in functions operate on pairs of +@code{vector float} values: + +@smallexample +__vector_pair __builtin_vpair_f32_abs (__vector_pair); +__vector_pair __builtin_vpair_f32_add (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f32_div (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f32_max (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f32_min (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f32_mul (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f32_nabs (__vector_pair); +__vector_pair __builtin_vpair_f32_neg (__vector_pair); +__vector_pair __builtin_vpair_f32_sub (__vector_pair, __vector_pair); +@end smallexample + +The following built-in functions operate on pairs of +@code{vector double} values: + +@smallexample +__vector_pair __builtin_vpair_f64_abs (__vector_pair); +__vector_pair __builtin_vpair_f64_add (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f64_div (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f64_max (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f64_min (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f64_mul (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f64_nabs (__vector_pair); +__vector_pair __builtin_vpair_f64_neg (__vector_pair); +__vector_pair __builtin_vpair_f64_sub (__vector_pair, __vector_pair); +@end smallexample + @node PowerPC Hardware Transactional Memory Built-in Functions @subsection PowerPC Hardware Transactional Memory Built-in Functions GCC provides two interfaces for accessing the Hardware Transactional diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-1.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-1.c new file mode 100644 index 00000000000..a6dbc457639 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-1.c @@ -0,0 +1,87 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options 
"-mdejagnu-cpu=power10 -O2" } */ + +/* Test whether the vector builtin code generates the expected instructions for + vector pairs with 4 double elements. */ + +void +test_add (__vector_pair *dest, + __vector_pair *x, + __vector_pair *y) +{ + /* 2 lxvp, 2 xvadddp, 1 stxvp. */ + *dest = __builtin_vpair_f64_add (*x, *y); +} + +void +test_sub (__vector_pair *dest, + __vector_pair *x, + __vector_pair *y) +{ + /* 2 lxvp, 2 xvsubdp, 1 stxvp. */ + *dest = __builtin_vpair_f64_sub (*x, *y); +} + +void +test_multiply (__vector_pair *dest, + __vector_pair *x, + __vector_pair *y) +{ + /* 2 lxvp, 2 xvmuldp, 1 stxvp. */ + *dest = __builtin_vpair_f64_mul (*x, *y); +} + +void +test_min (__vector_pair *dest, + __vector_pair *x, + __vector_pair *y) +{ + /* 2 lxvp, 2 xvmindp, 1 stxvp. */ + *dest = __builtin_vpair_f64_min (*x, *y); +} + +void +test_max (__vector_pair *dest, + __vector_pair *x, + __vector_pair *y) +{ + /* 2 lxvp, 2 xvmaxdp, 1 stxvp. */ + *dest = __builtin_vpair_f64_max (*x, *y); +} + +void +test_negate (__vector_pair *dest, + __vector_pair *x) +{ + /* 1 lxvp, 2 xvnegdp, 1 stxvp. */ + *dest = __builtin_vpair_f64_neg (*x); +} + +void +test_abs (__vector_pair *dest, + __vector_pair *x) +{ + /* 1 lxvp, 2 xvabsdp, 1 stxvp. */ + *dest = __builtin_vpair_f64_abs (*x); +} + +void +test_negative_abs (__vector_pair *dest, + __vector_pair *x) +{ + /* 1 lxvp, 2 xvnabsdp, 1 stxvp. 
*/ + __vector_pair ab = __builtin_vpair_f64_abs (*x); + *dest = __builtin_vpair_f64_neg (ab); +} + +/* { dg-final { scan-assembler-times {\mlxvp\M} 13 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 8 } } */ +/* { dg-final { scan-assembler-times {\mxvabsdp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvadddp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvmaxdp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvmindp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvmuldp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvnabsdp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvnegdp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvsubdp\M} 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-2.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-2.c new file mode 100644 index 00000000000..2f663c5780c --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-2.c @@ -0,0 +1,86 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test whether the vector builtin code generates the expected instructions for + vector pairs with 8 float elements. */ + +void +test_add (__vector_pair *dest, + __vector_pair *x, + __vector_pair *y) +{ + /* 2 lxvp, 2 xvaddsp, 1 stxvp. */ + *dest = __builtin_vpair_f32_add (*x, *y); +} + +void +test_sub (__vector_pair *dest, + __vector_pair *x, + __vector_pair *y) +{ + /* 2 lxvp, 2 xvsubsp, 1 stxvp. */ + *dest = __builtin_vpair_f32_sub (*x, *y); +} + +void +test_multiply (__vector_pair *dest, + __vector_pair *x, + __vector_pair *y) +{ + /* 2 lxvp, 2 xvmulsp, 1 stxvp. */ + *dest = __builtin_vpair_f32_mul (*x, *y); +} + +void +test_max (__vector_pair *dest, + __vector_pair *x, + __vector_pair *y) +{ + /* 2 lxvp, 2 xvmaxsp, 1 stxvp. */ + *dest = __builtin_vpair_f32_max (*x, *y); +} + +void +test_min (__vector_pair *dest, + __vector_pair *x, + __vector_pair *y) +{ + /* 2 lxvp, 2 xvminsp, 1 stxvp. 
*/ + *dest = __builtin_vpair_f32_min (*x, *y); +} + +void +test_negate (__vector_pair *dest, + __vector_pair *x) +{ + /* 1 lxvp, 2 xvnegsp, 1 stxvp. */ + *dest = __builtin_vpair_f32_neg (*x); +} + +void +test_abs (__vector_pair *dest, + __vector_pair *x) +{ + /* 1 lxvp, 2 xvabssp, 1 stxvp. */ + *dest = __builtin_vpair_f32_abs (*x); +} + +void +test_negative_abs (__vector_pair *dest, + __vector_pair *x) +{ + /* 1 lxvp, 2 xvnabssp, 1 stxvp. */ + __vector_pair ab = __builtin_vpair_f32_abs (*x); + *dest = __builtin_vpair_f32_neg (ab); +} + +/* { dg-final { scan-assembler-times {\mlxvp\M} 13 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 8 } } */ +/* { dg-final { scan-assembler-times {\mxvabssp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvaddsp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvmaxsp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvminsp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvmulsp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvnabssp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvnegsp\M} 2 } } */