From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id ADA053861019; Tue, 9 Apr 2024 05:15:35 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org ADA053861019 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1712639735; bh=wwD+stuKcfRYJ9jvo1pvAiguiwx5OTsitoDx5LGfHVg=; h=From:To:Subject:Date:From; b=F+r3NFx+miKgs61h8FwX2ZRjd7FGhg+7+ol49qAJoJEtvFSXGyGo7VIh7mFrOyjPD eozWN/4EVF8AFgPqFjtfcDveyUHLQ2YYUVT684mE9BjxZtXD7tm3gnktHHkWOIV4+R Cnc8b9/vnzK34gw2cl/lqYG0G3Irz2AebL0w4Yhk= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work164-vpair)] Add vector pair init and splat. X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/work164-vpair X-Git-Oldrev: dd2a7e91bc7f77c9bf37472daf343a6a22d977a0 X-Git-Newrev: aa0d0f245a6d4a763b754944463a9c1800394e35 Message-Id: <20240409051535.ADA053861019@sourceware.org> Date: Tue, 9 Apr 2024 05:15:35 +0000 (GMT) List-Id: https://gcc.gnu.org/g:aa0d0f245a6d4a763b754944463a9c1800394e35 commit aa0d0f245a6d4a763b754944463a9c1800394e35 Author: Michael Meissner Date: Tue Apr 9 01:08:44 2024 -0400 Add vector pair init and splat. 2024-04-09 Michael Meissner gcc/ * config/rs6000/rs6000-builtins.def (__builtin_vpair_zero): New built-in function. (__builtin_vpair_f32_splat): Likewise. (__builtin_vpair_f64_splat): Likewise. * config/rs6000/vector-pair.md (UNSPEC_VPAIR_ZERO): New unspec. (UNSPEC_VPAIR_SPLAT): Likewise. (VPAIR_SPLAT_VMODE): New mode iterator. (VPAIR_SPLAT_ELEMENT_TO_VMODE): New mode attribute. (vpair_splat_name): Likewise. (vpair_zero): New insn. (vpair_splat_<vpair_splat_name>): New define_expand. (vpair_splat_<vpair_splat_name>_internal): New insns. gcc/testsuite/ * gcc.target/powerpc/vector-pair-5.c: New test. * gcc.target/powerpc/vector-pair-6.c: Likewise. 
Diff: --- gcc/config/rs6000/rs6000-builtins.def | 10 +++ gcc/config/rs6000/vector-pair.md | 102 ++++++++++++++++++++++- gcc/doc/extend.texi | 9 ++ gcc/testsuite/gcc.target/powerpc/vector-pair-5.c | 56 +++++++++++++ gcc/testsuite/gcc.target/powerpc/vector-pair-6.c | 56 +++++++++++++ 5 files changed, 232 insertions(+), 1 deletion(-) diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index 4362cbb8fc7..b757a8630ff 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -4132,6 +4132,10 @@ void __builtin_vsx_stxvp (v256, unsigned long, const v256 *); STXVP nothing {mma,pair} +;; Vector pair built-in functions. + v256 __builtin_vpair_zero (); + VPAIR_ZERO vpair_zero {mma} + ;; Vector pair built-in functions with float elements v256 __builtin_vpair_f32_abs (v256); VPAIR_F32_ABS vpair_abs_v8sf2 {mma} @@ -4169,6 +4173,9 @@ v256 __builtin_vpair_f32_nfms (v256, v256, v256); VPAIR_F32_NFMS vpair_nfms_v8sf4 {mma} + v256 __builtin_vpair_f32_splat (float); + VPAIR_F32_SPLAT vpair_splat_v8sf {mma} + v256 __builtin_vpair_f32_sub (v256, v256); VPAIR_F32_SUB vpair_sub_v8sf3 {mma} @@ -4209,5 +4216,8 @@ v256 __builtin_vpair_f64_nfms (v256, v256, v256); VPAIR_F64_NFMS vpair_nfms_v4df4 {mma} + v256 __builtin_vpair_f64_splat (double); + VPAIR_F64_SPLAT vpair_splat_v4df {mma} + v256 __builtin_vpair_f64_sub (v256, v256); VPAIR_F64_SUB vpair_sub_v4df3 {mma} diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md index 73ae46e6d40..39b419c6814 100644 --- a/gcc/config/rs6000/vector-pair.md +++ b/gcc/config/rs6000/vector-pair.md @@ -38,7 +38,9 @@ UNSPEC_VPAIR_NEG UNSPEC_VPAIR_PLUS UNSPEC_VPAIR_SMAX - UNSPEC_VPAIR_SMIN]) + UNSPEC_VPAIR_SMIN + UNSPEC_VPAIR_ZERO + UNSPEC_VPAIR_SPLAT]) ;; Vector pair element ID that defines the scaler element within the vector pair. (define_c_enum "vpair_element" @@ -98,6 +100,104 @@ ;; Map the scalar element ID into the appropriate insn type for divide. 
(define_int_attr vpair_divtype [(VPAIR_ELEMENT_FLOAT "vecfdiv") (VPAIR_ELEMENT_DOUBLE "vecdiv")]) + +;; Mode iterator for the vector modes that we provide splat operations for. +(define_mode_iterator VPAIR_SPLAT_VMODE [V4SF V2DF]) + +;; Map element mode to 128-bit vector mode for splat operations +(define_mode_attr VPAIR_SPLAT_ELEMENT_TO_VMODE [(SF "V4SF") + (DF "V2DF")]) + +;; Map either element mode or vector mode into the name for the splat insn. +(define_mode_attr vpair_splat_name [(SF "v8sf") + (DF "v4df") + (V4SF "v8sf") + (V2DF "v4df")]) + +;; Initialize a vector pair to 0 +(define_insn_and_split "vpair_zero" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO [(const_int 0)] UNSPEC_VPAIR_ZERO))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 1) (match_dup 3)) + (set (match_dup 2) (match_dup 3))] +{ + rtx op0 = operands[0]; + + operands[1] = simplify_gen_subreg (V2DFmode, op0, OOmode, 0); + operands[2] = simplify_gen_subreg (V2DFmode, op0, OOmode, 16); + operands[3] = CONST0_RTX (V2DFmode); +} + [(set_attr "length" "8") + (set_attr "type" "vecperm")]) + +;; Create a vector pair with a value splat'ed (duplicated) to all of the +;; elements. +(define_expand "vpair_splat_<vpair_splat_name>" + [(use (match_operand:OO 0 "vsx_register_operand")) + (use (match_operand:SFDF 1 "input_operand"))] + "TARGET_MMA" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + machine_mode element_mode = <MODE>mode; + + if (op1 == CONST0_RTX (element_mode)) + { + emit_insn (gen_vpair_zero (op0)); + DONE; + } + + machine_mode vector_mode = <VPAIR_SPLAT_ELEMENT_TO_VMODE>mode; + rtx vec = gen_reg_rtx (vector_mode); + unsigned num_elements = GET_MODE_NUNITS (vector_mode); + rtvec elements = rtvec_alloc (num_elements); + for (size_t i = 0; i < num_elements; i++) + RTVEC_ELT (elements, i) = copy_rtx (op1); + + rs6000_expand_vector_init (vec, gen_rtx_PARALLEL (vector_mode, elements)); + emit_insn (gen_vpair_splat_<vpair_splat_name>_internal (op0, vec)); + DONE; +}) + +;; Inner splat support. 
Operand1 is the vector splat created above. Allow +;; operand 1 to overlap with the output registers to eliminate one move +;; instruction. +(define_insn_and_split "vpair_splat_<vpair_splat_name>_internal" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(match_operand:VPAIR_SPLAT_VMODE 1 "vsx_register_operand" "0,wa")] + UNSPEC_VPAIR_SPLAT))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op0_a = simplify_gen_subreg (<MODE>mode, op0, OOmode, 0); + rtx op0_b = simplify_gen_subreg (<MODE>mode, op0, OOmode, 16); + rtx op1 = operands[1]; + unsigned op1_regno = reg_or_subregno (op1); + + /* Check if the input is one of the output registers. */ + if (op1_regno == reg_or_subregno (op0_a)) + emit_move_insn (op0_b, op1); + + else if (op1_regno == reg_or_subregno (op0_b)) + emit_move_insn (op0_a, op1); + + else + { + emit_move_insn (op0_a, op1); + emit_move_insn (op0_b, op1); + } + + DONE; +} + [(set_attr "length" "*,8") + (set_attr "type" "vecmove")]) ;; Vector pair unary operations. The last argument in the UNSPEC is a ;; CONST_INT which identifies what the scalar element is. diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 7494e3bcc6e..d20bef9b967 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -23915,6 +23915,13 @@ The @code{nfma} built-in is a combination of @code{neg} of the The @code{nfms} built-in is a combination of @code{neg} of the @code{fms} built-in. 
+The following built-in function is independent of the type of the +underlying vector: + +@smallexample +__vector_pair __builtin_vpair_zero (); +@end smallexample + The following built-in functions operate on pairs of @code{vector float} values: @@ -23935,6 +23942,7 @@ __vector_pair __builtin_vpair_f32_nfma (__vector_pair, __vector_pair, __vector_pair); __vector_pair __builtin_vpair_f32_nfms (__vector_pair, __vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f32_splat (float); __vector_pair __builtin_vpair_f32_sub (__vector_pair, __vector_pair); @end smallexample @@ -23958,6 +23966,7 @@ __vector_pair __builtin_vpair_f64_nfma (__vector_pair, __vector_pair, __vector_pair); __vector_pair __builtin_vpair_f64_nfms (__vector_pair, __vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f64_splat (double); __vector_pair __builtin_vpair_f64_sub (__vector_pair, __vector_pair); @end smallexample diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-5.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-5.c new file mode 100644 index 00000000000..9b645e626e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-5.c @@ -0,0 +1,56 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test whether the vector builtin code generates the expected instructions for + vector pairs zero and splat functions for vector pairs containing + doubles. */ + +void +test_zero (__vector_pair *p) +{ + /* 2 xxspltib/xxlxor. */ + *p = __builtin_vpair_zero (); +} + +void +test_splat_zero (__vector_pair *p) +{ + /* 2 xxspltib/xxlxor. */ + *p = __builtin_vpair_f64_splat (0.0); +} + +void +test_splat_one (__vector_pair *p) +{ + /* xxspltidp, xxlor. */ + *p = __builtin_vpair_f64_splat (1.0); +} + +void +test_splat_pi (__vector_pair *p) +{ + /* plxv, xxlor (note, we cannot use xxspltidp). 
*/ + *p = __builtin_vpair_f64_splat (3.1415926535); +} + +void +test_splat_arg (__vector_pair *p, double x) +{ + /* xxpermdi, xxlor. */ + *p = __builtin_vpair_f64_splat (x); +} + +void +test_splat_mem (__vector_pair *p, double *q) +{ + /* lxvdsx, xxlor. */ + *p = __builtin_vpair_f64_splat (*q); +} + +/* { dg-final { scan-assembler-times {\mlxvdsx\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mp?lxvx?\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxxspltib\M|\mxxlxor\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxxspltidp\M} 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-6.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-6.c new file mode 100644 index 00000000000..5ec53d4bfc3 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-6.c @@ -0,0 +1,56 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test whether the vector builtin code generates the expected instructions for + vector pairs zero and splat functions for vector pairs containing + floats. */ + +void +test_zero (__vector_pair *p) +{ + /* 2 xxspltib/xxlxor. */ + *p = __builtin_vpair_zero (); +} + +void +test_splat_zero (__vector_pair *p) +{ + /* 2 xxspltib/xxlxor. */ + *p = __builtin_vpair_f32_splat (0.0f); +} + +void +test_splat_one (__vector_pair *p) +{ + /* xxspltiw, xxlor. */ + *p = __builtin_vpair_f32_splat (1.0f); +} + +void +test_splat_pi (__vector_pair *p) +{ + /* xxspltiw, xxlor. */ + *p = __builtin_vpair_f32_splat (3.1415926535f); +} + +void +test_splat_arg (__vector_pair *p, float x) +{ + /* xscvdpspn, xxspltw, xxlor. */ + *p = __builtin_vpair_f32_splat (x); +} + +void +test_splat_mem (__vector_pair *p, float *q) +{ + /* lxvwsx, xxlor. 
*/ + *p = __builtin_vpair_f32_splat (*q); +} + +/* { dg-final { scan-assembler-times {\mlxvwsx\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mxscvdpspn\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxxspltib\M|\mxxlxor\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxspltw\M} 1 } } */