From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id 625BD385E02C; Tue, 5 Oct 2021 17:48:36 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 625BD385E02C Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work070)] Generate XXSPLTIW on power10. X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/work070 X-Git-Oldrev: be90bb72cf8c4dddf5a9995977b50701fa1737bb X-Git-Newrev: 37ca91ee404329a376ce8cc9bcfb3926d79adbbc Message-Id: <20211005174836.625BD385E02C@sourceware.org> Date: Tue, 5 Oct 2021 17:48:36 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 05 Oct 2021 17:48:36 -0000 https://gcc.gnu.org/g:37ca91ee404329a376ce8cc9bcfb3926d79adbbc commit 37ca91ee404329a376ce8cc9bcfb3926d79adbbc Author: Michael Meissner Date: Tue Oct 5 13:48:09 2021 -0400 Generate XXSPLTIW on power10. This patch adds support to automatically generate the ISA 3.1 XXSPLTIW instruction for V8HImode, V4SImode, and V4SFmode vectors. It does this by adding support for vector constants that can be used, and adding a VEC_DUPLICATE pattern to generate the actual XXSPLTIW instruction. I rewrote the XXSPLTW built-in functions to use VEC_DUPLICATE instead of UNSPEC. This patch also updates the insn counts in the vec-splati-runnable.c test to work with the new option to use XXSPLTIW to load up some vector constants. I added 4 new tests to test loading up V16QI, V8HI, V4SI, and V4SF vector constants. At the present time, XXSPLTIW generation is disabled by default. 2021-10-04 Michael Meissner gcc/ * config/rs6000/constraints.md (eW): New constraint. 
* config/rs6000/predicates.md (easy_vector_constant_splat_word): New predicate. (easy_vector_constant): If we can use XXSPLTIW, the vector constant is easy. * config/rs6000/rs6000-protos.h (xxspltiw_constant_immediate): New declaration. * config/rs6000/rs6000.c (xxspltib_constant_p): Don't return true if we could generate XXSPLTIW instead of XXSPLTIB and sign extend. (xxspltiw_constant_immediate): New function. (output_vec_const_move): Add support for loading up vector constants with XXSPLTIW. (prefixed_xxsplti_p): Recognize xxspltiw instructions as prefixed. * config/rs6000/rs6000.opt (-mxxspltiw): New debug switch. * config/rs6000/vsx.md (vsx_mov_64bit): Add support for constants loaded with XXSPLTIW. (vsx_mov_32bit): Likewise. * doc/md.texi (PowerPC and IBM RS6000 constraints): Document the eW constraint. gcc/testsuite/ * gcc.target/powerpc/vec-splat-constant-v16qi.c: New test. * gcc.target/powerpc/vec-splat-constant-v4sf.c: New test. * gcc.target/powerpc/vec-splat-constant-v4si.c: New test. * gcc.target/powerpc/vec-splat-constant-v8hi.c: New test. Diff: --- gcc/config/rs6000/constraints.md | 5 + gcc/config/rs6000/predicates.md | 101 +++++++++++++++++++++ gcc/config/rs6000/rs6000-protos.h | 1 + gcc/config/rs6000/rs6000.c | 92 ++++++++++++++++++- gcc/config/rs6000/rs6000.opt | 5 + gcc/config/rs6000/vsx.md | 28 +++--- gcc/doc/md.texi | 3 + .../gcc.target/powerpc/vec-splat-constant-v16qi.c | 27 ++++++ .../gcc.target/powerpc/vec-splat-constant-v4sf.c | 67 ++++++++++++++ .../gcc.target/powerpc/vec-splat-constant-v4si.c | 51 +++++++++++ .../gcc.target/powerpc/vec-splat-constant-v8hi.c | 62 +++++++++++++ 11 files changed, 426 insertions(+), 16 deletions(-) diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index 1700657abe9..46daeb0861c 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -228,6 +228,11 @@ "An IEEE 128-bit constant that can be loaded with the LXVKQ instruction." 
(match_operand 0 "easy_fp_constant_ieee128")) +;; Vector constant that can be loaded with XXSPLTIW +(define_constraint "eW" + "A vector constant that can be loaded with the XXSPLTIW instruction." + (match_operand 0 "easy_vector_constant_splat_word")) + ;; Floating-point constraints. These two are defined so that insn ;; length attributes can be calculated exactly. diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 30e89ec79f0..9b9f5934e58 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -751,6 +751,104 @@ return easy_fp_constant_64bit_scalar (op, GET_MODE_INNER (mode)); }) +;; Return 1 if the operand is a constant that can be loaded with the XXSPLTIW +;; instruction that loads up a 32-bit immediate and splats it into the vector. + +(define_predicate "easy_vector_constant_splat_word" + (match_code "const_vector") +{ + HOST_WIDE_INT value; + + if (!TARGET_PREFIXED || !TARGET_VSX || !TARGET_XXSPLTIW) + return false; + + rtx element0 = CONST_VECTOR_ELT (op, 0); + + switch (mode) + { + /* V4SImode constant vectors that have the same element are can be used + with XXSPLTIW. */ + case V4SImode: + if (!CONST_VECTOR_DUPLICATE_P (op)) + return false; + + /* Don't return true if we can use the shorter vspltisw instruction. */ + value = INTVAL (element0); + return (!EASY_VECTOR_15 (value)); + + /* V4SFmode constant vectors that have the same element are + can be used with XXSPLTIW. */ + case V4SFmode: + if (!CONST_VECTOR_DUPLICATE_P (op)) + return false; + + /* Don't return true for 0.0f, since that can be created with + xxspltib or xxlxor. */ + return (element0 != CONST0_RTX (SFmode)); + + /* V8Hmode constant vectors that have the same element are can be used + with XXSPLTIW. */ + case V8HImode: + if (CONST_VECTOR_DUPLICATE_P (op)) + { + /* Don't return true if we can use the shorter vspltish instruction. 
*/ + value = INTVAL (element0); + if (EASY_VECTOR_15 (value)) + return false; + + return true; + } + + else + { + /* Check if all even elements are the same and all odd elements are + the same. */ + rtx element1 = CONST_VECTOR_ELT (op, 1); + + if (!CONST_INT_P (element1)) + return false; + + for (size_t i = 2; i < GET_MODE_NUNITS (V8HImode); i += 2) + if (!rtx_equal_p (element0, CONST_VECTOR_ELT (op, i)) + || !rtx_equal_p (element1, CONST_VECTOR_ELT (op, i + 1))) + return false; + + return true; + } + + /* V16QI constant vectors that have the first four elements identical to + the next set of 4 elements, and so forth can generate XXSPLTIW. */ + case V16QImode: + { + /* If we can use XXSPLTIB, don't generate XXSPLTIW. */ + if (xxspltib_constant_nosplit (op, mode)) + return false; + + rtx element1 = CONST_VECTOR_ELT (op, 1); + rtx element2 = CONST_VECTOR_ELT (op, 2); + rtx element3 = CONST_VECTOR_ELT (op, 3); + + if (!CONST_INT_P (element0) || !CONST_INT_P (element1) + || !CONST_INT_P (element2) || !CONST_INT_P (element3)) + return false; + + for (size_t i = 4; i < GET_MODE_NUNITS (V16QImode); i += 4) + if (!rtx_equal_p (element0, CONST_VECTOR_ELT (op, i)) + || !rtx_equal_p (element1, CONST_VECTOR_ELT (op, i + 1)) + || !rtx_equal_p (element2, CONST_VECTOR_ELT (op, i + 2)) + || !rtx_equal_p (element3, CONST_VECTOR_ELT (op, i + 3))) + return false; + + return true; + } + + default: + break; + } + + return false; +}) + ;; Return 1 if the operand is a constant that can loaded with a XXSPLTIB ;; instruction and then a VUPKHSB, VECSB2W or VECSB2D instruction. 
@@ -871,6 +969,9 @@ if (easy_vector_constant_64bit_element (op, mode)) return true; + if (easy_vector_constant_splat_word (op, mode)) + return true; + if (TARGET_P9_VECTOR && xxspltib_constant_p (op, mode, &num_insns, &value)) return true; diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index a21fa08b367..540c401e7ad 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -33,6 +33,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int, extern int easy_altivec_constant (rtx, machine_mode); extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *); extern long xxspltidp_constant_immediate (rtx, machine_mode); +extern long xxspltiw_constant_immediate (rtx, machine_mode); extern int lxvkq_constant_immediate (rtx, machine_mode); extern int vspltis_shifted (rtx); extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 81004d9a879..92cd2a1cf87 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -6940,8 +6940,8 @@ xxspltib_constant_p (rtx op, *num_insns_ptr = 1; /* See if we could generate the constant with XXSPLTIW instead of XXSPLTIB + - VUPKLSB/VEXTSB2W. */ - else if ((mode == V8HImode || mode == V4SImode) && TARGET_POWER10 && TARGET_XXSPLTIW) + VUPKLSB or VEXTSB2W. */ + else if (easy_vector_constant_splat_word (op, mode)) return false; else @@ -7005,6 +7005,82 @@ xxspltidp_constant_immediate (rtx op, machine_mode mode) return ret; } +/* Return the immediate value used in the XXSPLTIW instruction. */ +long +xxspltiw_constant_immediate (rtx op, machine_mode mode) +{ + long ret; + + gcc_assert (easy_vector_constant_splat_word (op, mode)); + + switch (mode) + { + default: + gcc_unreachable (); + + /* V4SImode constant vectors that have the same element are can be used + with XXSPLTIW. 
*/ + case E_V4SImode: + gcc_assert (CONST_VECTOR_DUPLICATE_P (op)); + ret = INTVAL (CONST_VECTOR_ELT (op, 0)); + break; + + /* V4SFmode constant vectors that have the same element are + can be used with XXSPLTIW. */ + case E_V4SFmode: + gcc_assert (CONST_VECTOR_DUPLICATE_P (op)); + ret = rs6000_const_f32_to_i32 (CONST_VECTOR_ELT (op, 0)); + break; + + /* V8HImode constant vectors with all of the even elements the same and + all of the odd elements the same can used XXSPLTIW. */ + case E_V8HImode: + { + if (!rtx_equal_p (CONST_VECTOR_ELT (op, 0), CONST_VECTOR_ELT (op, 2)) + || !rtx_equal_p (CONST_VECTOR_ELT (op, 1), CONST_VECTOR_ELT (op, 3))) + gcc_unreachable (); + + long value0 = INTVAL (CONST_VECTOR_ELT (op, 0)) & 0xffff; + long value1 = INTVAL (CONST_VECTOR_ELT (op, 1)) & 0xffff; + + if (!BYTES_BIG_ENDIAN) + std::swap (value0, value1); + + ret = (value0 << 16) | value1; + } + break; + + /* V16QI constant vectors that have the first four elements identical to + the next set of 4 elements, and so forth can generate XXSPLTIW. */ + case E_V16QImode: + { + rtx op0 = CONST_VECTOR_ELT (op, 0); + rtx op1 = CONST_VECTOR_ELT (op, 1); + rtx op2 = CONST_VECTOR_ELT (op, 2); + rtx op3 = CONST_VECTOR_ELT (op, 3); + + for (size_t i = 4; i < GET_MODE_NUNITS (V16QImode); i += 4) + if (!rtx_equal_p (op0, CONST_VECTOR_ELT (op, i)) + || !rtx_equal_p (op1, CONST_VECTOR_ELT (op, i + 1)) + || !rtx_equal_p (op2, CONST_VECTOR_ELT (op, i + 2)) + || !rtx_equal_p (op3, CONST_VECTOR_ELT (op, i + 3))) + gcc_unreachable (); + + long value0 = INTVAL (op0) & 0xff; + long value1 = INTVAL (op1) & 0xff; + long value2 = INTVAL (op2) & 0xff; + long value3 = INTVAL (op3) & 0xff; + + ret = ((BYTES_BIG_ENDIAN) + ? ((value0 << 24) | (value1 << 16) | (value2 << 8) | value3) + : ((value3 << 24) | (value2 << 16) | (value1 << 8) | value0)); + } + break; + } + + return ret; +} + /* Return the constant that will go in the LXVKQ instruction. */ /* LXVKQ immediates. 
*/ @@ -7146,6 +7222,12 @@ output_vec_const_move (rtx *operands) return "xxspltidp %x0,%2"; } + if (easy_vector_constant_splat_word (vec, mode)) + { + operands[2] = GEN_INT (xxspltiw_constant_immediate (vec, mode)); + return "xxspltiw %x0,%2"; + } + if (easy_fp_constant_ieee128 (vec, mode)) { operands[2] = GEN_INT (lxvkq_constant_immediate (vec, mode)); @@ -26920,6 +27002,12 @@ prefixed_xxsplti_p (rtx_insn *insn) case E_V2DFmode: return easy_vector_constant_64bit_element (src, mode); + case E_V16QImode: + case E_V8HImode: + case E_V4SImode: + case E_V4SFmode: + return easy_vector_constant_splat_word (src, mode); + default: break; } diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index c9eb78952d6..a53aad72547 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -644,6 +644,11 @@ mxxspltidp Target Undocumented Var(TARGET_XXSPLTIDP) Init(1) Save Generate (do not generate) XXSPLTIDP instructions. +;; Do not enable at this time. +mxxspltiw +Target Undocumented Var(TARGET_XXSPLTIW) Init(0) Save +Generate (do not generate) XXSPLTIW instructions. + mlxvkq Target Undocumented Var(TARGET_LXVKQ) Init(1) Save Generate (do not generate) LXVKQ instructions. diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index d7e58654ded..712e5df0c02 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -1191,19 +1191,19 @@ ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move. 
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR) -;; XXSPLTIDP LXVKQ +;; XXSPLTIDP XXSPLTIW LXVKQ ;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW ;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX) (define_insn "vsx_mov_64bit" [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=ZwO, wa, wa, r, we, ?wQ, - wa, wa, + wa, wa, wa, ?&r, ??r, ??Y, , wa, v, ?wa, v, , wZ, v") (match_operand:VSX_M 1 "input_operand" "wa, ZwO, wa, we, r, r, - eV, eQ, + eV, eW, eQ, wQ, Y, r, r, wE, jwM, ?jwM, W, , v, wZ"))] @@ -1215,44 +1215,44 @@ } [(set_attr "type" "vecstore, vecload, vecsimple, mtvsr, mfvsr, load, - vecperm, vecperm, + vecperm, vecperm, vecperm, store, load, store, *, vecsimple, vecsimple, vecsimple, *, *, vecstore, vecload") (set_attr "num_insns" "*, *, *, 2, *, 2, - *, *, + *, *, *, 2, 2, 2, 2, *, *, *, 5, 2, *, *") (set_attr "max_prefixed_insns" "*, *, *, *, *, 2, - *, *, + *, *, *, 2, 2, 2, 2, *, *, *, *, *, *, *") (set_attr "length" "*, *, *, 8, *, 8, - *, *, + *, *, *, 8, 8, 8, 8, *, *, *, 20, 8, *, *") (set_attr "isa" ", , , *, *, *, - p10, p10, + p10, p10, p10, *, *, *, *, p9v, *, , *, *, *, *")]) ;; VSX store VSX load VSX move GPR load GPR store GPR move -;; XXSPLTIDP LXVKQ +;; XXSPLTIDP XXSPLTIW LXVKQ ;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const ;; LVX (VMX) STVX (VMX) (define_insn "*vsx_mov_32bit" [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=ZwO, wa, wa, ??r, ??Y, , - wa, wa, + wa, wa, wa, wa, v, ?wa, v, , wZ, v") (match_operand:VSX_M 1 "input_operand" "wa, ZwO, wa, Y, r, r, - eV, eQ, + eV, eW, eQ, wE, jwM, ?jwM, W, , v, wZ"))] @@ -1264,17 +1264,17 @@ } [(set_attr "type" "vecstore, vecload, vecsimple, load, store, *, - vecperm, vecperm, + vecperm, vecperm, vecperm, vecsimple, vecsimple, vecsimple, *, *, vecstore, vecload") (set_attr "length" "*, *, *, 16, 16, 16, - *, *, + *, *, *, *, *, *, 20, 16, *, *") (set_attr "isa" ", , , *, *, *, - p10, p10, + p10, p10, p10, p9v, *, , *, *, *, *")]) diff --git a/gcc/doc/md.texi 
b/gcc/doc/md.texi index 813d6316d8c..4ad0e745c94 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -3345,6 +3345,9 @@ An IEEE 128-bit constant that can be loaded with the LXVKQ instruction. @item eV A 128-bit vector constant that can be loaded with the XXSPLTIDP instruction. +@item eW +A vector constant that can be loaded with the XXSPLTIW instruction. + @ifset INTERNALS @item G A floating point constant that can be loaded into a register with one diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c new file mode 100644 index 00000000000..2707d86e6fd --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -mxxspltiw" } */ + +#include + +/* Test whether XXSPLTIW is generated for V16HI vector constants where the + first 4 elements are the same as the next 4 elements, etc. */ + +vector unsigned char +v16qi_const_1 (void) +{ + return (vector unsigned char) { 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, }; /* VSLTPISB. */ +} + +vector unsigned char +v16qi_const_2 (void) +{ + return (vector unsigned char) { 1, 2, 3, 4, 1, 2, 3, 4, + 1, 2, 3, 4, 1, 2, 3, 4, }; /* XXSPLTIW. 
*/ +} + +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvspltisb\M|\mxxspltib\M} 1 } } */ +/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mplxv\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c new file mode 100644 index 00000000000..05d4ee3f5cb --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c @@ -0,0 +1,67 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -mxxspltiw" } */ + +#include + +/* Test whether XXSPLTIW is generated for V4SF vector constants. */ + +vector float +v4sf_const_1 (void) +{ + return (vector float) { 1.0f, 1.0f, 1.0f, 1.0f }; /* XXSPLTIW. */ +} + +vector float +v4sf_const_nan (void) +{ + return (vector float) { __builtin_nanf (""), + __builtin_nanf (""), + __builtin_nanf (""), + __builtin_nanf ("") }; /* XXSPLTIW. */ +} + +vector float +v4sf_const_inf (void) +{ + return (vector float) { __builtin_inff (), + __builtin_inff (), + __builtin_inff (), + __builtin_inff () }; /* XXSPLTIW. */ +} + +vector float +v4sf_const_m0 (void) +{ + return (vector float) { -0.0f, -0.0f, -0.0f, -0.0f }; /* XXSPLTIB/VSLW. */ +} + +vector float +v4sf_splats_1 (void) +{ + return vec_splats (1.0f); /* XXSPLTIW. */ +} + +vector float +v4sf_splats_nan (void) +{ + return vec_splats (__builtin_nanf ("")); /* XXSPLTIW. */ +} + +vector float +v4sf_splats_inf (void) +{ + return vec_splats (__builtin_inff ()); /* XXSPLTIW. */ +} + +vector float +v8hi_splats_m0 (void) +{ + return vec_splats (-0.0f); /* XXSPLTIB/VSLW. 
*/ +} + +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mxxspltib\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvslw\M} 2 } } */ +/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mplxv\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c new file mode 100644 index 00000000000..da909e948b2 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -mxxspltiw" } */ + +#include + +/* Test whether XXSPLTIW is generated for V4SI vector constants. We make sure + the power9 support (XXSPLTIB/VEXTSB2W) is not done. */ + +vector int +v4si_const_1 (void) +{ + return (vector int) { 1, 1, 1, 1 }; /* VSLTPISW. */ +} + +vector int +v4si_const_126 (void) +{ + return (vector int) { 126, 126, 126, 126 }; /* XXSPLTIW. */ +} + +vector int +v4si_const_1023 (void) +{ + return (vector int) { 1023, 1023, 1023, 1023 }; /* XXSPLTIW. */ +} + +vector int +v4si_splats_1 (void) +{ + return vec_splats (1); /* VSLTPISW. */ +} + +vector int +v4si_splats_126 (void) +{ + return vec_splats (126); /* XXSPLTIW. */ +} + +vector int +v8hi_splats_1023 (void) +{ + return vec_splats (1023); /* XXSPLTIW. 
*/ +} + +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mvspltisw\M} 2 } } */ +/* { dg-final { scan-assembler-not {\mxxspltib\M} } } */ +/* { dg-final { scan-assembler-not {\mvextsb2w\M} } } */ +/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mplxv\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c new file mode 100644 index 00000000000..290e05d4a64 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c @@ -0,0 +1,62 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -mxxspltiw" } */ + +#include + +/* Test whether XXSPLTIW is generated for V8HI vector constants. We make sure + the power9 support (XXSPLTIB/VUPKLSB) is not done. */ + +vector short +v8hi_const_1 (void) +{ + return (vector short) { 1, 1, 1, 1, 1, 1, 1, 1 }; /* VSLTPISH. */ +} + +vector short +v8hi_const_126 (void) +{ + return (vector short) { 126, 126, 126, 126, + 126, 126, 126, 126 }; /* XXSPLTIW. */ +} + +vector short +v8hi_const_1023 (void) +{ + return (vector short) { 1023, 1023, 1023, 1023, + 1023, 1023, 1023, 1023 }; /* XXSPLTIW. */ +} + +vector short +v8hi_splats_1 (void) +{ + return vec_splats ((short)1); /* VSLTPISH. */ +} + +vector short +v8hi_splats_126 (void) +{ + return vec_splats ((short)126); /* XXSPLTIW. */ +} + +vector short +v8hi_splats_1023 (void) +{ + return vec_splats ((short)1023); /* XXSPLTIW. */ +} + +/* Test that we can optimiza V8HI where all of the even elements are the same + and all of the odd elements are the same. */ +vector short +v8hi_const_1023_1000 (void) +{ + return (vector short) { 1023, 1000, 1023, 1000, + 1023, 1000, 1023, 1000 }; /* XXSPLTIW. 
*/ +} + +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 5 } } */ +/* { dg-final { scan-assembler-times {\mvspltish\M} 2 } } */ +/* { dg-final { scan-assembler-not {\mxxspltib\M} } } */ +/* { dg-final { scan-assembler-not {\mvupklsb\M} } } */ +/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mplxv\M} } } */