From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id 2B5793858401; Sat, 16 Oct 2021 01:36:00 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 2B5793858401 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work071)] Generate XXSPLTIW on power10. X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/work071 X-Git-Oldrev: ad4f1acb7a60bf0b36beea7b996871f130adc4c7 X-Git-Newrev: 66c5ef8a3e48d022a1a434b8473f6396be2a9b62 Message-Id: <20211016013600.2B5793858401@sourceware.org> Date: Sat, 16 Oct 2021 01:36:00 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 16 Oct 2021 01:36:00 -0000 https://gcc.gnu.org/g:66c5ef8a3e48d022a1a434b8473f6396be2a9b62 commit 66c5ef8a3e48d022a1a434b8473f6396be2a9b62 Author: Michael Meissner Date: Fri Oct 15 21:35:41 2021 -0400 Generate XXSPLTIW on power10. This patch adds support to automatically generate the ISA 3.1 XXSPLTIW instruction for V8HImode, V4SImode, and V4SFmode vectors. It does this by adding support for vector constants that can be used, and adding a VEC_DUPLICATE pattern to generate the actual XXSPLTIW instruction. The eV constraint added with the XXSPLTIDP patch will also recognize use of the XXSPLTIW instruction. I have not updated the eS constraint because right now I didn't add support to use XXSPLTIW to load SImode and HImode constants into vector registers. I rewrote the XXSPLTW built-in functions to use VEC_DUPLICATE instead of UNSPEC. I added 4 new tests to test loading up V16QI, V8HI, V4SI, and V4SF vector constants. 2021-10-15 Michael Meissner gcc/ * config/rs6000/predicates.md (easy_fp_constant): Add support for XXSPLTIW. 
(easy_vector_constant_prefixed): Likewise. (easy_vector_constant): Likewise. * config/rs6000/rs6000-protos.h (rs6000_vec_const): Add field for XXSPLTIW. (vec_const_use_xxspltiw): New declaration. * config/rs6000/rs6000.c (xxspltib_constant_p): If we can generate XXSPLTIW, don't do XXSPLTIB and sign extend. (output_vec_const_move): Add support for XXSPLTIW. (prefixed_xxsplti_p): Recognize XXSPLTIW instructions as prefixed. (vec_const_simple_constant): New function. (vec_const_use_xxspltiw): New function. * config/rs6000/rs6000.opt (-mxxspltiw): New debug switch. * config/rs6000/vsx.md (vsx_mov_64bit): Update comment. (vsx_mov_32bit): Likewise. gcc/testsuite/ * gcc.target/powerpc/vec-splat-constant-v16qi.c: New test. * gcc.target/powerpc/vec-splat-constant-v4sf.c: New test. * gcc.target/powerpc/vec-splat-constant-v4si.c: New test. * gcc.target/powerpc/vec-splat-constant-v8hi.c: New test. * gcc.target/powerpc/vec-splati-runnable.c: Update insn count. Diff: --- gcc/config/rs6000/predicates.md | 11 +- gcc/config/rs6000/rs6000-protos.h | 2 + gcc/config/rs6000/rs6000.c | 120 +++++++++++++++++++-- gcc/config/rs6000/rs6000.opt | 4 + gcc/config/rs6000/vsx.md | 4 +- .../gcc.target/powerpc/vec-splat-constant-v16qi.c | 27 +++++ .../gcc.target/powerpc/vec-splat-constant-v4sf.c | 67 ++++++++++++ .../gcc.target/powerpc/vec-splat-constant-v4si.c | 51 +++++++++ .../gcc.target/powerpc/vec-splat-constant-v8hi.c | 62 +++++++++++ .../gcc.target/powerpc/vec-splati-runnable.c | 2 +- 10 files changed, 340 insertions(+), 10 deletions(-) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 517ce08f03d..252abbbaf9a 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -611,6 +611,9 @@ if (vec_const_use_xxspltidp (&vec_const)) return true; + + if (vec_const_use_xxspltiw (&vec_const)) + return true; } /* Otherwise consider floating point constants hard, so that the @@ -644,7 +647,7 @@ }) ;; Return 1 if the operand is a scalar 
constant that can be loaded to a VSX -;; register with one prefixed instruction, such as XXSPLTIDP. +;; register with one prefixed instruction, such as XXSPLTIDP or XXSPLTIW. ;; ;; We have to have separate predicates and constraints for scalars and vectors, ;; otherwise things get messed up with TImode when you try to load very large @@ -666,6 +669,9 @@ if (vec_const_use_xxspltidp (&vec_const)) return true; + if (vec_const_use_xxspltiw (&vec_const)) + return true; + return false; }) @@ -744,6 +750,9 @@ if (vec_const_use_xxspltidp (&vec_const)) return true; + + if (vec_const_use_xxspltiw (&vec_const)) + return true; } return easy_altivec_constant (op, mode); diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 6e8b81cb134..52f094dd410 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -240,11 +240,13 @@ typedef struct { unsigned char bytes[VECTOR_CONST_BYTES]; machine_mode orig_mode; /* Original mode. */ unsigned int xxspltidp_immediate; /* Immediate value for XXSPLTIDP. */ + unsigned int xxspltiw_immediate; /* Immediate value for XXSPLTIW. */ unsigned int lxvkq_immediate; /* Immediate to use with LXVKQ. */ } rs6000_vec_const; extern bool vec_const_to_bytes (rtx, machine_mode, rs6000_vec_const *); extern bool vec_const_use_xxspltidp (rs6000_vec_const *); +extern bool vec_const_use_xxspltiw (rs6000_vec_const *); extern bool vec_const_use_lxvkq (rs6000_vec_const *); #endif /* RTX_CODE */ diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index d238dd84fe7..838161fb23a 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -6925,12 +6925,17 @@ xxspltib_constant_p (rtx op, else return false; - /* See if we could generate vspltisw/vspltish directly instead of xxspltib + - sign extend. Special case 0/-1 to allow getting any VSX register instead - of an Altivec register. 
*/ - if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0) - && EASY_VECTOR_15 (value)) - return false; + /* See if we could generate vspltisw/vspltish/xxspltiw directly instead of + xxspltib + sign extend. Special case 0/-1 to allow getting any VSX + register instead of an Altivec register. */ + if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)) + { + if (EASY_VECTOR_15 (value)) + return false; + + if (TARGET_XXSPLTIW && TARGET_PREFIXED && TARGET_VSX) + return false; + } /* Return # of instructions and the constant byte for XXSPLTIB. */ if (mode == V16QImode) @@ -7004,6 +7009,52 @@ output_vec_const_move (rtx *operands) operands[2] = GEN_INT (vec_const.xxspltidp_immediate); return "xxspltidp %x0,%2"; } + + if (vec_const_use_xxspltiw (&vec_const)) + { + HOST_WIDE_INT imm = vec_const.xxspltiw_immediate; + + /* See if we can generate the shorter VSPLTISB, VSPLTISH, or + VSPLTISW instead of XXSPLTIW. */ + if (dest_vmx_p) + { + HOST_WIDE_INT sign_imm + = ((imm & 0xffffffff) ^ 0x80000000) - 0x80000000; + + if (EASY_VECTOR_15 (sign_imm)) + { + operands[2] = GEN_INT (sign_imm); + return "vspltisw %0,%2"; + } + + if (vec_const.bytes[0] == vec_const.bytes[1] + && vec_const.bytes[0] == vec_const.bytes[2] + && vec_const.bytes[0] == vec_const.bytes[3]) + { + HOST_WIDE_INT sign_imm8 = ((imm & 0xff) ^ 0x80) - 0x80; + if (EASY_VECTOR_15 (sign_imm8)) + { + operands[2] = GEN_INT (sign_imm8); + return "vspltisb %0,%2"; + } + } + + if (vec_const.h_words[0] == vec_const.h_words[1]) + { + HOST_WIDE_INT sign_imm16 + = ((imm & 0xffff) ^ 0x8000) - 0x8000; + + if (EASY_VECTOR_15 (sign_imm16)) + { + operands[2] = GEN_INT (sign_imm16); + return "vspltish %0,%2"; + } + } + } + + operands[2] = GEN_INT (imm); + return "xxspltiw %x0,%2"; + } } if (TARGET_P9_VECTOR @@ -26770,6 +26821,9 @@ prefixed_xxsplti_p (rtx_insn *insn) { if (vec_const_use_xxspltidp (&vec_const)) return true; + + if (vec_const_use_xxspltiw (&vec_const)) + return true; } return false; @@ 
-28784,6 +28838,60 @@ vec_const_use_xxspltidp (rs6000_vec_const *vec_const) return true; } +/* Determine if a vector constant can be loaded with XXSPLTIW. If so, + fill out the fields used to generate the instruction. */ + +bool +vec_const_use_xxspltiw (rs6000_vec_const *vec_const) +{ + if (!TARGET_XXSPLTIW || !TARGET_PREFIXED || !TARGET_VSX) + return false; + + /* Make sure that each of the 4 32-bit segments is the same. */ + unsigned int value = vec_const->words[0]; + if (value != vec_const->words[1] + || value != vec_const->words[2] + || value != vec_const->words[3]) + return false; + + /* Avoid values that are easy to create with other instructions (0.0 for + floating point, and values that can be loaded with VSPLTISW, VSPLTISH, + VSPLTISB, or XXSPLTIB). */ + if (value == 0) + return false; + + machine_mode mode = vec_const->orig_mode; + if (mode == VOIDmode) + mode = SImode; + + if (!FLOAT_MODE_P (mode)) + { + /* Can we use VSPLTISW to load the constant? */ + int sign_value = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000; + if (EASY_VECTOR_15 (sign_value)) + return false; + + /* Can we use VSPLTISH to load the constant? */ + if (vec_const->h_words[0] == vec_const->h_words[1]) + { + int sign_value16 = ((value & 0xffff) ^ 0x8000) - 0x8000; + if (EASY_VECTOR_15 (sign_value16)) + return false; + } + + /* Can we use XXSPLTIB/VSPLTISB to load the constant? */ + if (vec_const->bytes[0] == vec_const->bytes[1] + && vec_const->bytes[0] == vec_const->bytes[2] + && vec_const->bytes[0] == vec_const->bytes[3]) + return false; + } + + /* Record the immediate in the vec_const structure for XXSPLTIW. */ + vec_const->xxspltiw_immediate = value; + + return true; +} + /* Determine if a vector constant can be loaded with LXVKQ. If so, fill out the fields used to generate the instruction. 
*/ diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index c9eb78952d6..015bf91b6d5 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -644,6 +644,10 @@ mxxspltidp Target Undocumented Var(TARGET_XXSPLTIDP) Init(1) Save Generate (do not generate) XXSPLTIDP instructions. +mxxspltiw +Target Undocumented Var(TARGET_XXSPLTIW) Init(1) Save +Generate (do not generate) XXSPLTIW instructions. + mlxvkq Target Undocumented Var(TARGET_LXVKQ) Init(1) Save Generate (do not generate) LXVKQ instructions. diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 15a22525000..07b0b671920 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -1192,7 +1192,7 @@ ;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR) ;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW -;; XXLSPLTIDP LXVKQ +;; XXSPLTI* LXVKQ ;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX) (define_insn "vsx_mov_64bit" [(set (match_operand:VSX_M 0 "nonimmediate_operand" @@ -1241,7 +1241,7 @@ ;; VSX store VSX load VSX move GPR load GPR store GPR move ;; XXSPLTIB VSPLTISW VSX 0/-1 -;; XXSPLTIDP LXVKQ +;; XXSPLTI* LXVKQ ;; VMX const GPR const ;; LVX (VMX) STVX (VMX) (define_insn "*vsx_mov_32bit" diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c new file mode 100644 index 00000000000..2707d86e6fd --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -mxxspltiw" } */ + +#include <altivec.h> + +/* Test whether XXSPLTIW is generated for V16QI vector constants where the + first 4 elements are the same as the next 4 elements, etc. */ + +vector unsigned char +v16qi_const_1 (void) +{ + return (vector unsigned char) { 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, }; /* VSPLTISB. 
*/ +} + +vector unsigned char +v16qi_const_2 (void) +{ + return (vector unsigned char) { 1, 2, 3, 4, 1, 2, 3, 4, + 1, 2, 3, 4, 1, 2, 3, 4, }; /* XXSPLTIW. */ +} + +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvspltisb\M|\mxxspltib\M} 1 } } */ +/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mplxv\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c new file mode 100644 index 00000000000..05d4ee3f5cb --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c @@ -0,0 +1,67 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -mxxspltiw" } */ + +#include <altivec.h> + +/* Test whether XXSPLTIW is generated for V4SF vector constants. */ + +vector float +v4sf_const_1 (void) +{ + return (vector float) { 1.0f, 1.0f, 1.0f, 1.0f }; /* XXSPLTIW. */ +} + +vector float +v4sf_const_nan (void) +{ + return (vector float) { __builtin_nanf (""), + __builtin_nanf (""), + __builtin_nanf (""), + __builtin_nanf ("") }; /* XXSPLTIW. */ +} + +vector float +v4sf_const_inf (void) +{ + return (vector float) { __builtin_inff (), + __builtin_inff (), + __builtin_inff (), + __builtin_inff () }; /* XXSPLTIW. */ +} + +vector float +v4sf_const_m0 (void) +{ + return (vector float) { -0.0f, -0.0f, -0.0f, -0.0f }; /* XXSPLTIB/VSLW. */ +} + +vector float +v4sf_splats_1 (void) +{ + return vec_splats (1.0f); /* XXSPLTIW. */ +} + +vector float +v4sf_splats_nan (void) +{ + return vec_splats (__builtin_nanf ("")); /* XXSPLTIW. */ +} + +vector float +v4sf_splats_inf (void) +{ + return vec_splats (__builtin_inff ()); /* XXSPLTIW. */ +} + +vector float +v8hi_splats_m0 (void) +{ + return vec_splats (-0.0f); /* XXSPLTIB/VSLW. 
*/ +} + +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mxxspltib\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvslw\M} 2 } } */ +/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mplxv\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c new file mode 100644 index 00000000000..da909e948b2 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -mxxspltiw" } */ + +#include <altivec.h> + +/* Test whether XXSPLTIW is generated for V4SI vector constants. We make sure + the power9 support (XXSPLTIB/VEXTSB2W) is not done. */ + +vector int +v4si_const_1 (void) +{ + return (vector int) { 1, 1, 1, 1 }; /* VSPLTISW. */ +} + +vector int +v4si_const_126 (void) +{ + return (vector int) { 126, 126, 126, 126 }; /* XXSPLTIW. */ +} + +vector int +v4si_const_1023 (void) +{ + return (vector int) { 1023, 1023, 1023, 1023 }; /* XXSPLTIW. */ +} + +vector int +v4si_splats_1 (void) +{ + return vec_splats (1); /* VSPLTISW. */ +} + +vector int +v4si_splats_126 (void) +{ + return vec_splats (126); /* XXSPLTIW. */ +} + +vector int +v8hi_splats_1023 (void) +{ + return vec_splats (1023); /* XXSPLTIW. 
*/ +} + +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mvspltisw\M} 2 } } */ +/* { dg-final { scan-assembler-not {\mxxspltib\M} } } */ +/* { dg-final { scan-assembler-not {\mvextsb2w\M} } } */ +/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mplxv\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c new file mode 100644 index 00000000000..290e05d4a64 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c @@ -0,0 +1,62 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -mxxspltiw" } */ + +#include <altivec.h> + +/* Test whether XXSPLTIW is generated for V8HI vector constants. We make sure + the power9 support (XXSPLTIB/VUPKLSB) is not done. */ + +vector short +v8hi_const_1 (void) +{ + return (vector short) { 1, 1, 1, 1, 1, 1, 1, 1 }; /* VSPLTISH. */ +} + +vector short +v8hi_const_126 (void) +{ + return (vector short) { 126, 126, 126, 126, + 126, 126, 126, 126 }; /* XXSPLTIW. */ +} + +vector short +v8hi_const_1023 (void) +{ + return (vector short) { 1023, 1023, 1023, 1023, + 1023, 1023, 1023, 1023 }; /* XXSPLTIW. */ +} + +vector short +v8hi_splats_1 (void) +{ + return vec_splats ((short)1); /* VSPLTISH. */ +} + +vector short +v8hi_splats_126 (void) +{ + return vec_splats ((short)126); /* XXSPLTIW. */ +} + +vector short +v8hi_splats_1023 (void) +{ + return vec_splats ((short)1023); /* XXSPLTIW. */ +} + +/* Test that we can optimize V8HI where all of the even elements are the same + and all of the odd elements are the same. */ +vector short +v8hi_const_1023_1000 (void) +{ + return (vector short) { 1023, 1000, 1023, 1000, + 1023, 1000, 1023, 1000 }; /* XXSPLTIW. 
*/ +} + +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 5 } } */ +/* { dg-final { scan-assembler-times {\mvspltish\M} 2 } } */ +/* { dg-final { scan-assembler-not {\mxxspltib\M} } } */ +/* { dg-final { scan-assembler-not {\mvupklsb\M} } } */ +/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mplxv\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c b/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c index 5f84930e1a7..6c01666b625 100644 --- a/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c +++ b/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c @@ -149,7 +149,7 @@ main (int argc, char *argv []) return 0; } -/* { dg-final { scan-assembler-times {\mxxspltiw\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 3 } } */ /* { dg-final { scan-assembler-times {\mxxspltidp\M} 3 } } */ /* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */