From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id 6A17C3851C35; Tue, 25 May 2021 05:20:37 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 6A17C3851C35 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work053)] Generate LXVKQ on power10. X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/work053 X-Git-Oldrev: d3d18c4236b7a5e3fe1b1ceb70504e29247ddcf5 X-Git-Newrev: 94a1b09f2658272d64f3bd617b6b031dbcdaed22 Message-Id: <20210525052037.6A17C3851C35@sourceware.org> Date: Tue, 25 May 2021 05:20:37 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 25 May 2021 05:20:37 -0000 https://gcc.gnu.org/g:94a1b09f2658272d64f3bd617b6b031dbcdaed22 commit 94a1b09f2658272d64f3bd617b6b031dbcdaed22 Author: Michael Meissner Date: Tue May 25 01:20:19 2021 -0400 Generate LXVKQ on power10. This patch generates the LXVKQ instruction to load certain IEEE 128-bit constants. gcc/ 2021-05-25 Michael Meissner * config/rs6000/constraints.md (eQ): New constraint. * config/rs6000/predicates.md (easy_fp_constant): If the constant can be loaded with LXVKQ, it is easy. (lxvkq_operand): New predicate. * config/rs6000/rs6000-protos.h (lxvkq_constant_p): New declaration. * config/rs6000/rs6000-cpus.def (ISA_3_1_MASKS_SERVER): Add -mlxvkq. (POWERPC_MASKS): Add -mlxvkq. * config/rs6000/rs6000.c (rs6000_option_override_internal): Add support for -mlxvkq. (lxvkq_constant_p): New function. (rs6000_output_move_128bit): Add support for generating lxvkq. (rs6000_opt_masks): Add -mlxvkq. * config/rs6000/rs6000.opt (-mlxvkq): New option. * config/rs6000/vsx.md (vsx_mov_64bit): Add support to generate lxvkq. 
(vsx_mov_32bit): Add support to generate lxvkq. gcc/testsuite/ 2021-05-20 Michael Meissner * gcc.target/powerpc/float128-lxvkq.c: New test. Diff: --- gcc/config/rs6000/constraints.md | 5 + gcc/config/rs6000/predicates.md | 14 +++ gcc/config/rs6000/rs6000-cpus.def | 2 + gcc/config/rs6000/rs6000-protos.h | 1 + gcc/config/rs6000/rs6000.c | 96 ++++++++++++++- gcc/config/rs6000/rs6000.opt | 4 + gcc/config/rs6000/vsx.md | 28 ++--- gcc/testsuite/gcc.target/powerpc/float128-lxvkq.c | 144 ++++++++++++++++++++++ 8 files changed, 279 insertions(+), 15 deletions(-) diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index d665e2a94db..d14ce98e9ac 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -224,6 +224,11 @@ "A signed 34-bit integer constant if prefixed instructions are supported." (match_operand 0 "cint34_operand")) +;; KF/TF scalar that can be loaded with LXVKQ +(define_constraint "eQ" + "An IEEE 128-bit constant that can be loaded with the LXVKQ instruction." + (match_operand 0 "lxvkq_operand")) + ;; Floating-point constraints. These two are defined so that insn ;; length attributes can be calculated exactly. diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index fc30b69018d..0c17db42962 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -611,6 +611,11 @@ if (xxsplti32dx_operand (op, mode)) return 1; + /* If we have the ISA 3.1 LXVKQ instruction, see if the constant can be loaded + with that instruction. */ + if (lxvkq_operand (op, mode)) + return 1; + /* Otherwise consider floating point constants hard, so that the constant gets pushed to memory during the early RTL phases. This has the advantage that double precision constants that can be @@ -699,6 +704,15 @@ return xxsplti32dx_constant_p (op, mode, &high, &low); }) +;; Return 1 if the operand is an IEEE 128-bit special constant that can be +;; loaded with the LXVKQ instruction. 
+(define_predicate "lxvkq_operand" + (match_code "const_double") +{ + int immediate = 0; + return lxvkq_constant_p (op, mode, &immediate); +}) + ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a ;; vector register without using memory. (define_predicate "easy_vector_constant" diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index b0821b34a69..665e678c39c 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -83,6 +83,7 @@ #define ISA_3_1_MASKS_SERVER (ISA_3_0_MASKS_SERVER \ | OPTION_MASK_POWER10 \ | OTHER_POWER10_MASKS \ + | OPTION_MASK_LXVKQ \ | OPTION_MASK_P10_FUSION \ | OPTION_MASK_P10_FUSION_LD_CMPI \ | OPTION_MASK_P10_FUSION_2LOGICAL \ @@ -140,6 +141,7 @@ | OPTION_MASK_P10_FUSION_2LOGICAL \ | OPTION_MASK_HTM \ | OPTION_MASK_ISEL \ + | OPTION_MASK_LXVKQ \ | OPTION_MASK_MFCRF \ | OPTION_MASK_MMA \ | OPTION_MASK_MODULO \ diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index ce1a2fd1473..d71aef11bed 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -35,6 +35,7 @@ extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *); extern bool xxspltidp_constant_p (rtx, machine_mode, HOST_WIDE_INT *); extern bool xxsplti32dx_constant_p (rtx, machine_mode, HOST_WIDE_INT *, HOST_WIDE_INT *); +extern bool lxvkq_constant_p (rtx, machine_mode, int *); extern int vspltis_shifted (rtx); extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); extern bool macho_lo_sum_memory_operand (rtx, machine_mode); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 7aca290918e..a0586804625 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -4497,9 +4497,13 @@ rs6000_option_override_internal (bool global_init_p) if ((rs6000_isa_flags_explicit & OPTION_MASK_XXSPLTIDP) == 0) rs6000_isa_flags |= OPTION_MASK_XXSPLTIDP; + + if ((rs6000_isa_flags_explicit & 
OPTION_MASK_LXVKQ) == 0) + rs6000_isa_flags |= OPTION_MASK_LXVKQ; } else - rs6000_isa_flags &= ~(OPTION_MASK_XXSPLTIW + rs6000_isa_flags &= ~(OPTION_MASK_LXVKQ + | OPTION_MASK_XXSPLTIW | OPTION_MASK_XXSPLTIDP | OPTION_MASK_XXSPLTI32DX); @@ -6704,6 +6708,86 @@ xxsplti32dx_constant_p (rtx op, return false; } +/* Return true if OP of the given MODE is one of the 18 special values that + can be generated with the LXVKQ instruction. + + Return the constant that will go in the LXVKQ instruction in *IMM_P. + + The LXVKQ immediates are: + 1 - 7: 1.0 .. 7.0. + 8: Positive infinity. + 9: Default quiet NaN. + 16: -0.0. + 17 - 23: -1.0 .. -7.0. + 24: Negative infinity. */ + +bool +lxvkq_constant_p (rtx op, + machine_mode mode, + int *imm_p) +{ + *imm_p = -1; + + if (!TARGET_LXVKQ) + return false; + + if (mode == VOIDmode) + mode = GET_MODE (op); + + if (!FLOAT128_IEEE_P (mode)) + return false; + + if (!CONST_DOUBLE_P (op)) + return false; + + /* All of the values generated can be expressed as SFmode values, so if it + doesn't fit in SFmode, exit. */ + const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op); + if (!exact_real_truncate (SFmode, rv)) + return 0; + + /* +/- Infinity is 8/24. */ + if (REAL_VALUE_ISINF (*rv)) + { + *imm_p = real_isneg (rv) ? 24 : 8; + return true; + } + + /* NaN is 9. */ + if (REAL_VALUE_ISNAN (*rv) && !REAL_VALUE_NEGATIVE (*rv)) + { + *imm_p = 9; + return true; + } + + /* -0.0 is 16. */ + if (REAL_VALUE_MINUS_ZERO (*rv)) + { + *imm_p = 16; + return true; + } + + /* The other values are all integers 1..7, and -1..-7. */ + if (!real_isinteger (rv, mode)) + return false; + + HOST_WIDE_INT value = real_to_integer (rv); + if (value >= 1 && value <= 7) + { + *imm_p = value; + return true; + } + else if (value >= -7 && value <= -1) + { + /* Subtraction is used because value is negative. */ + *imm_p = 16 - value; + return true; + } + + /* We can't load the value with LXVKQ. 
*/ + return false; +} + const char * output_vec_const_move (rtx *operands) { @@ -13476,6 +13560,7 @@ rs6000_output_move_128bit (rtx operands[]) int src_regno; bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p; bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p; + int lxvkq_immediate = 0; if (REG_P (dest)) { @@ -13620,6 +13705,14 @@ rs6000_output_move_128bit (rtx operands[]) } /* Constants. */ + else if (dest_vmx_p + && CONST_DOUBLE_P (src) + && lxvkq_constant_p (src, mode, &lxvkq_immediate)) + { + operands[2] = GEN_INT (lxvkq_immediate); + return "lxvkq %x0,%2"; + } + else if (dest_regno >= 0 && (CONST_INT_P (src) || CONST_WIDE_INT_P (src) @@ -24319,6 +24412,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] = { "hard-dfp", OPTION_MASK_DFP, false, true }, { "htm", OPTION_MASK_HTM, false, true }, { "isel", OPTION_MASK_ISEL, false, true }, + { "lxvkq", OPTION_MASK_LXVKQ, false, true }, { "mfcrf", OPTION_MASK_MFCRF, false, true }, { "mfpgpr", 0, false, true }, { "mma", OPTION_MASK_MMA, false, true }, diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 352d4a72ae4..5bf96209b83 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -639,3 +639,7 @@ Generate (do not generate) XXSPLTIDP instructions. mxxsplti32dx Target Undocumented Mask(XXSPLTI32DX) Var(rs6000_isa_flags) Generate (do not generate) XXSPLTI32DX instructions. + +mlxvkq +Target Undocumented Mask(LXVKQ) Var(rs6000_isa_flags) +Generate (do not generate) LXVKQ instructions. 
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index e84feffa8d3..bc708113865 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -1189,17 +1189,17 @@ ;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR) ;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW -;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX) +;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX) LXVKQ (define_insn "vsx_mov_64bit" [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=ZwO, wa, wa, r, we, ?wQ, ?&r, ??r, ??Y, , wa, v, - ?wa, v, , wZ, v") + ?wa, v, , wZ, v, wa") (match_operand:VSX_M 1 "input_operand" "wa, ZwO, wa, we, r, r, wQ, Y, r, r, wE, jwM, - ?jwM, W, , v, wZ"))] + ?jwM, W, , v, wZ, eQ"))] "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (mode) && (register_operand (operands[0], mode) @@ -1210,37 +1210,37 @@ [(set_attr "type" "vecstore, vecload, vecsimple, mtvsr, mfvsr, load, store, load, store, *, vecsimple, vecsimple, - vecsimple, *, *, vecstore, vecload") + vecsimple, *, *, vecstore, vecload, vecsimple") (set_attr "num_insns" "*, *, *, 2, *, 2, 2, 2, 2, 2, *, *, - *, 5, 2, *, *") + *, 5, 2, *, *, *") (set_attr "max_prefixed_insns" "*, *, *, *, *, 2, 2, 2, 2, 2, *, *, - *, *, *, *, *") + *, *, *, *, *, *") (set_attr "length" "*, *, *, 8, *, 8, 8, 8, 8, 8, *, *, - *, 20, 8, *, *") + *, 20, 8, *, *, *") (set_attr "isa" ", , , *, *, *, *, *, *, *, p9v, *, - , *, *, *, *")]) + , *, *, *, *, p10")]) ;; VSX store VSX load VSX move GPR load GPR store GPR move ;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const -;; LVX (VMX) STVX (VMX) +;; LVX (VMX) STVX (VMX) LXVKQ (define_insn "*vsx_mov_32bit" [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=ZwO, wa, wa, ??r, ??Y, , wa, v, ?wa, v, , - wZ, v") + wZ, v, wa") (match_operand:VSX_M 1 "input_operand" "wa, ZwO, wa, Y, r, r, wE, jwM, ?jwM, W, , - v, wZ"))] + v, wZ, eQ"))] "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (mode) && (register_operand (operands[0], mode) @@ -1251,15 +1251,15 @@ [(set_attr "type" 
"vecstore, vecload, vecsimple, load, store, *, vecsimple, vecsimple, vecsimple, *, *, - vecstore, vecload") + vecstore, vecload, vecsimple") (set_attr "length" "*, *, *, 16, 16, 16, *, *, *, 20, 16, - *, *") + *, *, *") (set_attr "isa" ", , , *, *, *, p9v, *, , *, *, - *, *")]) + *, *, p10")]) ;; Explicit load/store expanders for the builtin functions (define_expand "vsx_load_" diff --git a/gcc/testsuite/gcc.target/powerpc/float128-lxvkq.c b/gcc/testsuite/gcc.target/powerpc/float128-lxvkq.c new file mode 100644 index 00000000000..a5cbe0b477f --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/float128-lxvkq.c @@ -0,0 +1,144 @@ +/* { dg-require-effective-target ppc_float128_hw } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test whether the LXVKQ instruction is generated to load special IEEE 128-bit + constants. */ + +_Float128 +return_0 (void) +{ + return 0.0f128; /* XXSPLTIB 34,0. */ +} + +_Float128 +return_1 (void) +{ + return 1.0f128; /* LXVKQ 34,1. */ +} + +_Float128 +return_2 (void) +{ + return 2.0f128; /* LXVKQ 34,2. */ +} + +_Float128 +return_3 (void) +{ + return 3.0f128; /* LXVKQ 34,3. */ +} + +_Float128 +return_4 (void) +{ + return 4.0f128; /* LXVKQ 34,4. */ +} + +_Float128 +return_5 (void) +{ + return 5.0f128; /* LXVKQ 34,5. */ +} + +_Float128 +return_6 (void) +{ + return 6.0f128; /* LXVKQ 34,6. */ +} + +_Float128 +return_7 (void) +{ + return 7.0f128; /* LXVKQ 34,7. */ +} + +_Float128 +return_m0 (void) +{ + return -0.0f128; /* LXVKQ 34,16. */ +} + +_Float128 +return_m1 (void) +{ + return -1.0f128; /* LXVKQ 34,17. */ +} + +_Float128 +return_m2 (void) +{ + return -2.0f128; /* LXVKQ 34,18. */ +} + +_Float128 +return_m3 (void) +{ + return -3.0f128; /* LXVKQ 34,19. */ +} + +_Float128 +return_m4 (void) +{ + return -4.0f128; /* LXVKQ 34,20. */ +} + +_Float128 +return_m5 (void) +{ + return -5.0f128; /* LXVKQ 34,21. */ +} + +_Float128 +return_m6 (void) +{ + return -6.0f128; /* LXVKQ 34,22. 
*/ +} + +_Float128 +return_m7 (void) +{ + return -7.0f128; /* LXVKQ 34,23. */ +} + +_Float128 +return_inf (void) +{ + return __builtin_inff128 (); /* LXVKQ 34,8. */ +} + +_Float128 +return_minf (void) +{ + return - __builtin_inff128 (); /* LXVKQ 34,24. */ +} + +_Float128 +return_nan (void) +{ + return __builtin_nanf128 (""); /* LXVKQ 34,9. */ +} + +/* Note, the following NaNs should not generate a LXVKQ instruction. */ +_Float128 +return_mnan (void) +{ + return - __builtin_nanf128 (""); /* PLXV 34,... */ +} + +_Float128 +return_nan2 (void) +{ + return __builtin_nanf128 ("1"); /* PLXV 34,... */ +} + +_Float128 +return_nans (void) +{ + return __builtin_nansf128 (""); /* PLXV 34,... */ +} + +/* { dg-final { scan-assembler-times {\mlxvkq\M} 18 } } */ +/* { dg-final { scan-assembler-times {\mplxv\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mxxspltib\M} 1 } } */ +