public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: Michael Meissner <meissner@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work071)] Add LXVKQ support. Date: Mon, 18 Oct 2021 19:12:12 +0000 (GMT) [thread overview] Message-ID: <20211018191212.F40B83858C27@sourceware.org> (raw) https://gcc.gnu.org/g:86ae6e0d17cb144be310c96dd425383000534e45 commit 86ae6e0d17cb144be310c96dd425383000534e45 Author: Michael Meissner <meissner@linux.ibm.com> Date: Mon Oct 18 15:09:45 2021 -0400 Add LXVKQ support. This patch adds support to generate the LXVKQ instruction to load specific IEEE-128 floating point constants. Compared to the last time I submitted this patch, I modified it so that it uses the bit pattern of the vector to see if it can generate the LXVKQ instruction. This means on a little endian Power<xxx> system, the following code will generate a LXVKQ 34,16 instruction: vector long long foo (void) { return (vector long long) { 0x0000000000000000, 0x8000000000000000 }; } because that vector pattern is the same bit pattern as -0.0F128. 2021-10-18 Michael Meissner <meissner@the-meissners.org> gcc/ * config/rs6000/constraints.md (eQ): New constraint. * config/rs6000/predicates.md (easy_fp_constant): Add support for generating the LXVKQ instruction. (easy_vector_constant_ieee128): New predicate. (easy_vector_constant): Add support for generating the LXVKQ instruction. * config/rs6000/rs6000-protos.h (rs6000_vec_const): Add fields for generating LXVKQ. * config/rs6000/rs6000.c (output_vec_const_move): Add support for generating LXVKQ. (vec_const_use_lxvkq): New function. * config/rs6000/rs6000.opt (-mlxvkq): New debug option. * config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for generating LXVKQ. (vsx_mov<mode>_32bit): Likewise. * doc/md.texi (PowerPC and IBM RS6000 constraints): Document the eQ constraint. gcc/testsuite/ * gcc.target/powerpc/float128-constant.c: New test. 
Diff: --- gcc/config/rs6000/constraints.md | 5 + gcc/config/rs6000/predicates.md | 22 +++ gcc/config/rs6000/rs6000-protos.h | 2 + gcc/config/rs6000/rs6000.c | 56 ++++++++ gcc/config/rs6000/rs6000.opt | 4 + gcc/config/rs6000/vsx.md | 28 ++-- gcc/doc/md.texi | 3 + .../gcc.target/powerpc/float128-constant.c | 160 +++++++++++++++++++++ 8 files changed, 266 insertions(+), 14 deletions(-) diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index 7d594872a78..f5b524254ab 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -219,6 +219,11 @@ "A constant that can be loaded into a VSX register with one prefixed insn." (match_operand 0 "vsx_prefixed_constant")) +;; 128-bit IEEE 128-bit constant +(define_constraint "eQ" + "An IEEE 128-bit constant that can be loaded with the LXVKQ instruction." + (match_operand 0 "easy_vector_constant_ieee128")) + ;; Floating-point constraints. These two are defined so that insn ;; length attributes can be calculated exactly. diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 40c4cba68ff..c271b379f6d 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -606,6 +606,9 @@ if (TARGET_POWER10 && vec_const_to_bytes (op, mode, &vec_const)) { + if (vec_const_use_lxvkq (&vec_const)) + return true; + if (vec_const_use_xxspltidp (&vec_const)) return true; @@ -661,6 +664,22 @@ return false; }) +;; Return 1 if the operand is a special IEEE 128-bit value that can be loaded +;; via the LXVKQ instruction. + +(define_predicate "easy_vector_constant_ieee128" + (match_code "const_vector,const_double") +{ + rs6000_vec_const vec_const; + + /* Can we generate the LXVKQ instruction? 
*/ + if (!TARGET_LXVKQ || !TARGET_FLOAT128_HW || !TARGET_POWER10 || !TARGET_VSX) + return false; + + return (vec_const_to_bytes (op, mode, &vec_const) + && vec_const_use_lxvkq (&vec_const)); +}) + ;; Return 1 if the operand is a constant that can loaded with a XXSPLTIB ;; instruction and then a VUPKHSB, VECSB2W or VECSB2D instruction. @@ -715,6 +734,9 @@ if (TARGET_POWER10 && vec_const_to_bytes (op, mode, &vec_const)) { + if (vec_const_use_lxvkq (&vec_const)) + return true; + if (vec_const_use_xxspltidp (&vec_const)) return true; diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index b12f6b10c13..40a796d4461 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -240,6 +240,7 @@ typedef struct { unsigned char bytes[VECTOR_CONST_BYTES]; unsigned int xxspltidp_immediate; /* Immediate value for XXSPLTIDP. */ + unsigned int lxvkq_immediate; /* Immediate value for LXVKQ. */ bool fp_constant_p; /* Is the constant floating point? */ bool all_double_words_same; /* Are the double words all equal? */ bool all_words_same; /* Are the words all equal? 
*/ @@ -250,6 +251,7 @@ typedef struct { extern bool vec_const_to_bytes (rtx, machine_mode, rs6000_vec_const *); extern bool vec_const_use_xxspltidp (rs6000_vec_const *); extern bool vec_const_use_xxspltiw (rs6000_vec_const *); +extern bool vec_const_use_lxvkq (rs6000_vec_const *); #endif /* RTX_CODE */ #ifdef TREE_CODE diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 20226169ba2..fbcd307177c 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -6998,6 +6998,12 @@ output_vec_const_move (rtx *operands) rs6000_vec_const vec_const; if (TARGET_POWER10 && vec_const_to_bytes (vec, mode, &vec_const)) { + if (vec_const_use_lxvkq (&vec_const)) + { + operands[2] = GEN_INT (vec_const.lxvkq_immediate); + return "lxvkq %x0,%2"; + } + if (vec_const_use_xxspltidp (&vec_const)) { operands[2] = GEN_INT (vec_const.xxspltidp_immediate); @@ -28827,6 +28833,56 @@ vec_const_use_xxspltiw (rs6000_vec_const *vec_const) return true; } +/* Determine if a vector constant can be loaded with LXVKQ. If so, fill out + the fields used to generate the instruction. */ + +bool +vec_const_use_lxvkq (rs6000_vec_const *vec_const) +{ + unsigned immediate; + + if (!TARGET_LXVKQ || !TARGET_PREFIXED || !TARGET_VSX) + return false; + + /* Verify that all of the bottom 3 words in the constants loaded by the + LXVKQ instruction are zero. */ + for (size_t i = 1; i < VECTOR_CONST_WORDS; i++) + if (vec_const->words[i] != 0) + return false; + + /* See if we have a match. */ + switch (vec_const->words[0]) + { + case 0x3FFF0000U: immediate = 1; break; /* IEEE 128-bit +1.0. */ + case 0x40000000U: immediate = 2; break; /* IEEE 128-bit +2.0. */ + case 0x40008000U: immediate = 3; break; /* IEEE 128-bit +3.0. */ + case 0x40010000U: immediate = 4; break; /* IEEE 128-bit +4.0. */ + case 0x40014000U: immediate = 5; break; /* IEEE 128-bit +5.0. */ + case 0x40018000U: immediate = 6; break; /* IEEE 128-bit +6.0. */ + case 0x4001C000U: immediate = 7; break; /* IEEE 128-bit +7.0. 
*/ + case 0x7FFF0000U: immediate = 8; break; /* IEEE 128-bit +Infinity. */ + case 0x7FFF8000U: immediate = 9; break; /* IEEE 128-bit quiet NaN. */ + case 0x80000000U: immediate = 16; break; /* IEEE 128-bit -0.0. */ + case 0xBFFF0000U: immediate = 17; break; /* IEEE 128-bit -1.0. */ + case 0xC0000000U: immediate = 18; break; /* IEEE 128-bit -2.0. */ + case 0xC0008000U: immediate = 19; break; /* IEEE 128-bit -3.0. */ + case 0xC0010000U: immediate = 20; break; /* IEEE 128-bit -4.0. */ + case 0xC0014000U: immediate = 21; break; /* IEEE 128-bit -5.0. */ + case 0xC0018000U: immediate = 22; break; /* IEEE 128-bit -6.0. */ + case 0xC001C000U: immediate = 23; break; /* IEEE 128-bit -7.0. */ + case 0xFFFF0000U: immediate = 24; break; /* IEEE 128-bit -Infinity. */ + + /* anything else cannot be loaded. */ + default: + return false; + } + + /* We can use the LXVKQ instruction, record the immediate needed for the + instruction. */ + vec_const->lxvkq_immediate = immediate; + return true; +} + /* Convert a vector constant to an internal structure, breaking it out to bytes, half words, words, and double words. Return true if we have successfully broken it out. */ diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 332f61be0ba..015bf91b6d5 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -648,6 +648,10 @@ mxxspltiw Target Undocumented Var(TARGET_XXSPLTIW) Init(1) Save Generate (do not generate) XXSPLTIW instructions. +mlxvkq +Target Undocumented Var(TARGET_LXVKQ) Init(1) Save +Generate (do not generate) LXVKQ instructions. 
+ -param=rs6000-density-pct-threshold= Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param When costing for loop vectorization, we probably need to penalize the loop body diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 0ceecc1975c..ce8402101ef 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -1192,19 +1192,19 @@ ;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR) ;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW -;; XXLSPLTI* +;; XXLSPLTI* LXVKQ ;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX) (define_insn "vsx_mov<mode>_64bit" [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=ZwO, wa, wa, r, we, ?wQ, ?&r, ??r, ??Y, <??r>, wa, v, - wa, + wa, wa, ?wa, v, <??r>, wZ, v") (match_operand:VSX_M 1 "input_operand" "wa, ZwO, wa, we, r, r, wQ, Y, r, r, wE, jwM, - eP, + eP, eQ, ?jwM, W, <nW>, v, wZ"))] "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) @@ -1216,46 +1216,46 @@ [(set_attr "type" "vecstore, vecload, vecsimple, mtvsr, mfvsr, load, store, load, store, *, vecsimple, vecsimple, - vecperm, + vecperm, vecperm, vecsimple, *, *, vecstore, vecload") (set_attr "num_insns" "*, *, *, 2, *, 2, 2, 2, 2, 2, *, *, - *, + *, *, *, 5, 2, *, *") (set_attr "max_prefixed_insns" "*, *, *, *, *, 2, 2, 2, 2, 2, *, *, - *, + *, *, *, *, *, *, *") (set_attr "length" "*, *, *, 8, *, 8, 8, 8, 8, 8, *, *, - *, + *, *, *, 20, 8, *, *") (set_attr "isa" "<VSisa>, <VSisa>, <VSisa>, *, *, *, *, *, *, *, p9v, *, - p10, + p10, p10, <VSisa>, *, *, *, *")]) ;; VSX store VSX load VSX move GPR load GPR store GPR move ;; XXSPLTIB VSPLTISW VSX 0/-1 -;; XXSPLTI* +;; XXSPLTI* LXVKQ ;; VMX const GPR const ;; LVX (VMX) STVX (VMX) (define_insn "*vsx_mov<mode>_32bit" [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=ZwO, wa, wa, ??r, ??Y, <??r>, wa, v, ?wa, - wa, + wa, wa, v, <??r>, wZ, v") (match_operand:VSX_M 1 "input_operand" "wa, ZwO, wa, Y, r, r, wE, jwM, ?jwM, - eP, + eP, eQ, W, 
<nW>, v, wZ"))] @@ -1268,19 +1268,19 @@ [(set_attr "type" "vecstore, vecload, vecsimple, load, store, *, vecsimple, vecsimple, vecsimple, - vecperm, + vecperm, vecperm, *, *, vecstore, vecload") (set_attr "length" "*, *, *, 16, 16, 16, *, *, *, - *, + *, *, 20, 16, *, *") (set_attr "isa" "<VSisa>, <VSisa>, <VSisa>, *, *, *, p9v, *, <VSisa>, - p10, + p10, p10, *, *, *, *")]) diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 13b56279565..cd70e170955 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -3340,6 +3340,9 @@ A signed 34-bit integer constant if prefixed instructions are supported. A scalar floating point constant or a vector constant that can be loaded with one prefixed instruction to a VSX register. +@item eQ +A constant that can be loaded with the LXVKQ instruction. + @ifset INTERNALS @item G A floating point constant that can be loaded into a register with one diff --git a/gcc/testsuite/gcc.target/powerpc/float128-constant.c b/gcc/testsuite/gcc.target/powerpc/float128-constant.c new file mode 100644 index 00000000000..f6becac1075 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/float128-constant.c @@ -0,0 +1,160 @@ +/* { dg-require-effective-target ppc_float128_hw } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -mlxvkq -O2" } */ + +/* Test whether the LXVKQ instruction is generated to load special IEEE 128-bit + constants. */ + +_Float128 +return_0 (void) +{ + return 0.0f128; /* XXSPLTIB 34,0. */ +} + +_Float128 +return_1 (void) +{ + return 1.0f128; /* LXVKQ 34,1. */ +} + +_Float128 +return_2 (void) +{ + return 2.0f128; /* LXVKQ 34,2. */ +} + +_Float128 +return_3 (void) +{ + return 3.0f128; /* LXVKQ 34,3. */ +} + +_Float128 +return_4 (void) +{ + return 4.0f128; /* LXVKQ 34,4. */ +} + +_Float128 +return_5 (void) +{ + return 5.0f128; /* LXVKQ 34,5. */ +} + +_Float128 +return_6 (void) +{ + return 6.0f128; /* LXVKQ 34,6. */ +} + +_Float128 +return_7 (void) +{ + return 7.0f128; /* LXVKQ 34,7. 
*/ +} + +_Float128 +return_m0 (void) +{ + return -0.0f128; /* LXVKQ 34,16. */ +} + +_Float128 +return_m1 (void) +{ + return -1.0f128; /* LXVKQ 34,17. */ +} + +_Float128 +return_m2 (void) +{ + return -2.0f128; /* LXVKQ 34,18. */ +} + +_Float128 +return_m3 (void) +{ + return -3.0f128; /* LXVKQ 34,19. */ +} + +_Float128 +return_m4 (void) +{ + return -4.0f128; /* LXVKQ 34,20. */ +} + +_Float128 +return_m5 (void) +{ + return -5.0f128; /* LXVKQ 34,21. */ +} + +_Float128 +return_m6 (void) +{ + return -6.0f128; /* LXVKQ 34,22. */ +} + +_Float128 +return_m7 (void) +{ + return -7.0f128; /* LXVKQ 34,23. */ +} + +_Float128 +return_inf (void) +{ + return __builtin_inff128 (); /* LXVKQ 34,8. */ +} + +_Float128 +return_minf (void) +{ + return - __builtin_inff128 (); /* LXVKQ 34,24. */ +} + +_Float128 +return_nan (void) +{ + return __builtin_nanf128 (""); /* LXVKQ 34,9. */ +} + +/* Note, the following NaNs should not generate a LXVKQ instruction. */ +_Float128 +return_mnan (void) +{ + return - __builtin_nanf128 (""); /* PLXV 34,... */ +} + +_Float128 +return_nan2 (void) +{ + return __builtin_nanf128 ("1"); /* PLXV 34,... */ +} + +_Float128 +return_nans (void) +{ + return __builtin_nansf128 (""); /* PLXV 34,... */ +} + +vector long long +return_longlong_neg_0 (void) +{ + /* This vector is the same pattern as -0.0F128. */ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define FIRST 0x8000000000000000 +#define SECOND 0x0000000000000000 + +#else +#define FIRST 0x0000000000000000 +#define SECOND 0x8000000000000000 +#endif + + return (vector long long) { FIRST, SECOND }; /* LXVKQ 34,16. */ +} + +/* { dg-final { scan-assembler-times {\mlxvkq\M} 19 } } */ +/* { dg-final { scan-assembler-times {\mplxv\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mxxspltib\M} 1 } } */ +
next reply other threads:[~2021-10-18 19:12 UTC|newest] Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top 2021-10-18 19:12 Michael Meissner [this message] -- strict thread matches above, loose matches on Subject: below -- 2021-10-21 2:54 Michael Meissner 2021-10-21 2:39 Michael Meissner 2021-10-21 2:20 Michael Meissner 2021-10-15 3:39 Michael Meissner 2021-10-14 16:51 Michael Meissner 2021-10-14 16:50 Michael Meissner 2021-10-14 16:46 Michael Meissner 2021-10-14 15:32 Michael Meissner 2021-10-14 3:39 Michael Meissner 2021-10-14 1:56 Michael Meissner
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20211018191212.F40B83858C27@sourceware.org \ --to=meissner@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).