public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work071)] Add LXVKQ support.
@ 2021-10-18 19:12 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2021-10-18 19:12 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:86ae6e0d17cb144be310c96dd425383000534e45
commit 86ae6e0d17cb144be310c96dd425383000534e45
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Mon Oct 18 15:09:45 2021 -0400
Add LXVKQ support.
This patch adds support to generate the LXVKQ instruction to load specific
IEEE-128 floating point constants.
Compared to the last time I submitted this patch, I modified it so that it
uses the bit pattern of the vector to see if it can generate the LXVKQ
instruction. This means that on a little endian Power10 system, the
following code will generate an LXVKQ 34,16 instruction:
vector long long foo (void)
{
return (vector long long) { 0x0000000000000000, 0x8000000000000000 };
}
because that vector pattern is the same bit pattern as -0.0F128.
2021-10-18 Michael Meissner <meissner@the-meissners.org>
gcc/
* config/rs6000/constraints.md (eQ): New constraint.
* config/rs6000/predicates.md (easy_fp_constant): Add support for
generating the LXVKQ instruction.
(easy_vector_constant_ieee128): New predicate.
(easy_vector_constant): Add support for generating the LXVKQ
instruction.
* config/rs6000/rs6000-protos.h (rs6000_vec_const): Add field
for generating LXVKQ.
(vec_const_use_lxvkq): New declaration.
* config/rs6000/rs6000.c (output_vec_const_move): Add support for
generating LXVKQ.
(vec_const_use_lxvkq): New function.
* config/rs6000/rs6000.opt (-mlxvkq): New debug option.
* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
generating LXVKQ.
(vsx_mov<mode>_32bit): Likewise.
* doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
eQ constraint.
gcc/testsuite/
* gcc.target/powerpc/float128-constant.c: New test.
Diff:
---
gcc/config/rs6000/constraints.md | 5 +
gcc/config/rs6000/predicates.md | 22 +++
gcc/config/rs6000/rs6000-protos.h | 2 +
gcc/config/rs6000/rs6000.c | 56 ++++++++
gcc/config/rs6000/rs6000.opt | 4 +
gcc/config/rs6000/vsx.md | 28 ++--
gcc/doc/md.texi | 3 +
.../gcc.target/powerpc/float128-constant.c | 160 +++++++++++++++++++++
8 files changed, 266 insertions(+), 14 deletions(-)
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 7d594872a78..f5b524254ab 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -219,6 +219,11 @@
"A constant that can be loaded into a VSX register with one prefixed insn."
(match_operand 0 "vsx_prefixed_constant"))
+;; A scalar or vector constant whose bits form one of the IEEE 128-bit values
+;; that the LXVKQ instruction can load (see easy_vector_constant_ieee128).
+(define_constraint "eQ"
+ "An IEEE 128-bit constant that can be loaded with the LXVKQ instruction."
+ (match_operand 0 "easy_vector_constant_ieee128"))
+
;; Floating-point constraints. These two are defined so that insn
;; length attributes can be calculated exactly.
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 40c4cba68ff..c271b379f6d 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -606,6 +606,9 @@
if (TARGET_POWER10 && vec_const_to_bytes (op, mode, &vec_const))
{
+ if (vec_const_use_lxvkq (&vec_const))
+ return true;
+
if (vec_const_use_xxspltidp (&vec_const))
return true;
@@ -661,6 +664,22 @@
return false;
})
+;; Return 1 if the operand is a CONST_VECTOR or CONST_DOUBLE whose value is one
+;; of the special IEEE 128-bit values that can be loaded via the LXVKQ
+;; instruction.
+
+(define_predicate "easy_vector_constant_ieee128"
+ (match_code "const_vector,const_double")
+{
+ rs6000_vec_const vec_const;
+
+ /* LXVKQ is only generated when -mlxvkq is in effect and IEEE 128-bit
+ hardware floating point, power10 code generation and VSX are all
+ enabled. */
+ if (!TARGET_LXVKQ || !TARGET_FLOAT128_HW || !TARGET_POWER10 || !TARGET_VSX)
+ return false;
+
+ /* Break the constant out into bytes and words, then check whether that bit
+ pattern is one that LXVKQ can load. */
+ return (vec_const_to_bytes (op, mode, &vec_const)
+ && vec_const_use_lxvkq (&vec_const));
+})
+
;; Return 1 if the operand is a constant that can loaded with a XXSPLTIB
;; instruction and then a VUPKHSB, VECSB2W or VECSB2D instruction.
@@ -715,6 +734,9 @@
if (TARGET_POWER10 && vec_const_to_bytes (op, mode, &vec_const))
{
+ if (vec_const_use_lxvkq (&vec_const))
+ return true;
+
if (vec_const_use_xxspltidp (&vec_const))
return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index b12f6b10c13..40a796d4461 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -240,6 +240,7 @@ typedef struct {
unsigned char bytes[VECTOR_CONST_BYTES];
unsigned int xxspltidp_immediate; /* Immediate value for XXSPLTIDP. */
+ unsigned int lxvkq_immediate; /* Immediate value for LXVKQ. */
bool fp_constant_p; /* Is the constant floating point? */
bool all_double_words_same; /* Are the double words all equal? */
bool all_words_same; /* Are the words all equal? */
@@ -250,6 +251,7 @@ typedef struct {
extern bool vec_const_to_bytes (rtx, machine_mode, rs6000_vec_const *);
extern bool vec_const_use_xxspltidp (rs6000_vec_const *);
extern bool vec_const_use_xxspltiw (rs6000_vec_const *);
+extern bool vec_const_use_lxvkq (rs6000_vec_const *);
#endif /* RTX_CODE */
#ifdef TREE_CODE
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 20226169ba2..fbcd307177c 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6998,6 +6998,12 @@ output_vec_const_move (rtx *operands)
rs6000_vec_const vec_const;
if (TARGET_POWER10 && vec_const_to_bytes (vec, mode, &vec_const))
{
+ if (vec_const_use_lxvkq (&vec_const))
+ {
+ operands[2] = GEN_INT (vec_const.lxvkq_immediate);
+ return "lxvkq %x0,%2";
+ }
+
if (vec_const_use_xxspltidp (&vec_const))
{
operands[2] = GEN_INT (vec_const.xxspltidp_immediate);
@@ -28827,6 +28833,56 @@ vec_const_use_xxspltiw (rs6000_vec_const *vec_const)
return true;
}
+/* Determine if a vector constant can be loaded with LXVKQ. Only the bit
+ pattern in VEC_CONST is examined, so an integer vector whose bits equal one
+ of the IEEE 128-bit values below is accepted as well. On success, store the
+ LXVKQ immediate in VEC_CONST->lxvkq_immediate and return true; otherwise
+ return false and leave VEC_CONST unchanged. */
+
+bool
+vec_const_use_lxvkq (rs6000_vec_const *vec_const)
+{
+ unsigned immediate;
+
+ /* LXVKQ is not a prefixed instruction, so do not test TARGET_PREFIXED. Use
+ the same conditions as the easy_vector_constant_ieee128 predicate so that
+ the two always agree on whether LXVKQ may be generated. */
+ if (!TARGET_LXVKQ || !TARGET_FLOAT128_HW || !TARGET_POWER10 || !TARGET_VSX)
+ return false;
+
+ /* Every constant LXVKQ can load has zeros in its bottom 3 words, so reject
+ anything with a nonzero value in words 1 and up. */
+ for (size_t i = 1; i < VECTOR_CONST_WORDS; i++)
+ if (vec_const->words[i] != 0)
+ return false;
+
+ /* Map the most significant word onto the LXVKQ immediate. */
+ switch (vec_const->words[0])
+ {
+ case 0x3FFF0000U: immediate = 1; break; /* IEEE 128-bit +1.0. */
+ case 0x40000000U: immediate = 2; break; /* IEEE 128-bit +2.0. */
+ case 0x40008000U: immediate = 3; break; /* IEEE 128-bit +3.0. */
+ case 0x40010000U: immediate = 4; break; /* IEEE 128-bit +4.0. */
+ case 0x40014000U: immediate = 5; break; /* IEEE 128-bit +5.0. */
+ case 0x40018000U: immediate = 6; break; /* IEEE 128-bit +6.0. */
+ case 0x4001C000U: immediate = 7; break; /* IEEE 128-bit +7.0. */
+ case 0x7FFF0000U: immediate = 8; break; /* IEEE 128-bit +Infinity. */
+ case 0x7FFF8000U: immediate = 9; break; /* IEEE 128-bit quiet NaN. */
+ case 0x80000000U: immediate = 16; break; /* IEEE 128-bit -0.0. */
+ case 0xBFFF0000U: immediate = 17; break; /* IEEE 128-bit -1.0. */
+ case 0xC0000000U: immediate = 18; break; /* IEEE 128-bit -2.0. */
+ case 0xC0008000U: immediate = 19; break; /* IEEE 128-bit -3.0. */
+ case 0xC0010000U: immediate = 20; break; /* IEEE 128-bit -4.0. */
+ case 0xC0014000U: immediate = 21; break; /* IEEE 128-bit -5.0. */
+ case 0xC0018000U: immediate = 22; break; /* IEEE 128-bit -6.0. */
+ case 0xC001C000U: immediate = 23; break; /* IEEE 128-bit -7.0. */
+ case 0xFFFF0000U: immediate = 24; break; /* IEEE 128-bit -Infinity. */
+
+ /* Anything else cannot be loaded with LXVKQ. */
+ default:
+ return false;
+ }
+
+ /* We can use the LXVKQ instruction, record the immediate needed for the
+ instruction. */
+ vec_const->lxvkq_immediate = immediate;
+ return true;
+}
+
/* Convert a vector constant to an internal structure, breaking it out to
bytes, half words, words, and double words. Return true if we have
successfully broken it out. */
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 332f61be0ba..015bf91b6d5 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -648,6 +648,10 @@ mxxspltiw
Target Undocumented Var(TARGET_XXSPLTIW) Init(1) Save
Generate (do not generate) XXSPLTIW instructions.
+mlxvkq
+Target Undocumented Var(TARGET_LXVKQ) Init(1) Save
+Generate (do not generate) LXVKQ instructions.
+
-param=rs6000-density-pct-threshold=
Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param
When costing for loop vectorization, we probably need to penalize the loop body
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0ceecc1975c..ce8402101ef 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1192,19 +1192,19 @@
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
-;; XXLSPLTI*
+;; XXSPLTI* LXVKQ
;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, r, we, ?wQ,
?&r, ??r, ??Y, <??r>, wa, v,
- wa,
+ wa, wa,
?wa, v, <??r>, wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, we, r, r,
wQ, Y, r, r, wE, jwM,
- eP,
+ eP, eQ,
?jwM, W, <nW>, v, wZ"))]
"TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
@@ -1216,46 +1216,46 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
store, load, store, *, vecsimple, vecsimple,
- vecperm,
+ vecperm, vecperm,
vecsimple, *, *, vecstore, vecload")
(set_attr "num_insns"
"*, *, *, 2, *, 2,
2, 2, 2, 2, *, *,
- *,
+ *, *,
*, 5, 2, *, *")
(set_attr "max_prefixed_insns"
"*, *, *, *, *, 2,
2, 2, 2, 2, *, *,
- *,
+ *, *,
*, *, *, *, *")
(set_attr "length"
"*, *, *, 8, *, 8,
8, 8, 8, 8, *, *,
- *,
+ *, *,
*, 20, 8, *, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
*, *, *, *, p9v, *,
- p10,
+ p10, p10,
<VSisa>, *, *, *, *")])
;; VSX store VSX load VSX move GPR load GPR store GPR move
;; XXSPLTIB VSPLTISW VSX 0/-1
-;; XXSPLTI*
+;; XXSPLTI* LXVKQ
;; VMX const GPR const
;; LVX (VMX) STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, ??r, ??Y, <??r>,
wa, v, ?wa,
- wa,
+ wa, wa,
v, <??r>,
wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, Y, r, r,
wE, jwM, ?jwM,
- eP,
+ eP, eQ,
W, <nW>,
v, wZ"))]
@@ -1268,19 +1268,19 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, load, store, *,
vecsimple, vecsimple, vecsimple,
- vecperm,
+ vecperm, vecperm,
*, *,
vecstore, vecload")
(set_attr "length"
"*, *, *, 16, 16, 16,
*, *, *,
- *,
+ *, *,
20, 16,
*, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
p9v, *, <VSisa>,
- p10,
+ p10, p10,
*, *,
*, *")])
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 13b56279565..cd70e170955 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3340,6 +3340,9 @@ A signed 34-bit integer constant if prefixed instructions are supported.
A scalar floating point constant or a vector constant that can be
loaded with one prefixed instruction to a VSX register.
+@item eQ
+A constant that can be loaded with the LXVKQ instruction.
+
@ifset INTERNALS
@item G
A floating point constant that can be loaded into a register with one
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-constant.c b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
new file mode 100644
index 00000000000..f6becac1075
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
@@ -0,0 +1,160 @@
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -mlxvkq -O2" } */
+
+/* Test whether the LXVKQ instruction is generated to load special IEEE 128-bit
+ constants. */
+
+_Float128
+return_0 (void)
+{
+ return 0.0f128; /* XXSPLTIB 34,0. */
+}
+
+_Float128
+return_1 (void)
+{
+ return 1.0f128; /* LXVKQ 34,1. */
+}
+
+_Float128
+return_2 (void)
+{
+ return 2.0f128; /* LXVKQ 34,2. */
+}
+
+_Float128
+return_3 (void)
+{
+ return 3.0f128; /* LXVKQ 34,3. */
+}
+
+_Float128
+return_4 (void)
+{
+ return 4.0f128; /* LXVKQ 34,4. */
+}
+
+_Float128
+return_5 (void)
+{
+ return 5.0f128; /* LXVKQ 34,5. */
+}
+
+_Float128
+return_6 (void)
+{
+ return 6.0f128; /* LXVKQ 34,6. */
+}
+
+_Float128
+return_7 (void)
+{
+ return 7.0f128; /* LXVKQ 34,7. */
+}
+
+_Float128
+return_m0 (void)
+{
+ return -0.0f128; /* LXVKQ 34,16. */
+}
+
+_Float128
+return_m1 (void)
+{
+ return -1.0f128; /* LXVKQ 34,17. */
+}
+
+_Float128
+return_m2 (void)
+{
+ return -2.0f128; /* LXVKQ 34,18. */
+}
+
+_Float128
+return_m3 (void)
+{
+ return -3.0f128; /* LXVKQ 34,19. */
+}
+
+_Float128
+return_m4 (void)
+{
+ return -4.0f128; /* LXVKQ 34,20. */
+}
+
+_Float128
+return_m5 (void)
+{
+ return -5.0f128; /* LXVKQ 34,21. */
+}
+
+_Float128
+return_m6 (void)
+{
+ return -6.0f128; /* LXVKQ 34,22. */
+}
+
+_Float128
+return_m7 (void)
+{
+ return -7.0f128; /* LXVKQ 34,23. */
+}
+
+_Float128
+return_inf (void)
+{
+ return __builtin_inff128 (); /* LXVKQ 34,8. */
+}
+
+_Float128
+return_minf (void)
+{
+ return - __builtin_inff128 (); /* LXVKQ 34,24. */
+}
+
+_Float128
+return_nan (void)
+{
+ return __builtin_nanf128 (""); /* LXVKQ 34,9. */
+}
+
+/* Note, the following NaNs should not generate a LXVKQ instruction. */
+_Float128
+return_mnan (void)
+{
+ return - __builtin_nanf128 (""); /* PLXV 34,... */
+}
+
+_Float128
+return_nan2 (void)
+{
+ return __builtin_nanf128 ("1"); /* PLXV 34,... */
+}
+
+_Float128
+return_nans (void)
+{
+ return __builtin_nansf128 (""); /* PLXV 34,... */
+}
+
+vector long long
+return_longlong_neg_0 (void)
+{
+ /* This vector is the same pattern as -0.0F128. */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define FIRST 0x8000000000000000
+#define SECOND 0x0000000000000000
+
+#else
+#define FIRST 0x0000000000000000
+#define SECOND 0x8000000000000000
+#endif
+
+ return (vector long long) { FIRST, SECOND }; /* LXVKQ 34,16. */
+}
+
+/* { dg-final { scan-assembler-times {\mlxvkq\M} 19 } } */
+/* { dg-final { scan-assembler-times {\mplxv\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 1 } } */
+
^ permalink raw reply [flat|nested] 11+ messages in thread
* [gcc(refs/users/meissner/heads/work071)] Add LXVKQ support.
@ 2021-10-21 2:54 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2021-10-21 2:54 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:b93d8d49bdd0317f0232aaad55cb1e311ce9dacb
commit b93d8d49bdd0317f0232aaad55cb1e311ce9dacb
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Wed Oct 20 22:54:02 2021 -0400
Add LXVKQ support.
This patch adds support to generate the LXVKQ instruction to load specific
IEEE-128 floating point constants.
Compared to the last time I submitted this patch, I modified it so that it
uses the bit pattern of the vector to see if it can generate the LXVKQ
instruction. This means that on a little endian Power10 system, the
following code will generate an LXVKQ 34,16 instruction:
vector long long foo (void)
{
return (vector long long) { 0x0000000000000000, 0x8000000000000000 };
}
because that vector pattern is the same bit pattern as -0.0F128.
2021-10-20 Michael Meissner <meissner@the-meissners.org>
gcc/
* config/rs6000/constraints.md (eQ): New constraint.
* config/rs6000/predicates.md (easy_fp_constant): Add support for
generating the LXVKQ instruction.
(easy_vector_constant_ieee128): New predicate.
(easy_vector_constant): Add support for generating the LXVKQ
instruction.
* config/rs6000/rs6000-protos.h (constant_generates_lxvkq): New
declaration.
* config/rs6000/rs6000.c (output_vec_const_move): Add support for
generating LXVKQ.
(constant_generates_lxvkq): New function.
* config/rs6000/rs6000.opt (-mieee128-constant): New debug
option.
* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
generating LXVKQ.
(vsx_mov<mode>_32bit): Likewise.
* doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
eQ constraint.
gcc/testsuite/
* gcc.target/powerpc/float128-constant.c: New test.
Diff:
---
gcc/config/rs6000/constraints.md | 6 +
gcc/config/rs6000/predicates.md | 23 +++
gcc/config/rs6000/rs6000-protos.h | 1 +
gcc/config/rs6000/rs6000.c | 64 ++++++++-
gcc/config/rs6000/rs6000.opt | 4 +
gcc/config/rs6000/vsx.md | 28 ++--
gcc/doc/md.texi | 4 +
.../gcc.target/powerpc/float128-constant.c | 160 +++++++++++++++++++++
8 files changed, 275 insertions(+), 15 deletions(-)
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 0f0513f2171..a4b05837fa6 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -219,6 +219,12 @@
"A constant that can be loaded into a VSX register with one prefixed insn."
(match_operand 0 "vsx_prefixed_constant"))
+;; A TF/KF scalar constant or a vector constant whose value is one of the
+;; IEEE 128-bit constants that LXVKQ can load into a vector register.
+(define_constraint "eQ"
+ "An IEEE 128-bit constant that can be loaded into VSX registers."
+ (match_operand 0 "easy_vector_constant_ieee128"))
+
;; Floating-point constraints. These two are defined so that insn
;; length attributes can be calculated exactly.
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 4b07850eb64..46ea61d64ac 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -606,6 +606,9 @@
if (TARGET_POWER10
&& constant_to_bytes (op, mode, &vsx_const, RS6000_CONST_SPLAT_16_BYTES))
{
+ if (constant_generates_lxvkq (&vsx_const))
+ return true;
+
if (constant_generates_xxspltidp (&vsx_const))
return true;
@@ -660,6 +663,23 @@
return false;
})
+;; Return 1 if the operand is a CONST_VECTOR or CONST_DOUBLE whose value is one
+;; of the special IEEE 128-bit values that can be loaded via the LXVKQ
+;; instruction.
+
+(define_predicate "easy_vector_constant_ieee128"
+ (match_code "const_vector,const_double")
+{
+ rs6000_const vsx_const;
+
+ /* LXVKQ is only generated when -mieee128-constant is in effect and IEEE
+ 128-bit hardware floating point, power10 code generation and VSX are all
+ enabled. */
+ if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
+ || !TARGET_VSX)
+ return false;
+
+ /* Break the constant out into bytes (without splatting it), then check
+ whether that bit pattern is one that LXVKQ can load. */
+ return (constant_to_bytes (op, mode, &vsx_const, RS6000_CONST_NO_SPLAT)
+ && constant_generates_lxvkq (&vsx_const));
+})
+
;; Return 1 if the operand is a constant that can loaded with a XXSPLTIB
;; instruction and then a VUPKHSB, VECSB2W or VECSB2D instruction.
@@ -710,6 +730,9 @@
if (TARGET_POWER10
&& constant_to_bytes (op, mode, &vsx_const, RS6000_CONST_NO_SPLAT))
{
+ if (constant_generates_lxvkq (&vsx_const))
+ return true;
+
if (constant_generates_xxspltidp (&vsx_const))
return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 0b93bc3cc0e..20cb092e159 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -260,6 +260,7 @@ extern bool constant_to_bytes (rtx, machine_mode, rs6000_const *,
rs6000_const_splat);
extern unsigned constant_generates_xxspltidp (rs6000_const *);
extern unsigned constant_generates_xxspltiw (rs6000_const *);
+extern unsigned constant_generates_lxvkq (rs6000_const *);
#endif /* RTX_CODE */
#ifdef TREE_CODE
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 59b338085b1..22f5d701908 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -7001,7 +7001,14 @@ output_vec_const_move (rtx *operands)
if (constant_to_bytes (vec, mode, &vsx_const,
RS6000_CONST_SPLAT_16_BYTES))
{
- unsigned imm = constant_generates_xxspltidp (&vsx_const);
+ unsigned imm = constant_generates_lxvkq (&vsx_const);
+ if (imm)
+ {
+ operands[2] = GEN_INT (imm);
+ return "lxvkq %x0,%2";
+ }
+
+ imm = constant_generates_xxspltidp (&vsx_const);
if (imm)
{
operands[2] = GEN_INT (imm);
@@ -29059,6 +29066,61 @@ constant_generates_xxspltiw (rs6000_const *vsx_const)
return vsx_const->words[0];
}
+/* Determine if an IEEE 128-bit constant can be loaded with LXVKQ. Return zero
+ if the LXVKQ instruction cannot be used. Otherwise return the immediate
+ value to be used with the LXVKQ instruction. Only the bit pattern held in
+ VSX_CONST is examined, so an integer vector whose bits equal one of the
+ values below is accepted as well. */
+
+unsigned
+constant_generates_lxvkq (rs6000_const *vsx_const)
+{
+ /* LXVKQ may only be used when -mieee128-constant is in effect and power10
+ code generation, IEEE 128-bit floating point hardware and VSX registers
+ are all available. */
+ if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
+ || !TARGET_VSX)
+ return 0;
+
+ /* Only recognize LXVKQ for 16-byte (4 word) vector constants. */
+ unsigned total_size = vsx_const->total_size;
+ if (total_size != 16)
+ return 0;
+
+ /* Every constant LXVKQ can load has zeros in its bottom 3 words, so reject
+ anything with a nonzero value in words 1 through 3. */
+ if (vsx_const->words[1] != 0
+ || vsx_const->words[2] != 0
+ || vsx_const->words[3] != 0)
+ return 0;
+
+ /* Map the most significant word onto the LXVKQ immediate. */
+ switch (vsx_const->words[0])
+ {
+ case 0x3FFF0000U: return 1; /* IEEE 128-bit +1.0. */
+ case 0x40000000U: return 2; /* IEEE 128-bit +2.0. */
+ case 0x40008000U: return 3; /* IEEE 128-bit +3.0. */
+ case 0x40010000U: return 4; /* IEEE 128-bit +4.0. */
+ case 0x40014000U: return 5; /* IEEE 128-bit +5.0. */
+ case 0x40018000U: return 6; /* IEEE 128-bit +6.0. */
+ case 0x4001C000U: return 7; /* IEEE 128-bit +7.0. */
+ case 0x7FFF0000U: return 8; /* IEEE 128-bit +Infinity. */
+ case 0x7FFF8000U: return 9; /* IEEE 128-bit quiet NaN. */
+ case 0x80000000U: return 16; /* IEEE 128-bit -0.0. */
+ case 0xBFFF0000U: return 17; /* IEEE 128-bit -1.0. */
+ case 0xC0000000U: return 18; /* IEEE 128-bit -2.0. */
+ case 0xC0008000U: return 19; /* IEEE 128-bit -3.0. */
+ case 0xC0010000U: return 20; /* IEEE 128-bit -4.0. */
+ case 0xC0014000U: return 21; /* IEEE 128-bit -5.0. */
+ case 0xC0018000U: return 22; /* IEEE 128-bit -6.0. */
+ case 0xC001C000U: return 23; /* IEEE 128-bit -7.0. */
+ case 0xFFFF0000U: return 24; /* IEEE 128-bit -Infinity. */
+
+ /* Anything else cannot be loaded with LXVKQ. */
+ default:
+ break;
+ }
+
+ return 0;
+}
+}
+
\f
struct gcc_target targetm = TARGET_INITIALIZER;
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index ec607a7aee7..3ddac80289c 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -648,6 +648,10 @@ msplat-word-constant
Target Var(TARGET_SPLAT_WORD_CONSTANT) Init(1) Save
Generate (do not generate) code that uses the XXSPLTIW instruction.
+mieee128-constant
+Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
+Generate (do not generate) code that uses the LXVKQ instruction.
+
-param=rs6000-density-pct-threshold=
Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param
When costing for loop vectorization, we probably need to penalize the loop body
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0ceecc1975c..ce8402101ef 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1192,19 +1192,19 @@
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
-;; XXLSPLTI*
+;; XXSPLTI* LXVKQ
;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, r, we, ?wQ,
?&r, ??r, ??Y, <??r>, wa, v,
- wa,
+ wa, wa,
?wa, v, <??r>, wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, we, r, r,
wQ, Y, r, r, wE, jwM,
- eP,
+ eP, eQ,
?jwM, W, <nW>, v, wZ"))]
"TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
@@ -1216,46 +1216,46 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
store, load, store, *, vecsimple, vecsimple,
- vecperm,
+ vecperm, vecperm,
vecsimple, *, *, vecstore, vecload")
(set_attr "num_insns"
"*, *, *, 2, *, 2,
2, 2, 2, 2, *, *,
- *,
+ *, *,
*, 5, 2, *, *")
(set_attr "max_prefixed_insns"
"*, *, *, *, *, 2,
2, 2, 2, 2, *, *,
- *,
+ *, *,
*, *, *, *, *")
(set_attr "length"
"*, *, *, 8, *, 8,
8, 8, 8, 8, *, *,
- *,
+ *, *,
*, 20, 8, *, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
*, *, *, *, p9v, *,
- p10,
+ p10, p10,
<VSisa>, *, *, *, *")])
;; VSX store VSX load VSX move GPR load GPR store GPR move
;; XXSPLTIB VSPLTISW VSX 0/-1
-;; XXSPLTI*
+;; XXSPLTI* LXVKQ
;; VMX const GPR const
;; LVX (VMX) STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, ??r, ??Y, <??r>,
wa, v, ?wa,
- wa,
+ wa, wa,
v, <??r>,
wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, Y, r, r,
wE, jwM, ?jwM,
- eP,
+ eP, eQ,
W, <nW>,
v, wZ"))]
@@ -1268,19 +1268,19 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, load, store, *,
vecsimple, vecsimple, vecsimple,
- vecperm,
+ vecperm, vecperm,
*, *,
vecstore, vecload")
(set_attr "length"
"*, *, *, 16, 16, 16,
*, *, *,
- *,
+ *, *,
20, 16,
*, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
p9v, *, <VSisa>,
- p10,
+ p10, p10,
*, *,
*, *")])
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 13b56279565..41a568b7d4e 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3340,6 +3340,10 @@ A signed 34-bit integer constant if prefixed instructions are supported.
A scalar floating point constant or a vector constant that can be
loaded with one prefixed instruction to a VSX register.
+@item eQ
+An IEEE 128-bit constant that can be loaded into a VSX register with a
+single instruction.
+
@ifset INTERNALS
@item G
A floating point constant that can be loaded into a register with one
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-constant.c b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
new file mode 100644
index 00000000000..e3286a786a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
@@ -0,0 +1,160 @@
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test whether the LXVKQ instruction is generated to load special IEEE 128-bit
+ constants. */
+
+_Float128
+return_0 (void)
+{
+ return 0.0f128; /* XXSPLTIB 34,0. */
+}
+
+_Float128
+return_1 (void)
+{
+ return 1.0f128; /* LXVKQ 34,1. */
+}
+
+_Float128
+return_2 (void)
+{
+ return 2.0f128; /* LXVKQ 34,2. */
+}
+
+_Float128
+return_3 (void)
+{
+ return 3.0f128; /* LXVKQ 34,3. */
+}
+
+_Float128
+return_4 (void)
+{
+ return 4.0f128; /* LXVKQ 34,4. */
+}
+
+_Float128
+return_5 (void)
+{
+ return 5.0f128; /* LXVKQ 34,5. */
+}
+
+_Float128
+return_6 (void)
+{
+ return 6.0f128; /* LXVKQ 34,6. */
+}
+
+_Float128
+return_7 (void)
+{
+ return 7.0f128; /* LXVKQ 34,7. */
+}
+
+_Float128
+return_m0 (void)
+{
+ return -0.0f128; /* LXVKQ 34,16. */
+}
+
+_Float128
+return_m1 (void)
+{
+ return -1.0f128; /* LXVKQ 34,17. */
+}
+
+_Float128
+return_m2 (void)
+{
+ return -2.0f128; /* LXVKQ 34,18. */
+}
+
+_Float128
+return_m3 (void)
+{
+ return -3.0f128; /* LXVKQ 34,19. */
+}
+
+_Float128
+return_m4 (void)
+{
+ return -4.0f128; /* LXVKQ 34,20. */
+}
+
+_Float128
+return_m5 (void)
+{
+ return -5.0f128; /* LXVKQ 34,21. */
+}
+
+_Float128
+return_m6 (void)
+{
+ return -6.0f128; /* LXVKQ 34,22. */
+}
+
+_Float128
+return_m7 (void)
+{
+ return -7.0f128; /* LXVKQ 34,23. */
+}
+
+_Float128
+return_inf (void)
+{
+ return __builtin_inff128 (); /* LXVKQ 34,8. */
+}
+
+_Float128
+return_minf (void)
+{
+ return - __builtin_inff128 (); /* LXVKQ 34,24. */
+}
+
+_Float128
+return_nan (void)
+{
+ return __builtin_nanf128 (""); /* LXVKQ 34,9. */
+}
+
+/* Note, the following NaNs should not generate a LXVKQ instruction. */
+_Float128
+return_mnan (void)
+{
+ return - __builtin_nanf128 (""); /* PLXV 34,... */
+}
+
+_Float128
+return_nan2 (void)
+{
+ return __builtin_nanf128 ("1"); /* PLXV 34,... */
+}
+
+_Float128
+return_nans (void)
+{
+ return __builtin_nansf128 (""); /* PLXV 34,... */
+}
+
+vector long long
+return_longlong_neg_0 (void)
+{
+ /* This vector is the same pattern as -0.0F128. */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define FIRST 0x8000000000000000
+#define SECOND 0x0000000000000000
+
+#else
+#define FIRST 0x0000000000000000
+#define SECOND 0x8000000000000000
+#endif
+
+ return (vector long long) { FIRST, SECOND }; /* LXVKQ 34,16. */
+}
+
+/* { dg-final { scan-assembler-times {\mlxvkq\M} 19 } } */
+/* { dg-final { scan-assembler-times {\mplxv\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 1 } } */
+
^ permalink raw reply [flat|nested] 11+ messages in thread
* [gcc(refs/users/meissner/heads/work071)] Add LXVKQ support.
@ 2021-10-21 2:39 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2021-10-21 2:39 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:0f1a4a0956f477239528acee6c8d30a2ddd067ba
commit 0f1a4a0956f477239528acee6c8d30a2ddd067ba
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Wed Oct 20 22:38:57 2021 -0400
Add LXVKQ support.
This patch adds support to generate the LXVKQ instruction to load specific
IEEE-128 floating point constants.
Compared to the last time I submitted this patch, I modified it so that it
uses the bit pattern of the vector to see if it can generate the LXVKQ
instruction. This means that on a little endian Power10 system, the
following code will generate an LXVKQ 34,16 instruction:
vector long long foo (void)
{
return (vector long long) { 0x0000000000000000, 0x8000000000000000 };
}
because that vector pattern is the same bit pattern as -0.0F128.
2021-10-20 Michael Meissner <meissner@the-meissners.org>
gcc/
* config/rs6000/constraints.md (eQ): New constraint.
* config/rs6000/predicates.md (easy_fp_constant): Add support for
generating the LXVKQ instruction.
(easy_vector_constant_ieee128): New predicate.
(easy_vector_constant): Add support for generating the LXVKQ
instruction.
* config/rs6000/rs6000-protos.h (constant_generates_lxvkq): New
declaration.
* config/rs6000/rs6000.c (output_vec_const_move): Add support for
generating LXVKQ.
(constant_generates_lxvkq): New function.
* config/rs6000/rs6000.opt (-mieee128-constant): New debug
option.
* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
generating LXVKQ.
(vsx_mov<mode>_32bit): Likewise.
* doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
eQ constraint.
gcc/testsuite/
* gcc.target/powerpc/float128-constant.c: New test.
Diff:
---
gcc/config/rs6000/constraints.md | 6 +
gcc/config/rs6000/predicates.md | 23 +++
gcc/config/rs6000/rs6000-protos.h | 1 +
gcc/config/rs6000/rs6000.c | 64 ++++++++-
gcc/config/rs6000/rs6000.opt | 4 +
gcc/config/rs6000/vsx.md | 28 ++--
gcc/doc/md.texi | 4 +
.../gcc.target/powerpc/float128-constant.c | 160 +++++++++++++++++++++
8 files changed, 275 insertions(+), 15 deletions(-)
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 0f0513f2171..a4b05837fa6 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -219,6 +219,12 @@
"A constant that can be loaded into a VSX register with one prefixed insn."
(match_operand 0 "vsx_prefixed_constant"))
+;; A TF/KF scalar constant or a vector constant whose value is one of the
+;; IEEE 128-bit constants that LXVKQ can load into a vector register.
+(define_constraint "eQ"
+ "An IEEE 128-bit constant that can be loaded into VSX registers."
+ (match_operand 0 "easy_vector_constant_ieee128"))
+
;; Floating-point constraints. These two are defined so that insn
;; length attributes can be calculated exactly.
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 4b07850eb64..46ea61d64ac 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -606,6 +606,9 @@
if (TARGET_POWER10
&& constant_to_bytes (op, mode, &vsx_const, RS6000_CONST_SPLAT_16_BYTES))
{
+ if (constant_generates_lxvkq (&vsx_const))
+ return true;
+
if (constant_generates_xxspltidp (&vsx_const))
return true;
@@ -660,6 +663,23 @@
return false;
})
+;; Return 1 if the operand is a CONST_VECTOR or CONST_DOUBLE whose value is one
+;; of the special IEEE 128-bit values that can be loaded via the LXVKQ
+;; instruction.
+
+(define_predicate "easy_vector_constant_ieee128"
+ (match_code "const_vector,const_double")
+{
+ rs6000_const vsx_const;
+
+ /* LXVKQ is only generated when TARGET_IEEE128_CONSTANT is set and IEEE
+ 128-bit hardware floating point, power10 code generation and VSX are all
+ enabled. */
+ if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
+ || !TARGET_VSX)
+ return false;
+
+ /* Break the constant out into bytes (without splatting it), then check
+ whether that bit pattern is one that LXVKQ can load. */
+ return (constant_to_bytes (op, mode, &vsx_const, RS6000_CONST_NO_SPLAT)
+ && constant_generates_lxvkq (&vsx_const));
+})
+
;; Return 1 if the operand is a constant that can loaded with a XXSPLTIB
;; instruction and then a VUPKHSB, VECSB2W or VECSB2D instruction.
@@ -710,6 +730,9 @@
if (TARGET_POWER10
&& constant_to_bytes (op, mode, &vsx_const, RS6000_CONST_NO_SPLAT))
{
+ if (constant_generates_lxvkq (&vsx_const))
+ return true;
+
if (constant_generates_xxspltidp (&vsx_const))
return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 0b93bc3cc0e..20cb092e159 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -260,6 +260,7 @@ extern bool constant_to_bytes (rtx, machine_mode, rs6000_const *,
rs6000_const_splat);
extern unsigned constant_generates_xxspltidp (rs6000_const *);
extern unsigned constant_generates_xxspltiw (rs6000_const *);
+extern unsigned constant_generates_lxvkq (rs6000_const *);
#endif /* RTX_CODE */
#ifdef TREE_CODE
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 59b338085b1..22f5d701908 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -7001,7 +7001,14 @@ output_vec_const_move (rtx *operands)
if (constant_to_bytes (vec, mode, &vsx_const,
RS6000_CONST_SPLAT_16_BYTES))
{
- unsigned imm = constant_generates_xxspltidp (&vsx_const);
+ unsigned imm = constant_generates_lxvkq (&vsx_const);
+ if (imm)
+ {
+ operands[2] = GEN_INT (imm);
+ return "lxvkq %x0,%2";
+ }
+
+ imm = constant_generates_xxspltidp (&vsx_const);
if (imm)
{
operands[2] = GEN_INT (imm);
@@ -29059,6 +29066,61 @@ constant_generates_xxspltiw (rs6000_const *vsx_const)
return vsx_const->words[0];
}
+/* Determine if an IEEE 128-bit constant can be loaded with LXVKQ. Return zero
+ if the LXVKQ instruction cannot be used. Otherwise return the immediate
+ value to be used with the LXVKQ instruction. */
+
+unsigned
+constant_generates_lxvkq (rs6000_const *vsx_const)
+{
+ /* LXVKQ needs -mieee128-constant, power10 code generation, IEEE 128-bit
+ floating point hardware, and VSX registers to all be available. */
+ if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
+ || !TARGET_VSX)
+ return 0;
+
+ /* Only recognize LXVKQ for 16-byte (4 word) vector constants. */
+ unsigned total_size = vsx_const->total_size;
+ if (total_size != 16)
+ return 0;
+
+ /* Verify that all of the bottom 3 words in the constants loaded by the
+ LXVKQ instruction are zero. */
+ if (vsx_const->words[1] != 0
+ || vsx_const->words[2] != 0
+ || vsx_const->words[3] != 0)
+ return 0;
+
+ /* See if we have a match. */
+ switch (vsx_const->words[0])
+ {
+ case 0x3FFF0000U: return 1; /* IEEE 128-bit +1.0. */
+ case 0x40000000U: return 2; /* IEEE 128-bit +2.0. */
+ case 0x40008000U: return 3; /* IEEE 128-bit +3.0. */
+ case 0x40010000U: return 4; /* IEEE 128-bit +4.0. */
+ case 0x40014000U: return 5; /* IEEE 128-bit +5.0. */
+ case 0x40018000U: return 6; /* IEEE 128-bit +6.0. */
+ case 0x4001C000U: return 7; /* IEEE 128-bit +7.0. */
+ case 0x7FFF0000U: return 8; /* IEEE 128-bit +Infinity. */
+ case 0x7FFF8000U: return 9; /* IEEE 128-bit quiet NaN. */
+ case 0x80000000U: return 16; /* IEEE 128-bit -0.0. */
+ case 0xBFFF0000U: return 17; /* IEEE 128-bit -1.0. */
+ case 0xC0000000U: return 18; /* IEEE 128-bit -2.0. */
+ case 0xC0008000U: return 19; /* IEEE 128-bit -3.0. */
+ case 0xC0010000U: return 20; /* IEEE 128-bit -4.0. */
+ case 0xC0014000U: return 21; /* IEEE 128-bit -5.0. */
+ case 0xC0018000U: return 22; /* IEEE 128-bit -6.0. */
+ case 0xC001C000U: return 23; /* IEEE 128-bit -7.0. */
+ case 0xFFFF0000U: return 24; /* IEEE 128-bit -Infinity. */
+
+ /* anything else cannot be loaded. */
+ default:
+ break;
+ }
+
+ return 0;
+}
+
\f
struct gcc_target targetm = TARGET_INITIALIZER;
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index ec607a7aee7..3ddac80289c 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -648,6 +648,10 @@ msplat-word-constant
Target Var(TARGET_SPLAT_WORD_CONSTANT) Init(1) Save
Generate (do not generate) code that uses the XXSPLTIW instruction.
+mieee128-constant
+Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
+Generate (do not generate) code that uses the LXVKQ instruction.
+
-param=rs6000-density-pct-threshold=
Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param
When costing for loop vectorization, we probably need to penalize the loop body
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0ceecc1975c..ce8402101ef 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1192,19 +1192,19 @@
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
-;; XXLSPLTI*
+;; XXSPLTI* LXVKQ
;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, r, we, ?wQ,
?&r, ??r, ??Y, <??r>, wa, v,
- wa,
+ wa, wa,
?wa, v, <??r>, wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, we, r, r,
wQ, Y, r, r, wE, jwM,
- eP,
+ eP, eQ,
?jwM, W, <nW>, v, wZ"))]
"TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
@@ -1216,46 +1216,46 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
store, load, store, *, vecsimple, vecsimple,
- vecperm,
+ vecperm, vecperm,
vecsimple, *, *, vecstore, vecload")
(set_attr "num_insns"
"*, *, *, 2, *, 2,
2, 2, 2, 2, *, *,
- *,
+ *, *,
*, 5, 2, *, *")
(set_attr "max_prefixed_insns"
"*, *, *, *, *, 2,
2, 2, 2, 2, *, *,
- *,
+ *, *,
*, *, *, *, *")
(set_attr "length"
"*, *, *, 8, *, 8,
8, 8, 8, 8, *, *,
- *,
+ *, *,
*, 20, 8, *, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
*, *, *, *, p9v, *,
- p10,
+ p10, p10,
<VSisa>, *, *, *, *")])
;; VSX store VSX load VSX move GPR load GPR store GPR move
;; XXSPLTIB VSPLTISW VSX 0/-1
-;; XXSPLTI*
+;; XXSPLTI* LXVKQ
;; VMX const GPR const
;; LVX (VMX) STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, ??r, ??Y, <??r>,
wa, v, ?wa,
- wa,
+ wa, wa,
v, <??r>,
wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, Y, r, r,
wE, jwM, ?jwM,
- eP,
+ eP, eQ,
W, <nW>,
v, wZ"))]
@@ -1268,19 +1268,19 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, load, store, *,
vecsimple, vecsimple, vecsimple,
- vecperm,
+ vecperm, vecperm,
*, *,
vecstore, vecload")
(set_attr "length"
"*, *, *, 16, 16, 16,
*, *, *,
- *,
+ *, *,
20, 16,
*, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
p9v, *, <VSisa>,
- p10,
+ p10, p10,
*, *,
*, *")])
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 13b56279565..41a568b7d4e 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3340,6 +3340,10 @@ A signed 34-bit integer constant if prefixed instructions are supported.
A scalar floating point constant or a vector constant that can be
loaded with one prefixed instruction to a VSX register.
+@item eQ
+An IEEE 128-bit constant that can be loaded into a VSX register with a
+single instruction.
+
@ifset INTERNALS
@item G
A floating point constant that can be loaded into a register with one
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-constant.c b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
new file mode 100644
index 00000000000..e3286a786a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
@@ -0,0 +1,160 @@
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test whether the LXVKQ instruction is generated to load special IEEE 128-bit
+ constants. */
+
+_Float128
+return_0 (void)
+{
+ return 0.0f128; /* XXSPLTIB 34,0. */
+}
+
+_Float128
+return_1 (void)
+{
+ return 1.0f128; /* LXVKQ 34,1. */
+}
+
+_Float128
+return_2 (void)
+{
+ return 2.0f128; /* LXVKQ 34,2. */
+}
+
+_Float128
+return_3 (void)
+{
+ return 3.0f128; /* LXVKQ 34,3. */
+}
+
+_Float128
+return_4 (void)
+{
+ return 4.0f128; /* LXVKQ 34,4. */
+}
+
+_Float128
+return_5 (void)
+{
+ return 5.0f128; /* LXVKQ 34,5. */
+}
+
+_Float128
+return_6 (void)
+{
+ return 6.0f128; /* LXVKQ 34,6. */
+}
+
+_Float128
+return_7 (void)
+{
+ return 7.0f128; /* LXVKQ 34,7. */
+}
+
+_Float128
+return_m0 (void)
+{
+ return -0.0f128; /* LXVKQ 34,16. */
+}
+
+_Float128
+return_m1 (void)
+{
+ return -1.0f128; /* LXVKQ 34,17. */
+}
+
+_Float128
+return_m2 (void)
+{
+ return -2.0f128; /* LXVKQ 34,18. */
+}
+
+_Float128
+return_m3 (void)
+{
+ return -3.0f128; /* LXVKQ 34,19. */
+}
+
+_Float128
+return_m4 (void)
+{
+ return -4.0f128; /* LXVKQ 34,20. */
+}
+
+_Float128
+return_m5 (void)
+{
+ return -5.0f128; /* LXVKQ 34,21. */
+}
+
+_Float128
+return_m6 (void)
+{
+ return -6.0f128; /* LXVKQ 34,22. */
+}
+
+_Float128
+return_m7 (void)
+{
+ return -7.0f128; /* LXVKQ 34,23. */
+}
+
+_Float128
+return_inf (void)
+{
+ return __builtin_inff128 (); /* LXVKQ 34,8. */
+}
+
+_Float128
+return_minf (void)
+{
+ return - __builtin_inff128 (); /* LXVKQ 34,24. */
+}
+
+_Float128
+return_nan (void)
+{
+ return __builtin_nanf128 (""); /* LXVKQ 34,9. */
+}
+
+/* Note, the following NaNs should not generate a LXVKQ instruction. */
+_Float128
+return_mnan (void)
+{
+ return - __builtin_nanf128 (""); /* PLXV 34,... */
+}
+
+_Float128
+return_nan2 (void)
+{
+ return __builtin_nanf128 ("1"); /* PLXV 34,... */
+}
+
+_Float128
+return_nans (void)
+{
+ return __builtin_nansf128 (""); /* PLXV 34,... */
+}
+
+vector long long
+return_longlong_neg_0 (void)
+{
+ /* This vector is the same pattern as -0.0F128. */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define FIRST 0x8000000000000000
+#define SECOND 0x0000000000000000
+
+#else
+#define FIRST 0x0000000000000000
+#define SECOND 0x8000000000000000
+#endif
+
+ return (vector long long) { FIRST, SECOND }; /* LXVKQ 34,16. */
+}
+
+/* { dg-final { scan-assembler-times {\mlxvkq\M} 19 } } */
+/* { dg-final { scan-assembler-times {\mplxv\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 1 } } */
+
^ permalink raw reply [flat|nested] 11+ messages in thread
* [gcc(refs/users/meissner/heads/work071)] Add LXVKQ support.
@ 2021-10-21 2:20 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2021-10-21 2:20 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:57878eabefae88ecb4b6a65a7cee57e00146b261
commit 57878eabefae88ecb4b6a65a7cee57e00146b261
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Wed Oct 20 22:20:10 2021 -0400
Add LXVKQ support.
This patch adds support to generate the LXVKQ instruction to load specific
IEEE-128 floating point constants.
Compared to the last time I submitted this patch, I modified it so that it
uses the bit pattern of the vector to see if it can generate the LXVKQ
instruction. This means on a little endian Power<xxx> system, the
following code will generate a LXVKQ 34,16 instruction:
vector long long foo (void)
{
return (vector long long) { 0x0000000000000000, 0x8000000000000000 };
}
because that vector pattern is the same bit pattern as -0.0F128.
2021-10-20 Michael Meissner <meissner@the-meissners.org>
gcc/
* config/rs6000/constraints.md (eQ): New constraint.
* config/rs6000/predicates.md (easy_fp_constant): Add support for
generating the LXVKQ instruction.
(easy_vector_constant_ieee128): New predicate.
(easy_vector_constant): Add support for generating the LXVKQ
instruction.
* config/rs6000/rs6000-protos.h (constant_generates_lxvkq): New
declaration.
* config/rs6000/rs6000.c (output_vec_const_move): Add support for
generating LXVKQ.
(constant_generates_lxvkq): New function.
* config/rs6000/rs6000.opt (-mieee128-constant): New debug
option.
* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
generating LXVKQ.
(vsx_mov<mode>_32bit): Likewise.
* doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
eQ constraint.
gcc/testsuite/
* gcc.target/powerpc/float128-constant.c: New test.
Diff:
---
gcc/config/rs6000/constraints.md | 6 +
gcc/config/rs6000/predicates.md | 23 +++
gcc/config/rs6000/rs6000-protos.h | 1 +
gcc/config/rs6000/rs6000.c | 64 ++++++++-
gcc/config/rs6000/rs6000.opt | 4 +
gcc/config/rs6000/vsx.md | 28 ++--
gcc/doc/md.texi | 4 +
.../gcc.target/powerpc/float128-constant.c | 160 +++++++++++++++++++++
8 files changed, 275 insertions(+), 15 deletions(-)
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 7d594872a78..906fa44bec3 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -219,6 +219,12 @@
"A constant that can be loaded into a VSX register with one prefixed insn."
(match_operand 0 "vsx_prefixed_constant"))
+;; A TF/KF scalar constant or a vector constant that can load certain IEEE
+;; 128-bit constants into vector registers using LXVKQ.
+(define_constraint "eQ"
+ "An IEEE 128-bit constant that can be loaded into VSX registers."
+ (match_operand 0 "easy_vector_constant_ieee128"))
+
;; Floating-point constraints. These two are defined so that insn
;; length attributes can be calculated exactly.
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 4b07850eb64..46ea61d64ac 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -606,6 +606,9 @@
if (TARGET_POWER10
&& constant_to_bytes (op, mode, &vsx_const, RS6000_CONST_SPLAT_16_BYTES))
{
+ if (constant_generates_lxvkq (&vsx_const))
+ return true;
+
if (constant_generates_xxspltidp (&vsx_const))
return true;
@@ -660,6 +663,23 @@
return false;
})
+;; Return 1 if the operand is a special IEEE 128-bit value that can be loaded
+;; via the LXVKQ instruction.
+
+(define_predicate "easy_vector_constant_ieee128"
+ (match_code "const_vector,const_double")
+{
+ rs6000_const vsx_const;
+
+ /* Can we generate the LXVKQ instruction? */
+ if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
+ || !TARGET_VSX)
+ return false;
+
+ return (constant_to_bytes (op, mode, &vsx_const, RS6000_CONST_NO_SPLAT)
+ && constant_generates_lxvkq (&vsx_const));
+})
+
;; Return 1 if the operand is a constant that can loaded with a XXSPLTIB
;; instruction and then a VUPKHSB, VECSB2W or VECSB2D instruction.
@@ -710,6 +730,9 @@
if (TARGET_POWER10
&& constant_to_bytes (op, mode, &vsx_const, RS6000_CONST_NO_SPLAT))
{
+ if (constant_generates_lxvkq (&vsx_const))
+ return true;
+
if (constant_generates_xxspltidp (&vsx_const))
return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 0b93bc3cc0e..20cb092e159 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -260,6 +260,7 @@ extern bool constant_to_bytes (rtx, machine_mode, rs6000_const *,
rs6000_const_splat);
extern unsigned constant_generates_xxspltidp (rs6000_const *);
extern unsigned constant_generates_xxspltiw (rs6000_const *);
+extern unsigned constant_generates_lxvkq (rs6000_const *);
#endif /* RTX_CODE */
#ifdef TREE_CODE
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 4f24d9491da..282505471ff 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -7001,7 +7001,14 @@ output_vec_const_move (rtx *operands)
if (constant_to_bytes (vec, mode, &vsx_const,
RS6000_CONST_SPLAT_16_BYTES))
{
- unsigned imm = constant_generates_xxspltidp (&vsx_const);
+ unsigned imm = constant_generates_lxvkq (&vsx_const);
+ if (imm)
+ {
+ operands[2] = GEN_INT (imm);
+ return "lxvkq %x0,%2";
+ }
+
+ imm = constant_generates_xxspltidp (&vsx_const);
if (imm)
{
operands[2] = GEN_INT (imm);
@@ -29054,6 +29061,61 @@ constant_generates_xxspltiw (rs6000_const *vsx_const)
return vsx_const->words[0];
}
+/* Determine if an IEEE 128-bit constant can be loaded with LXVKQ. Return zero
+ if the LXVKQ instruction cannot be used. Otherwise return the immediate
+ value to be used with the LXVKQ instruction. */
+
+unsigned
+constant_generates_lxvkq (rs6000_const *vsx_const)
+{
+ /* LXVKQ needs -mieee128-constant, power10 code generation, IEEE 128-bit
+ floating point hardware, and VSX registers to all be available. */
+ if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
+ || !TARGET_VSX)
+ return 0;
+
+ /* Only recognize LXVKQ for 16-byte (4 word) vector constants. */
+ unsigned total_size = vsx_const->total_size;
+ if (total_size != 16)
+ return 0;
+
+ /* Verify that all of the bottom 3 words in the constants loaded by the
+ LXVKQ instruction are zero. */
+ if (vsx_const->words[1] != 0
+ || vsx_const->words[2] != 0
+ || vsx_const->words[3] != 0)
+ return 0;
+
+ /* See if we have a match. */
+ switch (vsx_const->words[0])
+ {
+ case 0x3FFF0000U: return 1; /* IEEE 128-bit +1.0. */
+ case 0x40000000U: return 2; /* IEEE 128-bit +2.0. */
+ case 0x40008000U: return 3; /* IEEE 128-bit +3.0. */
+ case 0x40010000U: return 4; /* IEEE 128-bit +4.0. */
+ case 0x40014000U: return 5; /* IEEE 128-bit +5.0. */
+ case 0x40018000U: return 6; /* IEEE 128-bit +6.0. */
+ case 0x4001C000U: return 7; /* IEEE 128-bit +7.0. */
+ case 0x7FFF0000U: return 8; /* IEEE 128-bit +Infinity. */
+ case 0x7FFF8000U: return 9; /* IEEE 128-bit quiet NaN. */
+ case 0x80000000U: return 16; /* IEEE 128-bit -0.0. */
+ case 0xBFFF0000U: return 17; /* IEEE 128-bit -1.0. */
+ case 0xC0000000U: return 18; /* IEEE 128-bit -2.0. */
+ case 0xC0008000U: return 19; /* IEEE 128-bit -3.0. */
+ case 0xC0010000U: return 20; /* IEEE 128-bit -4.0. */
+ case 0xC0014000U: return 21; /* IEEE 128-bit -5.0. */
+ case 0xC0018000U: return 22; /* IEEE 128-bit -6.0. */
+ case 0xC001C000U: return 23; /* IEEE 128-bit -7.0. */
+ case 0xFFFF0000U: return 24; /* IEEE 128-bit -Infinity. */
+
+ /* anything else cannot be loaded. */
+ default:
+ break;
+ }
+
+ return 0;
+}
+
\f
struct gcc_target targetm = TARGET_INITIALIZER;
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index ec607a7aee7..3ddac80289c 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -648,6 +648,10 @@ msplat-word-constant
Target Var(TARGET_SPLAT_WORD_CONSTANT) Init(1) Save
Generate (do not generate) code that uses the XXSPLTIW instruction.
+mieee128-constant
+Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
+Generate (do not generate) code that uses the LXVKQ instruction.
+
-param=rs6000-density-pct-threshold=
Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param
When costing for loop vectorization, we probably need to penalize the loop body
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0ceecc1975c..ce8402101ef 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1192,19 +1192,19 @@
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
-;; XXLSPLTI*
+;; XXSPLTI* LXVKQ
;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, r, we, ?wQ,
?&r, ??r, ??Y, <??r>, wa, v,
- wa,
+ wa, wa,
?wa, v, <??r>, wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, we, r, r,
wQ, Y, r, r, wE, jwM,
- eP,
+ eP, eQ,
?jwM, W, <nW>, v, wZ"))]
"TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
@@ -1216,46 +1216,46 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
store, load, store, *, vecsimple, vecsimple,
- vecperm,
+ vecperm, vecperm,
vecsimple, *, *, vecstore, vecload")
(set_attr "num_insns"
"*, *, *, 2, *, 2,
2, 2, 2, 2, *, *,
- *,
+ *, *,
*, 5, 2, *, *")
(set_attr "max_prefixed_insns"
"*, *, *, *, *, 2,
2, 2, 2, 2, *, *,
- *,
+ *, *,
*, *, *, *, *")
(set_attr "length"
"*, *, *, 8, *, 8,
8, 8, 8, 8, *, *,
- *,
+ *, *,
*, 20, 8, *, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
*, *, *, *, p9v, *,
- p10,
+ p10, p10,
<VSisa>, *, *, *, *")])
;; VSX store VSX load VSX move GPR load GPR store GPR move
;; XXSPLTIB VSPLTISW VSX 0/-1
-;; XXSPLTI*
+;; XXSPLTI* LXVKQ
;; VMX const GPR const
;; LVX (VMX) STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, ??r, ??Y, <??r>,
wa, v, ?wa,
- wa,
+ wa, wa,
v, <??r>,
wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, Y, r, r,
wE, jwM, ?jwM,
- eP,
+ eP, eQ,
W, <nW>,
v, wZ"))]
@@ -1268,19 +1268,19 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, load, store, *,
vecsimple, vecsimple, vecsimple,
- vecperm,
+ vecperm, vecperm,
*, *,
vecstore, vecload")
(set_attr "length"
"*, *, *, 16, 16, 16,
*, *, *,
- *,
+ *, *,
20, 16,
*, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
p9v, *, <VSisa>,
- p10,
+ p10, p10,
*, *,
*, *")])
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 13b56279565..41a568b7d4e 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3340,6 +3340,10 @@ A signed 34-bit integer constant if prefixed instructions are supported.
A scalar floating point constant or a vector constant that can be
loaded with one prefixed instruction to a VSX register.
+@item eQ
+An IEEE 128-bit constant that can be loaded into a VSX register with a
+single instruction.
+
@ifset INTERNALS
@item G
A floating point constant that can be loaded into a register with one
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-constant.c b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
new file mode 100644
index 00000000000..e3286a786a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
@@ -0,0 +1,160 @@
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test whether the LXVKQ instruction is generated to load special IEEE 128-bit
+ constants. */
+
+_Float128
+return_0 (void)
+{
+ return 0.0f128; /* XXSPLTIB 34,0. */
+}
+
+_Float128
+return_1 (void)
+{
+ return 1.0f128; /* LXVKQ 34,1. */
+}
+
+_Float128
+return_2 (void)
+{
+ return 2.0f128; /* LXVKQ 34,2. */
+}
+
+_Float128
+return_3 (void)
+{
+ return 3.0f128; /* LXVKQ 34,3. */
+}
+
+_Float128
+return_4 (void)
+{
+ return 4.0f128; /* LXVKQ 34,4. */
+}
+
+_Float128
+return_5 (void)
+{
+ return 5.0f128; /* LXVKQ 34,5. */
+}
+
+_Float128
+return_6 (void)
+{
+ return 6.0f128; /* LXVKQ 34,6. */
+}
+
+_Float128
+return_7 (void)
+{
+ return 7.0f128; /* LXVKQ 34,7. */
+}
+
+_Float128
+return_m0 (void)
+{
+ return -0.0f128; /* LXVKQ 34,16. */
+}
+
+_Float128
+return_m1 (void)
+{
+ return -1.0f128; /* LXVKQ 34,17. */
+}
+
+_Float128
+return_m2 (void)
+{
+ return -2.0f128; /* LXVKQ 34,18. */
+}
+
+_Float128
+return_m3 (void)
+{
+ return -3.0f128; /* LXVKQ 34,19. */
+}
+
+_Float128
+return_m4 (void)
+{
+ return -4.0f128; /* LXVKQ 34,20. */
+}
+
+_Float128
+return_m5 (void)
+{
+ return -5.0f128; /* LXVKQ 34,21. */
+}
+
+_Float128
+return_m6 (void)
+{
+ return -6.0f128; /* LXVKQ 34,22. */
+}
+
+_Float128
+return_m7 (void)
+{
+ return -7.0f128; /* LXVKQ 34,23. */
+}
+
+_Float128
+return_inf (void)
+{
+ return __builtin_inff128 (); /* LXVKQ 34,8. */
+}
+
+_Float128
+return_minf (void)
+{
+ return - __builtin_inff128 (); /* LXVKQ 34,24. */
+}
+
+_Float128
+return_nan (void)
+{
+ return __builtin_nanf128 (""); /* LXVKQ 34,9. */
+}
+
+/* Note, the following NaNs should not generate a LXVKQ instruction. */
+_Float128
+return_mnan (void)
+{
+ return - __builtin_nanf128 (""); /* PLXV 34,... */
+}
+
+_Float128
+return_nan2 (void)
+{
+ return __builtin_nanf128 ("1"); /* PLXV 34,... */
+}
+
+_Float128
+return_nans (void)
+{
+ return __builtin_nansf128 (""); /* PLXV 34,... */
+}
+
+vector long long
+return_longlong_neg_0 (void)
+{
+ /* This vector is the same pattern as -0.0F128. */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define FIRST 0x8000000000000000
+#define SECOND 0x0000000000000000
+
+#else
+#define FIRST 0x0000000000000000
+#define SECOND 0x8000000000000000
+#endif
+
+ return (vector long long) { FIRST, SECOND }; /* LXVKQ 34,16. */
+}
+
+/* { dg-final { scan-assembler-times {\mlxvkq\M} 19 } } */
+/* { dg-final { scan-assembler-times {\mplxv\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 1 } } */
+
^ permalink raw reply [flat|nested] 11+ messages in thread
* [gcc(refs/users/meissner/heads/work071)] Add LXVKQ support.
@ 2021-10-15 3:39 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2021-10-15 3:39 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:d810d1c017fa0742f3a0f8d617c1b5f118da6b81
commit d810d1c017fa0742f3a0f8d617c1b5f118da6b81
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Thu Oct 14 23:38:50 2021 -0400
Add LXVKQ support.
This patch adds support to generate the LXVKQ instruction to load specific
IEEE-128 floating point constants.
Compared to the last time I submitted this patch, I modified it so that it
uses the bit pattern of the vector to see if it can generate the LXVKQ
instruction. This means on a little endian Power<xxx> system, the
following code will generate a LXVKQ 34,16 instruction:
vector long long foo (void)
{
return (vector long long) { 0x0000000000000000, 0x8000000000000000 };
}
because that vector pattern is the same bit pattern as -0.0F128.
2021-10-14 Michael Meissner <meissner@the-meissners.org>
gcc/
* config/rs6000/constraints.md (eQ): New constraint.
* config/rs6000/predicates.md (easy_fp_constant): Add support for
generating the LXVKQ instruction.
(easy_vector_constant_ieee128): New predicate.
(easy_vector_constant): Add support for generating the LXVKQ
instruction.
* config/rs6000/rs6000-protos.h (rs6000_vec_const): Add field for
generating LXVKQ.
(vec_const_use_lxvkq): New declaration.
* config/rs6000/rs6000.c (output_vec_const_move): Add support for
generating LXVKQ.
(vec_const_use_lxvkq): New function.
* config/rs6000/rs6000.opt (-mlxvkq): New debug option.
* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
generating LXVKQ.
(vsx_mov<mode>_32bit): Likewise.
* doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
eQ constraint.
gcc/testsuite/
* gcc.target/powerpc/float128-constant.c: New test.
Diff:
---
gcc/config/rs6000/constraints.md | 5 +
gcc/config/rs6000/predicates.md | 22 +++
gcc/config/rs6000/rs6000-protos.h | 2 +
gcc/config/rs6000/rs6000.c | 56 ++++++++
gcc/config/rs6000/rs6000.opt | 4 +
gcc/config/rs6000/vsx.md | 28 ++--
gcc/doc/md.texi | 3 +
.../gcc.target/powerpc/float128-constant.c | 160 +++++++++++++++++++++
8 files changed, 266 insertions(+), 14 deletions(-)
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index f4f4794eef3..e645f405588 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -225,6 +225,11 @@
"A vector constant that can be loaded with one prefixed instruction."
(match_operand 0 "vsx_prefixed_vector_constant"))
+;; IEEE 128-bit constant that can be loaded with the LXVKQ instruction.
+(define_constraint "eQ"
+ "An IEEE 128-bit constant that can be loaded with the LXVKQ instruction."
+ (match_operand 0 "easy_vector_constant_ieee128"))
+
;; Floating-point constraints. These two are defined so that insn
;; length attributes can be calculated exactly.
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 06d7f34006d..517ce08f03d 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -606,6 +606,9 @@
if (TARGET_POWER10 && vec_const_to_bytes (op, mode, &vec_const))
{
+ if (vec_const_use_lxvkq (&vec_const))
+ return true;
+
if (vec_const_use_xxspltidp (&vec_const))
return true;
}
@@ -666,6 +669,22 @@
return false;
})
+;; Return 1 if the operand is a special IEEE 128-bit value that can be loaded
+;; via the LXVKQ instruction.
+
+(define_predicate "easy_vector_constant_ieee128"
+ (match_code "const_vector,const_double")
+{
+ rs6000_vec_const vec_const;
+
+ /* Can we generate the LXVKQ instruction? */
+ if (!TARGET_LXVKQ || !TARGET_FLOAT128_HW || !TARGET_POWER10 || !TARGET_VSX)
+ return false;
+
+ return (vec_const_to_bytes (op, mode, &vec_const)
+ && vec_const_use_lxvkq (&vec_const));
+})
+
;; Return 1 if the operand is a constant that can loaded with a XXSPLTIB
;; instruction and then a VUPKHSB, VECSB2W or VECSB2D instruction.
@@ -720,6 +739,9 @@
if (TARGET_POWER10 && vec_const_to_bytes (op, mode, &vec_const))
{
+ if (vec_const_use_lxvkq (&vec_const))
+ return true;
+
if (vec_const_use_xxspltidp (&vec_const))
return true;
}
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index df4ae364bfb..6e8b81cb134 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -240,10 +240,12 @@ typedef struct {
unsigned char bytes[VECTOR_CONST_BYTES];
machine_mode orig_mode; /* Original mode. */
unsigned int xxspltidp_immediate; /* Immediate value for XXSPLTIDP. */
+ unsigned int lxvkq_immediate; /* Immediate to use with LXVKQ. */
} rs6000_vec_const;
extern bool vec_const_to_bytes (rtx, machine_mode, rs6000_vec_const *);
extern bool vec_const_use_xxspltidp (rs6000_vec_const *);
+extern bool vec_const_use_lxvkq (rs6000_vec_const *);
#endif /* RTX_CODE */
#ifdef TREE_CODE
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 593903ff8c9..d238dd84fe7 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6993,6 +6993,12 @@ output_vec_const_move (rtx *operands)
rs6000_vec_const vec_const;
if (TARGET_POWER10 && vec_const_to_bytes (vec, mode, &vec_const))
{
+ if (vec_const_use_lxvkq (&vec_const))
+ {
+ operands[2] = GEN_INT (vec_const.lxvkq_immediate);
+ return "lxvkq %x0,%2";
+ }
+
if (vec_const_use_xxspltidp (&vec_const))
{
operands[2] = GEN_INT (vec_const.xxspltidp_immediate);
@@ -28778,6 +28784,56 @@ vec_const_use_xxspltidp (rs6000_vec_const *vec_const)
return true;
}
+/* Determine if a vector constant can be loaded with LXVKQ. If so, fill out
+ the fields used to generate the instruction. */
+
+bool
+vec_const_use_lxvkq (rs6000_vec_const *vec_const)
+{
+ unsigned immediate;
+
+ if (!TARGET_LXVKQ || !TARGET_PREFIXED || !TARGET_VSX)
+ return false;
+
+ /* Verify that all of the bottom 3 words in the constants loaded by the
+ LXVKQ instruction are zero. */
+ for (size_t i = 1; i < VECTOR_CONST_32BIT; i++)
+ if (vec_const->words[i] != 0)
+ return false;
+
+ /* See if we have a match. */
+ switch (vec_const->words[0])
+ {
+ case 0x3FFF0000U: immediate = 1; break; /* IEEE 128-bit +1.0. */
+ case 0x40000000U: immediate = 2; break; /* IEEE 128-bit +2.0. */
+ case 0x40008000U: immediate = 3; break; /* IEEE 128-bit +3.0. */
+ case 0x40010000U: immediate = 4; break; /* IEEE 128-bit +4.0. */
+ case 0x40014000U: immediate = 5; break; /* IEEE 128-bit +5.0. */
+ case 0x40018000U: immediate = 6; break; /* IEEE 128-bit +6.0. */
+ case 0x4001C000U: immediate = 7; break; /* IEEE 128-bit +7.0. */
+ case 0x7FFF0000U: immediate = 8; break; /* IEEE 128-bit +Infinity. */
+ case 0x7FFF8000U: immediate = 9; break; /* IEEE 128-bit quiet NaN. */
+ case 0x80000000U: immediate = 16; break; /* IEEE 128-bit -0.0. */
+ case 0xBFFF0000U: immediate = 17; break; /* IEEE 128-bit -1.0. */
+ case 0xC0000000U: immediate = 18; break; /* IEEE 128-bit -2.0. */
+ case 0xC0008000U: immediate = 19; break; /* IEEE 128-bit -3.0. */
+ case 0xC0010000U: immediate = 20; break; /* IEEE 128-bit -4.0. */
+ case 0xC0014000U: immediate = 21; break; /* IEEE 128-bit -5.0. */
+ case 0xC0018000U: immediate = 22; break; /* IEEE 128-bit -6.0. */
+ case 0xC001C000U: immediate = 23; break; /* IEEE 128-bit -7.0. */
+ case 0xFFFF0000U: immediate = 24; break; /* IEEE 128-bit -Infinity. */
+
+ /* anything else cannot be loaded. */
+ default:
+ return false;
+ }
+
+ /* We can use the LXVKQ instruction, record the immediate needed for the
+ instruction. */
+ vec_const->lxvkq_immediate = immediate;
+ return true;
+}
+
/* Convert a vector constant to an internal structure, breaking it out to
bytes, half words, words, and double words. Return true if we have
successfully broken it out. */
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 1d7ce4cc94a..c9eb78952d6 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -644,6 +644,10 @@ mxxspltidp
Target Undocumented Var(TARGET_XXSPLTIDP) Init(1) Save
Generate (do not generate) XXSPLTIDP instructions.
+mlxvkq
+Target Undocumented Var(TARGET_LXVKQ) Init(1) Save
+Generate (do not generate) LXVKQ instructions.
+
-param=rs6000-density-pct-threshold=
Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param
When costing for loop vectorization, we probably need to penalize the loop body
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index ef5f43eb820..15a22525000 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1192,19 +1192,19 @@
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
-;; XXLSPLTIDP
+;; XXSPLTIDP LXVKQ
;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, r, we, ?wQ,
?&r, ??r, ??Y, <??r>, wa, v,
- wa,
+ wa, wa,
?wa, v, <??r>, wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, we, r, r,
wQ, Y, r, r, wE, jwM,
- eV,
+ eV, eQ,
?jwM, W, <nW>, v, wZ"))]
"TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
@@ -1216,46 +1216,46 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
store, load, store, *, vecsimple, vecsimple,
- vecperm,
+ vecperm, vecperm,
vecsimple, *, *, vecstore, vecload")
(set_attr "num_insns"
"*, *, *, 2, *, 2,
2, 2, 2, 2, *, *,
- *,
+ *, *,
*, 5, 2, *, *")
(set_attr "max_prefixed_insns"
"*, *, *, *, *, 2,
2, 2, 2, 2, *, *,
- *,
+ *, *,
*, *, *, *, *")
(set_attr "length"
"*, *, *, 8, *, 8,
8, 8, 8, 8, *, *,
- *,
+ *, *,
*, 20, 8, *, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
*, *, *, *, p9v, *,
- p10,
+ p10, p10,
<VSisa>, *, *, *, *")])
;; VSX store VSX load VSX move GPR load GPR store GPR move
;; XXSPLTIB VSPLTISW VSX 0/-1
-;; XXSPLTIDP
+;; XXSPLTIDP LXVKQ
;; VMX const GPR const
;; LVX (VMX) STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, ??r, ??Y, <??r>,
wa, v, ?wa,
- wa,
+ wa, wa,
v, <??r>,
wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, Y, r, r,
wE, jwM, ?jwM,
- eV,
+ eV, eQ,
W, <nW>,
v, wZ"))]
@@ -1268,19 +1268,19 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, load, store, *,
vecsimple, vecsimple, vecsimple,
- vecperm,
+ vecperm, vecperm,
*, *,
vecstore, vecload")
(set_attr "length"
"*, *, *, 16, 16, 16,
*, *, *,
- *,
+ *, *,
20, 16,
*, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
p9v, *, <VSisa>,
- p10,
+ p10, p10,
*, *,
*, *")])
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 4b9ca062688..0e87ad1f200 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3344,6 +3344,9 @@ a VSX register.
A vector constant that can be loaded with one prefixed instruction to
a VSX register.
+@item eQ
+A constant that can be loaded with the LXVKQ instruction.
+
@ifset INTERNALS
@item G
A floating point constant that can be loaded into a register with one
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-constant.c b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
new file mode 100644
index 00000000000..f6becac1075
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
@@ -0,0 +1,160 @@
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -mlxvkq -O2" } */
+
+/* Test whether the LXVKQ instruction is generated to load special IEEE 128-bit
+ constants. */
+
+_Float128
+return_0 (void)
+{
+ return 0.0f128; /* XXSPLTIB 34,0. */
+}
+
+_Float128
+return_1 (void)
+{
+ return 1.0f128; /* LXVKQ 34,1. */
+}
+
+_Float128
+return_2 (void)
+{
+ return 2.0f128; /* LXVKQ 34,2. */
+}
+
+_Float128
+return_3 (void)
+{
+ return 3.0f128; /* LXVKQ 34,3. */
+}
+
+_Float128
+return_4 (void)
+{
+ return 4.0f128; /* LXVKQ 34,4. */
+}
+
+_Float128
+return_5 (void)
+{
+ return 5.0f128; /* LXVKQ 34,5. */
+}
+
+_Float128
+return_6 (void)
+{
+ return 6.0f128; /* LXVKQ 34,6. */
+}
+
+_Float128
+return_7 (void)
+{
+ return 7.0f128; /* LXVKQ 34,7. */
+}
+
+_Float128
+return_m0 (void)
+{
+ return -0.0f128; /* LXVKQ 34,16. */
+}
+
+_Float128
+return_m1 (void)
+{
+ return -1.0f128; /* LXVKQ 34,17. */
+}
+
+_Float128
+return_m2 (void)
+{
+ return -2.0f128; /* LXVKQ 34,18. */
+}
+
+_Float128
+return_m3 (void)
+{
+ return -3.0f128; /* LXVKQ 34,19. */
+}
+
+_Float128
+return_m4 (void)
+{
+ return -4.0f128; /* LXVKQ 34,20. */
+}
+
+_Float128
+return_m5 (void)
+{
+ return -5.0f128; /* LXVKQ 34,21. */
+}
+
+_Float128
+return_m6 (void)
+{
+ return -6.0f128; /* LXVKQ 34,22. */
+}
+
+_Float128
+return_m7 (void)
+{
+ return -7.0f128; /* LXVKQ 34,23. */
+}
+
+_Float128
+return_inf (void)
+{
+ return __builtin_inff128 (); /* LXVKQ 34,8. */
+}
+
+_Float128
+return_minf (void)
+{
+ return - __builtin_inff128 (); /* LXVKQ 34,24. */
+}
+
+_Float128
+return_nan (void)
+{
+ return __builtin_nanf128 (""); /* LXVKQ 34,9. */
+}
+
+/* Note, the following NaNs should not generate a LXVKQ instruction. */
+_Float128
+return_mnan (void)
+{
+ return - __builtin_nanf128 (""); /* PLXV 34,... */
+}
+
+_Float128
+return_nan2 (void)
+{
+ return __builtin_nanf128 ("1"); /* PLXV 34,... */
+}
+
+_Float128
+return_nans (void)
+{
+ return __builtin_nansf128 (""); /* PLXV 34,... */
+}
+
+vector long long
+return_longlong_neg_0 (void)
+{
+ /* This vector is the same pattern as -0.0F128. */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define FIRST 0x8000000000000000
+#define SECOND 0x0000000000000000
+
+#else
+#define FIRST 0x0000000000000000
+#define SECOND 0x8000000000000000
+#endif
+
+ return (vector long long) { FIRST, SECOND }; /* LXVKQ 34,16. */
+}
+
+/* { dg-final { scan-assembler-times {\mlxvkq\M} 19 } } */
+/* { dg-final { scan-assembler-times {\mplxv\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 1 } } */
+
^ permalink raw reply [flat|nested] 11+ messages in thread
* [gcc(refs/users/meissner/heads/work071)] Add LXVKQ support.
@ 2021-10-14 16:51 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2021-10-14 16:51 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:f77237f641d422a76607b1006d45dde09d9b5a4f
commit f77237f641d422a76607b1006d45dde09d9b5a4f
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Thu Oct 14 12:50:46 2021 -0400
Add LXVKQ support.
This patch adds support to generate the LXVKQ instruction to load specific
IEEE-128 floating point constants.
Compared to the last time I submitted this patch, I modified it so that it
uses the bit pattern of the vector to see if it can generate the LXVKQ
instruction. This means on a little endian Power<xxx> system, the
following code will generate a LXVKQ 34,16 instruction:
vector long long foo (void)
{
return (vector long long) { 0x0000000000000000, 0x8000000000000000 };
}
because that vector pattern is the same bit pattern as -0.0F128.
2021-10-14 Michael Meissner <meissner@the-meissners.org>
gcc/testsuite/
* gcc.target/powerpc/float128-constant.c: New test.
Diff:
---
.../gcc.target/powerpc/float128-constant.c | 160 +++++++++++++++++++++
1 file changed, 160 insertions(+)
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-constant.c b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
new file mode 100644
index 00000000000..f6becac1075
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
@@ -0,0 +1,160 @@
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -mlxvkq -O2" } */
+
+/* Test whether the LXVKQ instruction is generated to load special IEEE 128-bit
+ constants. */
+
+_Float128
+return_0 (void)
+{
+ return 0.0f128; /* XXSPLTIB 34,0. */
+}
+
+_Float128
+return_1 (void)
+{
+ return 1.0f128; /* LXVKQ 34,1. */
+}
+
+_Float128
+return_2 (void)
+{
+ return 2.0f128; /* LXVKQ 34,2. */
+}
+
+_Float128
+return_3 (void)
+{
+ return 3.0f128; /* LXVKQ 34,3. */
+}
+
+_Float128
+return_4 (void)
+{
+ return 4.0f128; /* LXVKQ 34,4. */
+}
+
+_Float128
+return_5 (void)
+{
+ return 5.0f128; /* LXVKQ 34,5. */
+}
+
+_Float128
+return_6 (void)
+{
+ return 6.0f128; /* LXVKQ 34,6. */
+}
+
+_Float128
+return_7 (void)
+{
+ return 7.0f128; /* LXVKQ 34,7. */
+}
+
+_Float128
+return_m0 (void)
+{
+ return -0.0f128; /* LXVKQ 34,16. */
+}
+
+_Float128
+return_m1 (void)
+{
+ return -1.0f128; /* LXVKQ 34,17. */
+}
+
+_Float128
+return_m2 (void)
+{
+ return -2.0f128; /* LXVKQ 34,18. */
+}
+
+_Float128
+return_m3 (void)
+{
+ return -3.0f128; /* LXVKQ 34,19. */
+}
+
+_Float128
+return_m4 (void)
+{
+ return -4.0f128; /* LXVKQ 34,20. */
+}
+
+_Float128
+return_m5 (void)
+{
+ return -5.0f128; /* LXVKQ 34,21. */
+}
+
+_Float128
+return_m6 (void)
+{
+ return -6.0f128; /* LXVKQ 34,22. */
+}
+
+_Float128
+return_m7 (void)
+{
+ return -7.0f128; /* LXVKQ 34,23. */
+}
+
+_Float128
+return_inf (void)
+{
+ return __builtin_inff128 (); /* LXVKQ 34,8. */
+}
+
+_Float128
+return_minf (void)
+{
+ return - __builtin_inff128 (); /* LXVKQ 34,24. */
+}
+
+_Float128
+return_nan (void)
+{
+ return __builtin_nanf128 (""); /* LXVKQ 34,9. */
+}
+
+/* Note, the following NaNs should not generate a LXVKQ instruction. */
+_Float128
+return_mnan (void)
+{
+ return - __builtin_nanf128 (""); /* PLXV 34,... */
+}
+
+_Float128
+return_nan2 (void)
+{
+ return __builtin_nanf128 ("1"); /* PLXV 34,... */
+}
+
+_Float128
+return_nans (void)
+{
+ return __builtin_nansf128 (""); /* PLXV 34,... */
+}
+
+vector long long
+return_longlong_neg_0 (void)
+{
+ /* This vector is the same pattern as -0.0F128. */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define FIRST 0x8000000000000000
+#define SECOND 0x0000000000000000
+
+#else
+#define FIRST 0x0000000000000000
+#define SECOND 0x8000000000000000
+#endif
+
+ return (vector long long) { FIRST, SECOND }; /* LXVKQ 34,16. */
+}
+
+/* { dg-final { scan-assembler-times {\mlxvkq\M} 19 } } */
+/* { dg-final { scan-assembler-times {\mplxv\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 1 } } */
+
^ permalink raw reply [flat|nested] 11+ messages in thread
* [gcc(refs/users/meissner/heads/work071)] Add LXVKQ support.
@ 2021-10-14 16:50 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2021-10-14 16:50 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:a5c6803bed76301da412ce64c00cf12bae7c998e
commit a5c6803bed76301da412ce64c00cf12bae7c998e
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Thu Oct 14 12:49:56 2021 -0400
Add LXVKQ support.
This patch adds support to generate the LXVKQ instruction to load specific
IEEE-128 floating point constants.
Compared to the last time I submitted this patch, I modified it so that it
uses the bit pattern of the vector to see if it can generate the LXVKQ
instruction. This means on a little endian Power<xxx> system, the
following code will generate a LXVKQ 34,16 instruction:
vector long long foo (void)
{
return (vector long long) { 0x0000000000000000, 0x8000000000000000 };
}
because that vector pattern is the same bit pattern as -0.0F128.
2021-10-14 Michael Meissner <meissner@the-meissners.org>
gcc/
* config/rs6000/constraints.md (eQ): New constraint.
* config/rs6000/predicates.md (easy_fp_constant): Add support for
generating the LXVKQ instruction.
(easy_vector_constant_ieee128): New predicate.
(easy_vector_constant): Add support for generating the LXVKQ
instruction.
* config/rs6000/rs6000-protos.h (rs6000_vec_const): Add field
for generating LXVKQ.
* config/rs6000/rs6000.c (output_vec_const_move): Add support for
generating LXVKQ.
(vec_const_use_lxvkq): New function.
* config/rs6000/rs6000.opt (-mlxvkq): New debug option.
* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
generating LXVKQ.
(vsx_mov<mode>_32bit): Likewise.
* doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
eQ constraint.
Diff:
---
gcc/config/rs6000/constraints.md | 5 ++++
gcc/config/rs6000/predicates.md | 22 +++++++++++++++
gcc/config/rs6000/rs6000-protos.h | 2 ++
gcc/config/rs6000/rs6000.c | 56 +++++++++++++++++++++++++++++++++++++++
gcc/config/rs6000/rs6000.opt | 4 +++
gcc/config/rs6000/vsx.md | 28 ++++++++++----------
gcc/doc/md.texi | 3 +++
7 files changed, 106 insertions(+), 14 deletions(-)
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index d26c8940104..a15b659d9d7 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -218,6 +218,11 @@
"A signed 34-bit integer constant if prefixed instructions are supported."
(match_operand 0 "cint34_operand"))
+;; IEEE 128-bit constant that can be loaded with the LXVKQ instruction
+(define_constraint "eQ"
+ "An IEEE 128-bit constant that can be loaded with the LXVKQ instruction."
+ (match_operand 0 "easy_vector_constant_ieee128"))
+
;; Floating-point constraints. These two are defined so that insn
;; length attributes can be calculated exactly.
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index d4b50276bac..de191fff08a 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -606,6 +606,9 @@
if (TARGET_POWER10 && vec_const_to_bytes (op, mode, &vec_const))
{
+ if (vec_const_use_lxvkq (&vec_const))
+ return true;
+
if (vec_const_use_xxspltidp (&vec_const))
return true;
}
@@ -635,6 +638,22 @@
&& vec_const_use_xxspltidp (&vec_const));
})
+;; Return 1 if the operand is a special IEEE 128-bit value that can be loaded
+;; via the LXVKQ instruction.
+
+(define_predicate "easy_vector_constant_ieee128"
+ (match_code "const_vector,vec_duplicate,const_int,const_double")
+{
+ rs6000_vec_const vec_const;
+
+ /* Can we generate the LXVKQ instruction? */
+ if (!TARGET_LXVKQ || !TARGET_FLOAT128_HW || !TARGET_POWER10 || !TARGET_VSX)
+ return false;
+
+ return (vec_const_to_bytes (op, mode, &vec_const)
+ && vec_const_use_lxvkq (&vec_const));
+})
+
;; Return 1 if the operand is a constant that can loaded with a XXSPLTIB
;; instruction and then a VUPKHSB, VECSB2W or VECSB2D instruction.
@@ -689,6 +708,9 @@
if (TARGET_POWER10 && vec_const_to_bytes (op, mode, &vec_const))
{
+ if (vec_const_use_lxvkq (&vec_const))
+ return true;
+
if (vec_const_use_xxspltidp (&vec_const))
return true;
}
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index df4ae364bfb..43c0f96aab5 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -240,10 +240,12 @@ typedef struct {
unsigned char bytes[VECTOR_CONST_BYTES];
machine_mode orig_mode; /* Original mode. */
unsigned int xxspltidp_immediate; /* Immediate value for XXSPLTIDP. */
+ unsigned lxvkq_immediate; /* Immediate to use with LXVKQ. */
} rs6000_vec_const;
extern bool vec_const_to_bytes (rtx, machine_mode, rs6000_vec_const *);
extern bool vec_const_use_xxspltidp (rs6000_vec_const *);
+extern bool vec_const_use_lxvkq (rs6000_vec_const *);
#endif /* RTX_CODE */
#ifdef TREE_CODE
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 3ec59ed2a5e..359379348bb 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6993,6 +6993,12 @@ output_vec_const_move (rtx *operands)
rs6000_vec_const vec_const;
if (TARGET_POWER10 && vec_const_to_bytes (vec, mode, &vec_const))
{
+ if (vec_const_use_lxvkq (&vec_const))
+ {
+ operands[2] = GEN_INT (vec_const.lxvkq_immediate);
+ return "lxvkq %x0,%2";
+ }
+
if (vec_const_use_xxspltidp (&vec_const))
{
operands[2] = GEN_INT (vec_const.xxspltidp_immediate);
@@ -28787,6 +28793,56 @@ vec_const_use_xxspltidp (rs6000_vec_const *vec_const)
return true;
}
+/* Determine if a vector constant can be loaded with LXVKQ. If so, fill out
+ the fields used to generate the instruction. */
+
+bool
+vec_const_use_lxvkq (rs6000_vec_const *vec_const)
+{
+ unsigned immediate;
+
+ if (!TARGET_LXVKQ || !TARGET_PREFIXED || !TARGET_VSX)
+ return false;
+
+ /* Verify that all of the bottom 3 words in the constants loaded by the
+ LXVKQ instruction are zero. */
+ for (size_t i = 1; i < VECTOR_CONST_32BIT; i++)
+ if (vec_const->words[i] != 0)
+ return false;
+
+ /* See if we have a match. */
+ switch (vec_const->words[0])
+ {
+ case 0x3FFF0000U: immediate = 1; break; /* IEEE 128-bit +1.0. */
+ case 0x40000000U: immediate = 2; break; /* IEEE 128-bit +2.0. */
+ case 0x40008000U: immediate = 3; break; /* IEEE 128-bit +3.0. */
+ case 0x40010000U: immediate = 4; break; /* IEEE 128-bit +4.0. */
+ case 0x40014000U: immediate = 5; break; /* IEEE 128-bit +5.0. */
+ case 0x40018000U: immediate = 6; break; /* IEEE 128-bit +6.0. */
+ case 0x4001C000U: immediate = 7; break; /* IEEE 128-bit +7.0. */
+ case 0x7FFF0000U: immediate = 8; break; /* IEEE 128-bit +Infinity. */
+ case 0x7FFF8000U: immediate = 9; break; /* IEEE 128-bit quiet NaN. */
+ case 0x80000000U: immediate = 16; break; /* IEEE 128-bit -0.0. */
+ case 0xBFFF0000U: immediate = 17; break; /* IEEE 128-bit -1.0. */
+ case 0xC0000000U: immediate = 18; break; /* IEEE 128-bit -2.0. */
+ case 0xC0008000U: immediate = 19; break; /* IEEE 128-bit -3.0. */
+ case 0xC0010000U: immediate = 20; break; /* IEEE 128-bit -4.0. */
+ case 0xC0014000U: immediate = 21; break; /* IEEE 128-bit -5.0. */
+ case 0xC0018000U: immediate = 22; break; /* IEEE 128-bit -6.0. */
+ case 0xC001C000U: immediate = 23; break; /* IEEE 128-bit -7.0. */
+ case 0xFFFF0000U: immediate = 24; break; /* IEEE 128-bit -Infinity. */
+
+ /* Anything else cannot be loaded. */
+ default:
+ return false;
+ }
+
+ /* We can use the LXVKQ instruction, record the immediate needed for the
+ instruction. */
+ vec_const->lxvkq_immediate = immediate;
+ return true;
+}
+
/* Convert a vector constant to an internal structure, breaking it out to
bytes, half words, words, and double words. Return true if we have
successfully broken it out. */
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 1d7ce4cc94a..c9eb78952d6 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -644,6 +644,10 @@ mxxspltidp
Target Undocumented Var(TARGET_XXSPLTIDP) Init(1) Save
Generate (do not generate) XXSPLTIDP instructions.
+mlxvkq
+Target Undocumented Var(TARGET_LXVKQ) Init(1) Save
+Generate (do not generate) LXVKQ instructions.
+
-param=rs6000-density-pct-threshold=
Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param
When costing for loop vectorization, we probably need to penalize the loop body
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 67ba121ed77..b36bbcd2b4e 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1192,19 +1192,19 @@
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
-;; XXLSPLTIDP
+;; XXSPLTIDP LXVKQ
;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, r, we, ?wQ,
?&r, ??r, ??Y, <??r>, wa, v,
- wa,
+ wa, wa,
?wa, v, <??r>, wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, we, r, r,
wQ, Y, r, r, wE, jwM,
- eD,
+ eD, eQ,
?jwM, W, <nW>, v, wZ"))]
"TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
@@ -1216,46 +1216,46 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
store, load, store, *, vecsimple, vecsimple,
- vecperm,
+ vecperm, vecperm,
vecsimple, *, *, vecstore, vecload")
(set_attr "num_insns"
"*, *, *, 2, *, 2,
2, 2, 2, 2, *, *,
- *,
+ *, *,
*, 5, 2, *, *")
(set_attr "max_prefixed_insns"
"*, *, *, *, *, 2,
2, 2, 2, 2, *, *,
- *,
+ *, *,
*, *, *, *, *")
(set_attr "length"
"*, *, *, 8, *, 8,
8, 8, 8, 8, *, *,
- *,
+ *, *,
*, 20, 8, *, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
*, *, *, *, p9v, *,
- p10,
+ p10, p10,
<VSisa>, *, *, *, *")])
;; VSX store VSX load VSX move GPR load GPR store GPR move
;; XXSPLTIB VSPLTISW VSX 0/-1
-;; XXSPLTIDP
+;; XXSPLTIDP LXVKQ
;; VMX const GPR const
;; LVX (VMX) STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, ??r, ??Y, <??r>,
wa, v, ?wa,
- wa,
+ wa, wa,
v, <??r>,
wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, Y, r, r,
wE, jwM, ?jwM,
- eD,
+ eD, eQ,
W, <nW>,
v, wZ"))]
@@ -1268,19 +1268,19 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, load, store, *,
vecsimple, vecsimple, vecsimple,
- vecperm,
+ vecperm, vecperm,
*, *,
vecstore, vecload")
(set_attr "length"
"*, *, *, 16, 16, 16,
*, *, *,
- *,
+ *, *,
20, 16,
*, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
p9v, *, <VSisa>,
- p10,
+ p10, p10,
*, *,
*, *")])
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index b9dfcaf0d44..501e0069ebb 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3339,6 +3339,9 @@ A constant that can be loaded with the XXSPLTIDP instruction.
@item eI
A signed 34-bit integer constant if prefixed instructions are supported.
+@item eQ
+A constant that can be loaded with the LXVKQ instruction.
+
@ifset INTERNALS
@item G
A floating point constant that can be loaded into a register with one
^ permalink raw reply [flat|nested] 11+ messages in thread
* [gcc(refs/users/meissner/heads/work071)] Add LXVKQ support.
@ 2021-10-14 16:46 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2021-10-14 16:46 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:5f876e6f4a6cb957ee80e45cca768062d1962df7
commit 5f876e6f4a6cb957ee80e45cca768062d1962df7
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Thu Oct 14 12:45:48 2021 -0400
Add LXVKQ support.
This patch adds support to generate the LXVKQ instruction to load specific
IEEE-128 floating point constants.
Compared to the last time I submitted this patch, I modified it so that it
uses the bit pattern of the vector to see if it can generate the LXVKQ
instruction. This means on a little endian Power<xxx> system, the
following code will generate a LXVKQ 34,16 instruction:
vector long long foo (void)
{
return (vector long long) { 0x0000000000000000, 0x8000000000000000 };
}
because that vector pattern is the same bit pattern as -0.0F128.
2021-10-14 Michael Meissner <meissner@the-meissners.org>
gcc/
* config/rs6000/constraints.md (eQ): New constraint.
* config/rs6000/predicates.md (easy_fp_constant): Add support for
generating the LXVKQ instruction.
(easy_vector_constant_ieee128): New predicate.
(easy_vector_constant): Add support for generating the LXVKQ
instruction.
* config/rs6000/rs6000-protos.h (rs6000_vec_const): Add field
for generating LXVKQ.
* config/rs6000/rs6000.c (output_vec_const_move): Add support for
generating LXVKQ.
(vec_const_use_lxvkq): New function.
* config/rs6000/rs6000.opt (-mlxvkq): New debug option.
* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
generating LXVKQ.
(vsx_mov<mode>_32bit): Likewise.
* doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
eQ constraint.
gcc/testsuite/
* gcc.target/powerpc/float128-constant.c: New test.
Diff:
---
gcc/config/rs6000/constraints.md | 5 +
gcc/config/rs6000/predicates.md | 22 +++
gcc/config/rs6000/rs6000-protos.h | 2 +
gcc/config/rs6000/rs6000.c | 56 ++++++++
gcc/config/rs6000/rs6000.opt | 4 +
gcc/config/rs6000/vsx.md | 28 ++--
gcc/doc/md.texi | 3 +
.../gcc.target/powerpc/float128-constant.c | 160 +++++++++++++++++++++
8 files changed, 266 insertions(+), 14 deletions(-)
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index d26c8940104..a15b659d9d7 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -218,6 +218,11 @@
"A signed 34-bit integer constant if prefixed instructions are supported."
(match_operand 0 "cint34_operand"))
+;; IEEE 128-bit constant that can be loaded with the LXVKQ instruction
+(define_constraint "eQ"
+ "An IEEE 128-bit constant that can be loaded with the LXVKQ instruction."
+ (match_operand 0 "easy_vector_constant_ieee128"))
+
;; Floating-point constraints. These two are defined so that insn
;; length attributes can be calculated exactly.
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index d4b50276bac..de191fff08a 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -606,6 +606,9 @@
if (TARGET_POWER10 && vec_const_to_bytes (op, mode, &vec_const))
{
+ if (vec_const_use_lxvkq (&vec_const))
+ return true;
+
if (vec_const_use_xxspltidp (&vec_const))
return true;
}
@@ -635,6 +638,22 @@
&& vec_const_use_xxspltidp (&vec_const));
})
+;; Return 1 if the operand is a special IEEE 128-bit value that can be loaded
+;; via the LXVKQ instruction.
+
+(define_predicate "easy_vector_constant_ieee128"
+ (match_code "const_vector,vec_duplicate,const_int,const_double")
+{
+ rs6000_vec_const vec_const;
+
+ /* Can we generate the LXVKQ instruction? */
+ if (!TARGET_LXVKQ || !TARGET_FLOAT128_HW || !TARGET_POWER10 || !TARGET_VSX)
+ return false;
+
+ return (vec_const_to_bytes (op, mode, &vec_const)
+ && vec_const_use_lxvkq (&vec_const));
+})
+
;; Return 1 if the operand is a constant that can loaded with a XXSPLTIB
;; instruction and then a VUPKHSB, VECSB2W or VECSB2D instruction.
@@ -689,6 +708,9 @@
if (TARGET_POWER10 && vec_const_to_bytes (op, mode, &vec_const))
{
+ if (vec_const_use_lxvkq (&vec_const))
+ return true;
+
if (vec_const_use_xxspltidp (&vec_const))
return true;
}
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index df4ae364bfb..43c0f96aab5 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -240,10 +240,12 @@ typedef struct {
unsigned char bytes[VECTOR_CONST_BYTES];
machine_mode orig_mode; /* Original mode. */
unsigned int xxspltidp_immediate; /* Immediate value for XXSPLTIDP. */
+ unsigned lxvkq_immediate; /* Immediate to use with LXVKQ. */
} rs6000_vec_const;
extern bool vec_const_to_bytes (rtx, machine_mode, rs6000_vec_const *);
extern bool vec_const_use_xxspltidp (rs6000_vec_const *);
+extern bool vec_const_use_lxvkq (rs6000_vec_const *);
#endif /* RTX_CODE */
#ifdef TREE_CODE
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 3ec59ed2a5e..359379348bb 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6993,6 +6993,12 @@ output_vec_const_move (rtx *operands)
rs6000_vec_const vec_const;
if (TARGET_POWER10 && vec_const_to_bytes (vec, mode, &vec_const))
{
+ if (vec_const_use_lxvkq (&vec_const))
+ {
+ operands[2] = GEN_INT (vec_const.lxvkq_immediate);
+ return "lxvkq %x0,%2";
+ }
+
if (vec_const_use_xxspltidp (&vec_const))
{
operands[2] = GEN_INT (vec_const.xxspltidp_immediate);
@@ -28787,6 +28793,56 @@ vec_const_use_xxspltidp (rs6000_vec_const *vec_const)
return true;
}
+/* Determine if a vector constant can be loaded with LXVKQ. If so, fill out
+ the fields used to generate the instruction. */
+
+bool
+vec_const_use_lxvkq (rs6000_vec_const *vec_const)
+{
+ unsigned immediate;
+
+ if (!TARGET_LXVKQ || !TARGET_PREFIXED || !TARGET_VSX)
+ return false;
+
+ /* Verify that all of the bottom 3 words in the constants loaded by the
+ LXVKQ instruction are zero. */
+ for (size_t i = 1; i < VECTOR_CONST_32BIT; i++)
+ if (vec_const->words[i] != 0)
+ return false;
+
+ /* See if we have a match. */
+ switch (vec_const->words[0])
+ {
+ case 0x3FFF0000U: immediate = 1; break; /* IEEE 128-bit +1.0. */
+ case 0x40000000U: immediate = 2; break; /* IEEE 128-bit +2.0. */
+ case 0x40008000U: immediate = 3; break; /* IEEE 128-bit +3.0. */
+ case 0x40010000U: immediate = 4; break; /* IEEE 128-bit +4.0. */
+ case 0x40014000U: immediate = 5; break; /* IEEE 128-bit +5.0. */
+ case 0x40018000U: immediate = 6; break; /* IEEE 128-bit +6.0. */
+ case 0x4001C000U: immediate = 7; break; /* IEEE 128-bit +7.0. */
+ case 0x7FFF0000U: immediate = 8; break; /* IEEE 128-bit +Infinity. */
+ case 0x7FFF8000U: immediate = 9; break; /* IEEE 128-bit quiet NaN. */
+ case 0x80000000U: immediate = 16; break; /* IEEE 128-bit -0.0. */
+ case 0xBFFF0000U: immediate = 17; break; /* IEEE 128-bit -1.0. */
+ case 0xC0000000U: immediate = 18; break; /* IEEE 128-bit -2.0. */
+ case 0xC0008000U: immediate = 19; break; /* IEEE 128-bit -3.0. */
+ case 0xC0010000U: immediate = 20; break; /* IEEE 128-bit -4.0. */
+ case 0xC0014000U: immediate = 21; break; /* IEEE 128-bit -5.0. */
+ case 0xC0018000U: immediate = 22; break; /* IEEE 128-bit -6.0. */
+ case 0xC001C000U: immediate = 23; break; /* IEEE 128-bit -7.0. */
+ case 0xFFFF0000U: immediate = 24; break; /* IEEE 128-bit -Infinity. */
+
+ /* Anything else cannot be loaded. */
+ default:
+ return false;
+ }
+
+ /* We can use the LXVKQ instruction, record the immediate needed for the
+ instruction. */
+ vec_const->lxvkq_immediate = immediate;
+ return true;
+}
+
/* Convert a vector constant to an internal structure, breaking it out to
bytes, half words, words, and double words. Return true if we have
successfully broken it out. */
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 1d7ce4cc94a..c9eb78952d6 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -644,6 +644,10 @@ mxxspltidp
Target Undocumented Var(TARGET_XXSPLTIDP) Init(1) Save
Generate (do not generate) XXSPLTIDP instructions.
+mlxvkq
+Target Undocumented Var(TARGET_LXVKQ) Init(1) Save
+Generate (do not generate) LXVKQ instructions.
+
-param=rs6000-density-pct-threshold=
Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param
When costing for loop vectorization, we probably need to penalize the loop body
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 6be3376f5d1..b36bbcd2b4e 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1192,19 +1192,19 @@
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
-;; XXLSPLTIDP
+;; XXSPLTIDP LXVKQ
;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, r, we, ?wQ,
?&r, ??r, ??Y, <??r>, wa, v,
- wa,
+ wa, wa,
?wa, v, <??r>, wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, we, r, r,
wQ, Y, r, r, wE, jwM,
- wD,
+ eD, eQ,
?jwM, W, <nW>, v, wZ"))]
"TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
@@ -1216,46 +1216,46 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
store, load, store, *, vecsimple, vecsimple,
- vecperm,
+ vecperm, vecperm,
vecsimple, *, *, vecstore, vecload")
(set_attr "num_insns"
"*, *, *, 2, *, 2,
2, 2, 2, 2, *, *,
- *,
+ *, *,
*, 5, 2, *, *")
(set_attr "max_prefixed_insns"
"*, *, *, *, *, 2,
2, 2, 2, 2, *, *,
- *,
+ *, *,
*, *, *, *, *")
(set_attr "length"
"*, *, *, 8, *, 8,
8, 8, 8, 8, *, *,
- *,
+ *, *,
*, 20, 8, *, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
*, *, *, *, p9v, *,
- p10,
+ p10, p10,
<VSisa>, *, *, *, *")])
;; VSX store VSX load VSX move GPR load GPR store GPR move
;; XXSPLTIB VSPLTISW VSX 0/-1
-;; XXSPLTIDP
+;; XXSPLTIDP LXVKQ
;; VMX const GPR const
;; LVX (VMX) STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, ??r, ??Y, <??r>,
wa, v, ?wa,
- wa,
+ wa, wa,
v, <??r>,
wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, Y, r, r,
wE, jwM, ?jwM,
- eD,
+ eD, eQ,
W, <nW>,
v, wZ"))]
@@ -1268,19 +1268,19 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, load, store, *,
vecsimple, vecsimple, vecsimple,
- vecperm,
+ vecperm, vecperm,
*, *,
vecstore, vecload")
(set_attr "length"
"*, *, *, 16, 16, 16,
*, *, *,
- *,
+ *, *,
20, 16,
*, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
p9v, *, <VSisa>,
- p10,
+ p10, p10,
*, *,
*, *")])
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index b9dfcaf0d44..501e0069ebb 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3339,6 +3339,9 @@ A constant that can be loaded with the XXSPLTIDP instruction.
@item eI
A signed 34-bit integer constant if prefixed instructions are supported.
+@item eQ
+A constant that can be loaded with the LXVKQ instruction.
+
@ifset INTERNALS
@item G
A floating point constant that can be loaded into a register with one
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-constant.c b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
new file mode 100644
index 00000000000..f6becac1075
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
@@ -0,0 +1,160 @@
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -mlxvkq -O2" } */
+
+/* Test whether the LXVKQ instruction is generated to load special IEEE 128-bit
+ constants. */
+
+_Float128
+return_0 (void)
+{
+ return 0.0f128; /* XXSPLTIB 34,0. */
+}
+
+_Float128
+return_1 (void)
+{
+ return 1.0f128; /* LXVKQ 34,1. */
+}
+
+_Float128
+return_2 (void)
+{
+ return 2.0f128; /* LXVKQ 34,2. */
+}
+
+_Float128
+return_3 (void)
+{
+ return 3.0f128; /* LXVKQ 34,3. */
+}
+
+_Float128
+return_4 (void)
+{
+ return 4.0f128; /* LXVKQ 34,4. */
+}
+
+_Float128
+return_5 (void)
+{
+ return 5.0f128; /* LXVKQ 34,5. */
+}
+
+_Float128
+return_6 (void)
+{
+ return 6.0f128; /* LXVKQ 34,6. */
+}
+
+_Float128
+return_7 (void)
+{
+ return 7.0f128; /* LXVKQ 34,7. */
+}
+
+_Float128
+return_m0 (void)
+{
+ return -0.0f128; /* LXVKQ 34,16. */
+}
+
+_Float128
+return_m1 (void)
+{
+ return -1.0f128; /* LXVKQ 34,17. */
+}
+
+_Float128
+return_m2 (void)
+{
+ return -2.0f128; /* LXVKQ 34,18. */
+}
+
+_Float128
+return_m3 (void)
+{
+ return -3.0f128; /* LXVKQ 34,19. */
+}
+
+_Float128
+return_m4 (void)
+{
+ return -4.0f128; /* LXVKQ 34,20. */
+}
+
+_Float128
+return_m5 (void)
+{
+ return -5.0f128; /* LXVKQ 34,21. */
+}
+
+_Float128
+return_m6 (void)
+{
+ return -6.0f128; /* LXVKQ 34,22. */
+}
+
+_Float128
+return_m7 (void)
+{
+ return -7.0f128; /* LXVKQ 34,23. */
+}
+
+_Float128
+return_inf (void)
+{
+ return __builtin_inff128 (); /* LXVKQ 34,8. */
+}
+
+_Float128
+return_minf (void)
+{
+ return - __builtin_inff128 (); /* LXVKQ 34,24. */
+}
+
+_Float128
+return_nan (void)
+{
+ return __builtin_nanf128 (""); /* LXVKQ 34,9. */
+}
+
+/* Note, the following NaNs should not generate a LXVKQ instruction. */
+_Float128
+return_mnan (void)
+{
+ return - __builtin_nanf128 (""); /* PLXV 34,... */
+}
+
+_Float128
+return_nan2 (void)
+{
+ return __builtin_nanf128 ("1"); /* PLXV 34,... */
+}
+
+_Float128
+return_nans (void)
+{
+ return __builtin_nansf128 (""); /* PLXV 34,... */
+}
+
+vector long long
+return_longlong_neg_0 (void)
+{
+ /* This vector is the same pattern as -0.0F128. */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define FIRST 0x8000000000000000
+#define SECOND 0x0000000000000000
+
+#else
+#define FIRST 0x0000000000000000
+#define SECOND 0x8000000000000000
+#endif
+
+ return (vector long long) { FIRST, SECOND }; /* LXVKQ 34,16. */
+}
+
+/* { dg-final { scan-assembler-times {\mlxvkq\M} 19 } } */
+/* { dg-final { scan-assembler-times {\mplxv\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 1 } } */
+
^ permalink raw reply [flat|nested] 11+ messages in thread
* [gcc(refs/users/meissner/heads/work071)] Add LXVKQ support.
@ 2021-10-14 15:32 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2021-10-14 15:32 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:b4b42ae3e307418aa6c629ee811ef101ad643b76
commit b4b42ae3e307418aa6c629ee811ef101ad643b76
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Thu Oct 14 11:32:39 2021 -0400
Add LXVKQ support.
This patch adds support to generate the LXVKQ instruction to load specific
IEEE-128 floating point constants.
Compared to the last time I submitted this patch, I modified it so that it
uses the bit pattern of the vector to see if it can generate the LXVKQ
instruction. This means on a little endian Power<xxx> system, the
following code will generate a LXVKQ 34,16 instruction:
vector long long foo (void)
{
return (vector long long) { 0x0000000000000000, 0x8000000000000000 };
}
because that vector pattern is the same bit pattern as -0.0F128.
2021-10-14 Michael Meissner <meissner@the-meissners.org>
gcc/
* config/rs6000/constraints.md (eQ): New constraint.
* config/rs6000/predicates.md (easy_fp_constant): Add support for
generating the LXVKQ instruction.
(easy_vector_constant_ieee128): New predicate.
(easy_vector_constant): Add support for generating the LXVKQ
instruction.
* config/rs6000/rs6000-protos.h (rs6000_vec_const): Add fields
for generating LXVKQ.
* config/rs6000/rs6000.c (output_vec_const_move): Add support for
generating LXVKQ.
(vec_const_use_lxvkq): New function.
* config/rs6000/rs6000.opt (-mlxvkq): New debug option.
* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
generating LXVKQ.
(vsx_mov<mode>_32bit): Likewise.
* doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
eQ constraint.
gcc/testsuite/
* gcc.target/powerpc/float128-constant.c: New test.
Diff:
---
gcc/config/rs6000/constraints.md | 5 +
gcc/config/rs6000/predicates.md | 23 +++
gcc/config/rs6000/rs6000-protos.h | 2 +
gcc/config/rs6000/rs6000.c | 56 ++++++++
gcc/config/rs6000/rs6000.opt | 4 +
gcc/config/rs6000/vsx.md | 28 ++--
gcc/doc/md.texi | 3 +
.../gcc.target/powerpc/float128-constant.c | 160 +++++++++++++++++++++
8 files changed, 267 insertions(+), 14 deletions(-)
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index d26c8940104..a15b659d9d7 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -218,6 +218,11 @@
"A signed 34-bit integer constant if prefixed instructions are supported."
(match_operand 0 "cint34_operand"))
+;; IEEE 128-bit constant that can be loaded with LXVKQ
+(define_constraint "eQ"
+ "An IEEE 128-bit constant that can be loaded with the LXVKQ instruction."
+ (match_operand 0 "easy_vector_constant_ieee128"))
+
;; Floating-point constraints. These two are defined so that insn
;; length attributes can be calculated exactly.
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index d4b50276bac..21eff6c1ec2 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -606,6 +606,9 @@
if (TARGET_POWER10 && vec_const_to_bytes (op, mode, &vec_const))
{
+ if (vec_const_use_lxvkq (&vec_const))
+ return true;
+
if (vec_const_use_xxspltidp (&vec_const))
return true;
}
@@ -635,6 +638,23 @@
&& vec_const_use_xxspltidp (&vec_const));
})
+;; Return 1 if the operand is a special IEEE 128-bit value that can be loaded
+;; via the LXVKQ instruction.
+
+(define_predicate "easy_vector_constant_ieee128"
+ (match_code "const_vector,vec_duplicate,const_int,const_double")
+{
+ rs6000_vec_const vec_const;
+
+ /* Can we generate the LXVKQ instruction? */
+ if (!TARGET_LXVKQ || !TARGET_FLOAT128_HW || !TARGET_VSX)
+ return false;
+
+ /* Convert the vector constant to bytes. */
+ return (vec_const_to_bytes (op, mode, &vec_const)
+ && vec_const_use_lxvkq (&vec_const));
+})
+
;; Return 1 if the operand is a constant that can loaded with a XXSPLTIB
;; instruction and then a VUPKHSB, VECSB2W or VECSB2D instruction.
@@ -689,6 +709,9 @@
if (TARGET_POWER10 && vec_const_to_bytes (op, mode, &vec_const))
{
+ if (vec_const_use_lxvkq (&vec_const))
+ return true;
+
if (vec_const_use_xxspltidp (&vec_const))
return true;
}
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index df4ae364bfb..43c0f96aab5 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -240,10 +240,12 @@ typedef struct {
unsigned char bytes[VECTOR_CONST_BYTES];
machine_mode orig_mode; /* Original mode. */
unsigned int xxspltidp_immediate; /* Immediate value for XXSPLTIDP. */
+ unsigned lxvkq_immediate; /* Immediate to use with LXVKQ. */
} rs6000_vec_const;
extern bool vec_const_to_bytes (rtx, machine_mode, rs6000_vec_const *);
extern bool vec_const_use_xxspltidp (rs6000_vec_const *);
+extern bool vec_const_use_lxvkq (rs6000_vec_const *);
#endif /* RTX_CODE */
#ifdef TREE_CODE
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 6d9359b6e88..326920bbad9 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6993,6 +6993,12 @@ output_vec_const_move (rtx *operands)
rs6000_vec_const vec_const;
if (TARGET_POWER10 && vec_const_to_bytes (vec, mode, &vec_const))
{
+ if (vec_const_use_lxvkq (&vec_const))
+ {
+ operands[2] = GEN_INT (vec_const.lxvkq_immediate);
+ return "lxvkq %x0,%2";
+ }
+
if (vec_const_use_xxspltidp (&vec_const))
{
operands[2] = GEN_INT (vec_const.xxspltidp_immediate);
@@ -28787,6 +28793,56 @@ vec_const_use_xxspltidp (rs6000_vec_const *vec_const)
return true;
}
+/* Determine if a vector constant can be loaded with LXVKQ. If so, fill out
+ the fields used to generate the instruction. */
+
+bool
+vec_const_use_lxvkq (rs6000_vec_const *vec_const)
+{
+ unsigned immediate;
+
+ if (!TARGET_LXVKQ || !TARGET_PREFIXED || !TARGET_VSX)
+ return false;
+
+ /* Verify that all of the bottom 3 words in the constants loaded by the
+ LXVKQ instruction are zero. */
+ for (size_t i = 1; i < VECTOR_CONST_32BIT; i++)
+ if (vec_const->words[i] != 0)
+ return false;
+
+ /* See if we have a match. */
+ switch (vec_const->words[0])
+ {
+ case 0x3FFF0000U: immediate = 1; break; /* IEEE 128-bit +1.0. */
+ case 0x40000000U: immediate = 2; break; /* IEEE 128-bit +2.0. */
+ case 0x40008000U: immediate = 3; break; /* IEEE 128-bit +3.0. */
+ case 0x40010000U: immediate = 4; break; /* IEEE 128-bit +4.0. */
+ case 0x40014000U: immediate = 5; break; /* IEEE 128-bit +5.0. */
+ case 0x40018000U: immediate = 6; break; /* IEEE 128-bit +6.0. */
+ case 0x4001C000U: immediate = 7; break; /* IEEE 128-bit +7.0. */
+ case 0x7FFF0000U: immediate = 8; break; /* IEEE 128-bit +Infinity. */
+ case 0x7FFF8000U: immediate = 9; break; /* IEEE 128-bit quiet NaN. */
+ case 0x80000000U: immediate = 16; break; /* IEEE 128-bit -0.0. */
+ case 0xBFFF0000U: immediate = 17; break; /* IEEE 128-bit -1.0. */
+ case 0xC0000000U: immediate = 18; break; /* IEEE 128-bit -2.0. */
+ case 0xC0008000U: immediate = 19; break; /* IEEE 128-bit -3.0. */
+ case 0xC0010000U: immediate = 20; break; /* IEEE 128-bit -4.0. */
+ case 0xC0014000U: immediate = 21; break; /* IEEE 128-bit -5.0. */
+ case 0xC0018000U: immediate = 22; break; /* IEEE 128-bit -6.0. */
+ case 0xC001C000U: immediate = 23; break; /* IEEE 128-bit -7.0. */
+ case 0xFFFF0000U: immediate = 24; break; /* IEEE 128-bit -Infinity. */
+
+ /* anything else cannot be loaded. */
+ default:
+ return false;
+ }
+
+ /* We can use the LXVKQ instruction, record the immediate needed for the
+ instruction. */
+ vec_const->lxvkq_immediate = immediate;
+ return true;
+}
+
/* Convert a vector constant to an internal structure, breaking it out to
bytes, half words, words, and double words. Return true if we have
successfully broken it out. */
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 1d7ce4cc94a..c9eb78952d6 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -644,6 +644,10 @@ mxxspltidp
Target Undocumented Var(TARGET_XXSPLTIDP) Init(1) Save
Generate (do not generate) XXSPLTIDP instructions.
+mlxvkq
+Target Undocumented Var(TARGET_LXVKQ) Init(1) Save
+Generate (do not generate) LXVKQ instructions.
+
-param=rs6000-density-pct-threshold=
Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param
When costing for loop vectorization, we probably need to penalize the loop body
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 6be3376f5d1..baf2e78e7dc 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1192,19 +1192,19 @@
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
-;; XXLSPLTIDP
+;; XXSPLTIDP LXVKQ
;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, r, we, ?wQ,
?&r, ??r, ??Y, <??r>, wa, v,
- wa,
+ wa, wa,
?wa, v, <??r>, wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, we, r, r,
wQ, Y, r, r, wE, jwM,
- wD,
+ wD, wQ,
?jwM, W, <nW>, v, wZ"))]
"TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
@@ -1216,46 +1216,46 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
store, load, store, *, vecsimple, vecsimple,
- vecperm,
+ vecperm, vecperm,
vecsimple, *, *, vecstore, vecload")
(set_attr "num_insns"
"*, *, *, 2, *, 2,
2, 2, 2, 2, *, *,
- *,
+ *, *,
*, 5, 2, *, *")
(set_attr "max_prefixed_insns"
"*, *, *, *, *, 2,
2, 2, 2, 2, *, *,
- *,
+ *, *,
*, *, *, *, *")
(set_attr "length"
"*, *, *, 8, *, 8,
8, 8, 8, 8, *, *,
- *,
+ *, *,
*, 20, 8, *, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
*, *, *, *, p9v, *,
- p10,
+ p10, p10,
<VSisa>, *, *, *, *")])
;; VSX store VSX load VSX move GPR load GPR store GPR move
;; XXSPLTIB VSPLTISW VSX 0/-1
-;; XXSPLTIDP
+;; XXSPLTIDP LXVKQ
;; VMX const GPR const
;; LVX (VMX) STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, ??r, ??Y, <??r>,
wa, v, ?wa,
- wa,
+ wa, wa,
v, <??r>,
wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, Y, r, r,
wE, jwM, ?jwM,
- eD,
+ eD, eQ,
W, <nW>,
v, wZ"))]
@@ -1268,19 +1268,19 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, load, store, *,
vecsimple, vecsimple, vecsimple,
- vecperm,
+ vecperm, vecperm,
*, *,
vecstore, vecload")
(set_attr "length"
"*, *, *, 16, 16, 16,
*, *, *,
- *,
+ *, *,
20, 16,
*, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
p9v, *, <VSisa>,
- p10,
+ p10, p10,
*, *,
*, *")])
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index b9dfcaf0d44..501e0069ebb 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3339,6 +3339,9 @@ A constant that can be loaded with the XXSPLTIDP instruction.
@item eI
A signed 34-bit integer constant if prefixed instructions are supported.
+@item eQ
+A constant that can be loaded with the LXVKQ instruction.
+
@ifset INTERNALS
@item G
A floating point constant that can be loaded into a register with one
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-constant.c b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
new file mode 100644
index 00000000000..f6becac1075
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
@@ -0,0 +1,160 @@
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -mlxvkq -O2" } */
+
+/* Test whether the LXVKQ instruction is generated to load special IEEE 128-bit
+ constants. */
+
+_Float128
+return_0 (void)
+{
+ return 0.0f128; /* XXSPLTIB 34,0. */
+}
+
+_Float128
+return_1 (void)
+{
+ return 1.0f128; /* LXVKQ 34,1. */
+}
+
+_Float128
+return_2 (void)
+{
+ return 2.0f128; /* LXVKQ 34,2. */
+}
+
+_Float128
+return_3 (void)
+{
+ return 3.0f128; /* LXVKQ 34,3. */
+}
+
+_Float128
+return_4 (void)
+{
+ return 4.0f128; /* LXVKQ 34,4. */
+}
+
+_Float128
+return_5 (void)
+{
+ return 5.0f128; /* LXVKQ 34,5. */
+}
+
+_Float128
+return_6 (void)
+{
+ return 6.0f128; /* LXVKQ 34,6. */
+}
+
+_Float128
+return_7 (void)
+{
+ return 7.0f128; /* LXVKQ 34,7. */
+}
+
+_Float128
+return_m0 (void)
+{
+ return -0.0f128; /* LXVKQ 34,16. */
+}
+
+_Float128
+return_m1 (void)
+{
+ return -1.0f128; /* LXVKQ 34,17. */
+}
+
+_Float128
+return_m2 (void)
+{
+ return -2.0f128; /* LXVKQ 34,18. */
+}
+
+_Float128
+return_m3 (void)
+{
+ return -3.0f128; /* LXVKQ 34,19. */
+}
+
+_Float128
+return_m4 (void)
+{
+ return -4.0f128; /* LXVKQ 34,20. */
+}
+
+_Float128
+return_m5 (void)
+{
+ return -5.0f128; /* LXVKQ 34,21. */
+}
+
+_Float128
+return_m6 (void)
+{
+ return -6.0f128; /* LXVKQ 34,22. */
+}
+
+_Float128
+return_m7 (void)
+{
+ return -7.0f128; /* LXVKQ 34,23. */
+}
+
+_Float128
+return_inf (void)
+{
+ return __builtin_inff128 (); /* LXVKQ 34,8. */
+}
+
+_Float128
+return_minf (void)
+{
+ return - __builtin_inff128 (); /* LXVKQ 34,24. */
+}
+
+_Float128
+return_nan (void)
+{
+ return __builtin_nanf128 (""); /* LXVKQ 34,9. */
+}
+
+/* Note, the following NaNs should not generate a LXVKQ instruction. */
+_Float128
+return_mnan (void)
+{
+ return - __builtin_nanf128 (""); /* PLXV 34,... */
+}
+
+_Float128
+return_nan2 (void)
+{
+ return __builtin_nanf128 ("1"); /* PLXV 34,... */
+}
+
+_Float128
+return_nans (void)
+{
+ return __builtin_nansf128 (""); /* PLXV 34,... */
+}
+
+vector long long
+return_longlong_neg_0 (void)
+{
+ /* This vector is the same pattern as -0.0F128. */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define FIRST 0x8000000000000000
+#define SECOND 0x0000000000000000
+
+#else
+#define FIRST 0x0000000000000000
+#define SECOND 0x8000000000000000
+#endif
+
+ return (vector long long) { FIRST, SECOND }; /* LXVKQ 34,16. */
+}
+
+/* { dg-final { scan-assembler-times {\mlxvkq\M} 19 } } */
+/* { dg-final { scan-assembler-times {\mplxv\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 1 } } */
+
^ permalink raw reply [flat|nested] 11+ messages in thread
* [gcc(refs/users/meissner/heads/work071)] Add LXVKQ support.
@ 2021-10-14 3:39 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2021-10-14 3:39 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:d67cab0e2d7eb2338d26c812d1ab0df3d15b2852
commit d67cab0e2d7eb2338d26c812d1ab0df3d15b2852
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Wed Oct 13 23:38:00 2021 -0400
Add LXVKQ support.
This patch adds support to generate the LXVKQ instruction to load specific
IEEE-128 floating point constants.
Compared to the last time I submitted this patch, I modified it so that it
uses the bit pattern of the vector to see if it can generate the LXVKQ
instruction. This means on a little endian Power<xxx> system, the
following code will generate a LXVKQ 34,16 instruction:
vector long long foo (void)
{
return (vector long long) { 0x0000000000000000, 0x8000000000000000 };
}
because that vector pattern is the same bit pattern as -0.0F128.
2021-10-13 Michael Meissner <meissner@the-meissners.org>
gcc/
* config/rs6000/constraints.md (eQ): New constraint.
* config/rs6000/predicates.md (easy_fp_constant): Add support for
generating the LXVKQ instruction.
(easy_vector_constant_ieee128): New predicate.
(easy_vector_constant): Add support for generating the LXVKQ
instruction.
* config/rs6000/rs6000-protos.h (rs6000_vec_const): Add fields
for generating LXVKQ.
* config/rs6000/rs6000.c (output_vec_const_move): Add support for
generating LXVKQ.
(vec_const_use_lxvkq): New function.
* config/rs6000/rs6000.opt (-mlxvkq): New debug option.
* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
generating LXVKQ.
(vsx_mov<mode>_32bit): Likewise.
* doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
eQ constraint.
gcc/testsuite/
* gcc.target/powerpc/float128-constant.c: New test.
Diff:
---
gcc/config/rs6000/constraints.md | 5 +
gcc/config/rs6000/predicates.md | 25 ++++
gcc/config/rs6000/rs6000-protos.h | 3 +
gcc/config/rs6000/rs6000.c | 57 ++++++++
gcc/config/rs6000/rs6000.opt | 4 +
gcc/config/rs6000/vsx.md | 32 +++--
gcc/doc/md.texi | 3 +
.../gcc.target/powerpc/float128-constant.c | 160 +++++++++++++++++++++
8 files changed, 277 insertions(+), 12 deletions(-)
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index d26c8940104..a15b659d9d7 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -218,6 +218,11 @@
"A signed 34-bit integer constant if prefixed instructions are supported."
(match_operand 0 "cint34_operand"))
+;; IEEE 128-bit constant that can be loaded with LXVKQ
+(define_constraint "eQ"
+ "An IEEE 128-bit constant that can be loaded with the LXVKQ instruction."
+ (match_operand 0 "easy_vector_constant_ieee128"))
+
;; Floating-point constraints. These two are defined so that insn
;; length attributes can be calculated exactly.
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index ddad7ca3ae9..2c9c0a29845 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -609,6 +609,9 @@
{
if (vec_const_use_xxspltidp (&vec_const))
return true;
+
+ if (vec_const_use_lxvkq (&vec_const))
+ return true;
}
/* Otherwise consider floating point constants hard, so that the
@@ -639,6 +642,25 @@
return vec_const_use_xxspltidp (&vec_const);
})
+;; Return 1 if the operand is a special IEEE 128-bit value that can be loaded
+;; via the LXVKQ instruction.
+
+(define_predicate "easy_vector_constant_ieee128"
+ (match_code "const_vector,vec_duplicate,const_int,const_double")
+{
+ rs6000_vec_const vec_const;
+
+ /* Can we do the LXVKQ instruction? */
+ if (!TARGET_LXVKQ || !TARGET_PREFIXED || !TARGET_VSX)
+ return false;
+
+ /* Convert the vector constant to bytes. */
+ if (!vec_const_to_bytes (op, mode, &vec_const))
+ return false;
+
+ return vec_const_use_lxvkq (&vec_const);
+})
+
;; Return 1 if the operand is a constant that can loaded with a XXSPLTIB
;; instruction and then a VUPKHSB, VECSB2W or VECSB2D instruction.
@@ -695,6 +717,9 @@
{
if (vec_const_use_xxspltidp (&vec_const))
return true;
+
+ if (vec_const_use_lxvkq (&vec_const))
+ return true;
}
return easy_altivec_constant (op, mode);
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index da9502bcb33..388fe18e314 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -243,11 +243,14 @@ typedef struct {
bool is_xxspltidp; /* Use XXSPLTIDP to load constant. */
machine_mode xxspltidp_mode; /* Mode to use for XXSPLTIDP. */
unsigned int xxspltidp_immediate; /* Immediate value for XXSPLTIDP. */
+ bool is_lxvkq; /* LXVKQ can load the constant. */
+ unsigned lxvkq_immediate; /* Immediate to use with LXVKQ. */
bool is_prefixed; /* Prefixed instruction used. */
} rs6000_vec_const;
extern bool vec_const_to_bytes (rtx, machine_mode, rs6000_vec_const *);
extern bool vec_const_use_xxspltidp (rs6000_vec_const *);
+extern bool vec_const_use_lxvkq (rs6000_vec_const *);
#endif /* RTX_CODE */
#ifdef TREE_CODE
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 05b2691d38a..2a038ea7dea 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6993,6 +6993,12 @@ output_vec_const_move (rtx *operands)
rs6000_vec_const vec_const;
if (vec_const_to_bytes (vec, mode, &vec_const))
{
+ if (vec_const_use_lxvkq (&vec_const))
+ {
+ operands[2] = GEN_INT (vec_const.lxvkq_immediate);
+ return "lxvkq %x0,%2";
+ }
+
if (vec_const_use_xxspltidp (&vec_const))
{
operands[2] = GEN_INT (vec_const.xxspltidp_immediate);
@@ -28786,6 +28792,57 @@ vec_const_use_xxspltidp (rs6000_vec_const *vec_const)
return true;
}
+/* Determine if a vector constant can be loaded with LXVKQ. If so, fill out
+ the fields used to generate the instruction. */
+
+bool
+vec_const_use_lxvkq (rs6000_vec_const *vec_const)
+{
+ unsigned immediate;
+
+ if (!TARGET_LXVKQ || !TARGET_PREFIXED || !TARGET_VSX)
+ return false;
+
+ /* Verify that all of the bottom 3 words in the constants loaded by the
+ LXVKQ instruction are zero. */
+ for (size_t i = 1; i < VECTOR_CONST_32BIT; i++)
+ if (vec_const->words[i] != 0)
+ return false;
+
+ /* See if we have a match. */
+ switch (vec_const->words[0])
+ {
+ case 0x3FFF0000U: immediate = 1; break; /* IEEE 128-bit +1.0. */
+ case 0x40000000U: immediate = 2; break; /* IEEE 128-bit +2.0. */
+ case 0x40008000U: immediate = 3; break; /* IEEE 128-bit +3.0. */
+ case 0x40010000U: immediate = 4; break; /* IEEE 128-bit +4.0. */
+ case 0x40014000U: immediate = 5; break; /* IEEE 128-bit +5.0. */
+ case 0x40018000U: immediate = 6; break; /* IEEE 128-bit +6.0. */
+ case 0x4001C000U: immediate = 7; break; /* IEEE 128-bit +7.0. */
+ case 0x7FFF0000U: immediate = 8; break; /* IEEE 128-bit +Infinity. */
+ case 0x7FFF8000U: immediate = 9; break; /* IEEE 128-bit quiet NaN. */
+ case 0x80000000U: immediate = 16; break; /* IEEE 128-bit -0.0. */
+ case 0xBFFF0000U: immediate = 17; break; /* IEEE 128-bit -1.0. */
+ case 0xC0000000U: immediate = 18; break; /* IEEE 128-bit -2.0. */
+ case 0xC0008000U: immediate = 19; break; /* IEEE 128-bit -3.0. */
+ case 0xC0010000U: immediate = 20; break; /* IEEE 128-bit -4.0. */
+ case 0xC0014000U: immediate = 21; break; /* IEEE 128-bit -5.0. */
+ case 0xC0018000U: immediate = 22; break; /* IEEE 128-bit -6.0. */
+ case 0xC001C000U: immediate = 23; break; /* IEEE 128-bit -7.0. */
+ case 0xFFFF0000U: immediate = 24; break; /* IEEE 128-bit -Infinity. */
+
+ /* anything else cannot be loaded. */
+ default:
+ return false;
+ }
+
+ /* We can use the LXVKQ instruction. */
+ vec_const->lxvkq_immediate = immediate;
+ vec_const->is_lxvkq = true;
+ vec_const->is_prefixed = false;
+ return true;
+}
+
/* Convert a vector constant to an internal structure, breaking it out to
bytes, half words, words, and double words. Return true if we have
successfully broken it out. */
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 1d7ce4cc94a..c9eb78952d6 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -644,6 +644,10 @@ mxxspltidp
Target Undocumented Var(TARGET_XXSPLTIDP) Init(1) Save
Generate (do not generate) XXSPLTIDP instructions.
+mlxvkq
+Target Undocumented Var(TARGET_LXVKQ) Init(1) Save
+Generate (do not generate) LXVKQ instructions.
+
-param=rs6000-density-pct-threshold=
Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param
When costing for loop vectorization, we probably need to penalize the loop body
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 7b2d2551c7b..eddbf395e77 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1193,16 +1193,19 @@
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX) XXLSPLTIDP
+;; LXVKQ
(define_insn "vsx_mov<mode>_64bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, r, we, ?wQ,
?&r, ??r, ??Y, <??r>, wa, v,
- ?wa, v, <??r>, wZ, v, wa")
+ ?wa, v, <??r>, wZ, v, wa,
+ wa")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, we, r, r,
wQ, Y, r, r, wE, jwM,
- ?jwM, W, <nW>, v, wZ, eD"))]
+ ?jwM, W, <nW>, v, wZ, eD,
+ eQ"))]
"TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
&& (register_operand (operands[0], <MODE>mode)
@@ -1213,23 +1216,28 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
store, load, store, *, vecsimple, vecsimple,
- vecsimple, *, *, vecstore, vecload, vecperm")
+ vecsimple, *, *, vecstore, vecload, vecperm,
+ vecperm")
(set_attr "num_insns"
"*, *, *, 2, *, 2,
2, 2, 2, 2, *, *,
- *, 5, 2, *, *, *")
+ *, 5, 2, *, *, *,
+ *")
(set_attr "max_prefixed_insns"
"*, *, *, *, *, 2,
2, 2, 2, 2, *, *,
- *, *, *, *, *, *")
+ *, *, *, *, *, *,
+ *")
(set_attr "length"
"*, *, *, 8, *, 8,
8, 8, 8, 8, *, *,
- *, 20, 8, *, *, *")
+ *, 20, 8, *, *, *,
+ *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
*, *, *, *, p9v, *,
- <VSisa>, *, *, *, *, p10")])
+ <VSisa>, *, *, *, *, p10,
+ p10")])
;; VSX store VSX load VSX move GPR load GPR store GPR move
;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const
@@ -1238,12 +1246,12 @@
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, ??r, ??Y, <??r>,
wa, v, ?wa, v, <??r>,
- wZ, v, wa")
+ wZ, v, wa, wa")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, Y, r, r,
wE, jwM, ?jwM, W, <nW>,
- v, wZ, eD"))]
+ v, wZ, eD, eQ"))]
"!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
&& (register_operand (operands[0], <MODE>mode)
@@ -1254,15 +1262,15 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, load, store, *,
vecsimple, vecsimple, vecsimple, *, *,
- vecstore, vecload, vecperm")
+ vecstore, vecload, vecperm, vecperm")
(set_attr "length"
"*, *, *, 16, 16, 16,
*, *, *, 20, 16,
- *, *, *")
+ *, *, *, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
p9v, *, <VSisa>, *, *,
- *, *, p10")])
+ *, *, p10, p10")])
;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index b9dfcaf0d44..501e0069ebb 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3339,6 +3339,9 @@ A constant that can be loaded with the XXSPLTIDP instruction.
@item eI
A signed 34-bit integer constant if prefixed instructions are supported.
+@item eQ
+A constant that can be loaded with the LXVKQ instruction.
+
@ifset INTERNALS
@item G
A floating point constant that can be loaded into a register with one
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-constant.c b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
new file mode 100644
index 00000000000..f6becac1075
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
@@ -0,0 +1,160 @@
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -mlxvkq -O2" } */
+
+/* Test whether the LXVKQ instruction is generated to load special IEEE 128-bit
+ constants. */
+
+_Float128
+return_0 (void)
+{
+ return 0.0f128; /* XXSPLTIB 34,0. */
+}
+
+_Float128
+return_1 (void)
+{
+ return 1.0f128; /* LXVKQ 34,1. */
+}
+
+_Float128
+return_2 (void)
+{
+ return 2.0f128; /* LXVKQ 34,2. */
+}
+
+_Float128
+return_3 (void)
+{
+ return 3.0f128; /* LXVKQ 34,3. */
+}
+
+_Float128
+return_4 (void)
+{
+ return 4.0f128; /* LXVKQ 34,4. */
+}
+
+_Float128
+return_5 (void)
+{
+ return 5.0f128; /* LXVKQ 34,5. */
+}
+
+_Float128
+return_6 (void)
+{
+ return 6.0f128; /* LXVKQ 34,6. */
+}
+
+_Float128
+return_7 (void)
+{
+ return 7.0f128; /* LXVKQ 34,7. */
+}
+
+_Float128
+return_m0 (void)
+{
+ return -0.0f128; /* LXVKQ 34,16. */
+}
+
+_Float128
+return_m1 (void)
+{
+ return -1.0f128; /* LXVKQ 34,17. */
+}
+
+_Float128
+return_m2 (void)
+{
+ return -2.0f128; /* LXVKQ 34,18. */
+}
+
+_Float128
+return_m3 (void)
+{
+ return -3.0f128; /* LXVKQ 34,19. */
+}
+
+_Float128
+return_m4 (void)
+{
+ return -4.0f128; /* LXVKQ 34,20. */
+}
+
+_Float128
+return_m5 (void)
+{
+ return -5.0f128; /* LXVKQ 34,21. */
+}
+
+_Float128
+return_m6 (void)
+{
+ return -6.0f128; /* LXVKQ 34,22. */
+}
+
+_Float128
+return_m7 (void)
+{
+ return -7.0f128; /* LXVKQ 34,23. */
+}
+
+_Float128
+return_inf (void)
+{
+ return __builtin_inff128 (); /* LXVKQ 34,8. */
+}
+
+_Float128
+return_minf (void)
+{
+ return - __builtin_inff128 (); /* LXVKQ 34,24. */
+}
+
+_Float128
+return_nan (void)
+{
+ return __builtin_nanf128 (""); /* LXVKQ 34,9. */
+}
+
+/* Note, the following NaNs should not generate a LXVKQ instruction. */
+_Float128
+return_mnan (void)
+{
+ return - __builtin_nanf128 (""); /* PLXV 34,... */
+}
+
+_Float128
+return_nan2 (void)
+{
+ return __builtin_nanf128 ("1"); /* PLXV 34,... */
+}
+
+_Float128
+return_nans (void)
+{
+ return __builtin_nansf128 (""); /* PLXV 34,... */
+}
+
+vector long long
+return_longlong_neg_0 (void)
+{
+ /* This vector is the same pattern as -0.0F128. */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define FIRST 0x8000000000000000
+#define SECOND 0x0000000000000000
+
+#else
+#define FIRST 0x0000000000000000
+#define SECOND 0x8000000000000000
+#endif
+
+ return (vector long long) { FIRST, SECOND }; /* LXVKQ 34,16. */
+}
+
+/* { dg-final { scan-assembler-times {\mlxvkq\M} 19 } } */
+/* { dg-final { scan-assembler-times {\mplxv\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 1 } } */
+
^ permalink raw reply [flat|nested] 11+ messages in thread
* [gcc(refs/users/meissner/heads/work071)] Add LXVKQ support.
@ 2021-10-14 1:56 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2021-10-14 1:56 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:15d4edf91934579e414d52e5d6a1131af626645c
commit 15d4edf91934579e414d52e5d6a1131af626645c
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Wed Oct 13 21:56:31 2021 -0400
Add LXVKQ support.
This patch adds the basic support for generating the LXVKQ instruction.
2021-10-13 Michael Meissner <meissner@the-meissners.org>
gcc/
* config/rs6000/constraints.md (eQ): New constraint.
* config/rs6000/predicates.md (easy_fp_constant): Add support for
generating the LXVKQ instruction.
(easy_vector_constant_ieee128): New predicate.
(easy_vector_constant): Add support for generating the LXVKQ
instruction.
* config/rs6000/rs6000-protos.h (rs6000_vec_const): Add fields
for generating LXVKQ.
* config/rs6000/rs6000.c (output_vec_const_move): Add support for
generating LXVKQ.
(vec_const_use_lxvkq): New function.
* config/rs6000/rs6000.opt (-mlxvkq): New debug option.
* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
generating LXVKQ.
(vsx_mov<mode>_32bit): Likewise.
* doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
eQ constraint.
gcc/testsuite/
* gcc.target/powerpc/float128-constant.c: New test.
Diff:
---
gcc/config/rs6000/constraints.md | 5 +
gcc/config/rs6000/predicates.md | 25 ++++
gcc/config/rs6000/rs6000-protos.h | 3 +
gcc/config/rs6000/rs6000.c | 57 ++++++++
gcc/config/rs6000/rs6000.opt | 4 +
gcc/config/rs6000/vsx.md | 32 +++--
gcc/doc/md.texi | 3 +
.../gcc.target/powerpc/float128-constant.c | 144 +++++++++++++++++++++
8 files changed, 261 insertions(+), 12 deletions(-)
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index d26c8940104..a15b659d9d7 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -218,6 +218,11 @@
"A signed 34-bit integer constant if prefixed instructions are supported."
(match_operand 0 "cint34_operand"))
+;; IEEE 128-bit constant that can be loaded with the LXVKQ instruction
+(define_constraint "eQ"
+ "An IEEE 128-bit constant that can be loaded with the LXVKQ instruction."
+ (match_operand 0 "easy_vector_constant_ieee128"))
+
;; Floating-point constraints. These two are defined so that insn
;; length attributes can be calculated exactly.
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index ddad7ca3ae9..2c9c0a29845 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -609,6 +609,9 @@
{
if (vec_const_use_xxspltidp (&vec_const))
return true;
+
+ if (vec_const_use_lxvkq (&vec_const))
+ return true;
}
/* Otherwise consider floating point constants hard, so that the
@@ -639,6 +642,25 @@
return vec_const_use_xxspltidp (&vec_const);
})
+;; Return 1 if the operand is a special IEEE 128-bit value that can be loaded
+;; via the LXVKQ instruction.
+
+(define_predicate "easy_vector_constant_ieee128"
+ (match_code "const_vector,vec_duplicate,const_int,const_double")
+{
+ rs6000_vec_const vec_const;
+
+ /* Reject early if the options LXVKQ generation depends on are off. */
+ if (!TARGET_LXVKQ || !TARGET_PREFIXED || !TARGET_VSX)
+ return false;
+
+ /* Break the constant out into bytes/words; fail if that is not possible. */
+ if (!vec_const_to_bytes (op, mode, &vec_const))
+ return false;
+
+ return vec_const_use_lxvkq (&vec_const);
+})
+
;; Return 1 if the operand is a constant that can loaded with a XXSPLTIB
;; instruction and then a VUPKHSB, VECSB2W or VECSB2D instruction.
@@ -695,6 +717,9 @@
{
if (vec_const_use_xxspltidp (&vec_const))
return true;
+
+ if (vec_const_use_lxvkq (&vec_const))
+ return true;
}
return easy_altivec_constant (op, mode);
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index da9502bcb33..388fe18e314 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -243,11 +243,14 @@ typedef struct {
bool is_xxspltidp; /* Use XXSPLTIDP to load constant. */
machine_mode xxspltidp_mode; /* Mode to use for XXSPLTIDP. */
unsigned int xxspltidp_immediate; /* Immediate value for XXSPLTIDP. */
+ bool is_lxvkq; /* LXVKQ can load the constant. */
+ unsigned lxvkq_immediate; /* Immediate to use with LXVKQ. */
bool is_prefixed; /* Prefixed instruction used. */
} rs6000_vec_const;
extern bool vec_const_to_bytes (rtx, machine_mode, rs6000_vec_const *);
extern bool vec_const_use_xxspltidp (rs6000_vec_const *);
+extern bool vec_const_use_lxvkq (rs6000_vec_const *);
#endif /* RTX_CODE */
#ifdef TREE_CODE
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 05b2691d38a..2a038ea7dea 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6993,6 +6993,12 @@ output_vec_const_move (rtx *operands)
rs6000_vec_const vec_const;
if (vec_const_to_bytes (vec, mode, &vec_const))
{
+ if (vec_const_use_lxvkq (&vec_const))
+ {
+ operands[2] = GEN_INT (vec_const.lxvkq_immediate);
+ return "lxvkq %x0,%2";
+ }
+
if (vec_const_use_xxspltidp (&vec_const))
{
operands[2] = GEN_INT (vec_const.xxspltidp_immediate);
@@ -28786,6 +28792,57 @@ vec_const_use_xxspltidp (rs6000_vec_const *vec_const)
return true;
}
+/* Determine if a vector constant can be loaded with LXVKQ. If so, set the
+ lxvkq_immediate, is_lxvkq and is_prefixed fields and return true. */
+
+bool
+vec_const_use_lxvkq (rs6000_vec_const *vec_const)
+{
+ unsigned immediate;
+
+ if (!TARGET_LXVKQ || !TARGET_PREFIXED || !TARGET_VSX)
+ return false;
+
+ /* Only word 0 may be non-zero in the constants matched below, so reject
+ the value if any of the remaining 32-bit words is non-zero. */
+ for (size_t i = 1; i < VECTOR_CONST_32BIT; i++)
+ if (vec_const->words[i] != 0)
+ return false;
+
+ /* Match word 0 against the supported values to pick the immediate. */
+ switch (vec_const->words[0])
+ {
+ case 0x3FFF0000U: immediate = 1; break; /* IEEE 128-bit +1.0. */
+ case 0x40000000U: immediate = 2; break; /* IEEE 128-bit +2.0. */
+ case 0x40008000U: immediate = 3; break; /* IEEE 128-bit +3.0. */
+ case 0x40010000U: immediate = 4; break; /* IEEE 128-bit +4.0. */
+ case 0x40014000U: immediate = 5; break; /* IEEE 128-bit +5.0. */
+ case 0x40018000U: immediate = 6; break; /* IEEE 128-bit +6.0. */
+ case 0x4001C000U: immediate = 7; break; /* IEEE 128-bit +7.0. */
+ case 0x7FFF0000U: immediate = 8; break; /* IEEE 128-bit +Infinity. */
+ case 0x7FFF8000U: immediate = 9; break; /* IEEE 128-bit quiet NaN. */
+ case 0x80000000U: immediate = 16; break; /* IEEE 128-bit -0.0. */
+ case 0xBFFF0000U: immediate = 17; break; /* IEEE 128-bit -1.0. */
+ case 0xC0000000U: immediate = 18; break; /* IEEE 128-bit -2.0. */
+ case 0xC0008000U: immediate = 19; break; /* IEEE 128-bit -3.0. */
+ case 0xC0010000U: immediate = 20; break; /* IEEE 128-bit -4.0. */
+ case 0xC0014000U: immediate = 21; break; /* IEEE 128-bit -5.0. */
+ case 0xC0018000U: immediate = 22; break; /* IEEE 128-bit -6.0. */
+ case 0xC001C000U: immediate = 23; break; /* IEEE 128-bit -7.0. */
+ case 0xFFFF0000U: immediate = 24; break; /* IEEE 128-bit -Infinity. */
+
+ /* Anything else cannot be loaded with LXVKQ. */
+ default:
+ return false;
+ }
+
+ /* Record the LXVKQ immediate and flag the constant as not prefixed. */
+ vec_const->lxvkq_immediate = immediate;
+ vec_const->is_lxvkq = true;
+ vec_const->is_prefixed = false;
+ return true;
+}
+
/* Convert a vector constant to an internal structure, breaking it out to
bytes, half words, words, and double words. Return true if we have
successfully broken it out. */
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 1d7ce4cc94a..c9eb78952d6 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -644,6 +644,10 @@ mxxspltidp
Target Undocumented Var(TARGET_XXSPLTIDP) Init(1) Save
Generate (do not generate) XXSPLTIDP instructions.
+mlxvkq
+Target Undocumented Var(TARGET_LXVKQ) Init(1) Save
+Generate (do not generate) LXVKQ instructions.
+
-param=rs6000-density-pct-threshold=
Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param
When costing for loop vectorization, we probably need to penalize the loop body
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 7b2d2551c7b..eddbf395e77 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1193,16 +1193,19 @@
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX) XXLSPLTIDP
+;; LXVKQ
(define_insn "vsx_mov<mode>_64bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, r, we, ?wQ,
?&r, ??r, ??Y, <??r>, wa, v,
- ?wa, v, <??r>, wZ, v, wa")
+ ?wa, v, <??r>, wZ, v, wa,
+ wa")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, we, r, r,
wQ, Y, r, r, wE, jwM,
- ?jwM, W, <nW>, v, wZ, eD"))]
+ ?jwM, W, <nW>, v, wZ, eD,
+ eQ"))]
"TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
&& (register_operand (operands[0], <MODE>mode)
@@ -1213,23 +1216,28 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
store, load, store, *, vecsimple, vecsimple,
- vecsimple, *, *, vecstore, vecload, vecperm")
+ vecsimple, *, *, vecstore, vecload, vecperm,
+ vecperm")
(set_attr "num_insns"
"*, *, *, 2, *, 2,
2, 2, 2, 2, *, *,
- *, 5, 2, *, *, *")
+ *, 5, 2, *, *, *,
+ *")
(set_attr "max_prefixed_insns"
"*, *, *, *, *, 2,
2, 2, 2, 2, *, *,
- *, *, *, *, *, *")
+ *, *, *, *, *, *,
+ *")
(set_attr "length"
"*, *, *, 8, *, 8,
8, 8, 8, 8, *, *,
- *, 20, 8, *, *, *")
+ *, 20, 8, *, *, *,
+ *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
*, *, *, *, p9v, *,
- <VSisa>, *, *, *, *, p10")])
+ <VSisa>, *, *, *, *, p10,
+ p10")])
;; VSX store VSX load VSX move GPR load GPR store GPR move
;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const
@@ -1238,12 +1246,12 @@
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, ??r, ??Y, <??r>,
wa, v, ?wa, v, <??r>,
- wZ, v, wa")
+ wZ, v, wa, wa")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, Y, r, r,
wE, jwM, ?jwM, W, <nW>,
- v, wZ, eD"))]
+ v, wZ, eD, eQ"))]
"!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
&& (register_operand (operands[0], <MODE>mode)
@@ -1254,15 +1262,15 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, load, store, *,
vecsimple, vecsimple, vecsimple, *, *,
- vecstore, vecload, vecperm")
+ vecstore, vecload, vecperm, vecperm")
(set_attr "length"
"*, *, *, 16, 16, 16,
*, *, *, 20, 16,
- *, *, *")
+ *, *, *, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
p9v, *, <VSisa>, *, *,
- *, *, p10")])
+ *, *, p10, p10")])
;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index b9dfcaf0d44..501e0069ebb 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3339,6 +3339,9 @@ A constant that can be loaded with the XXSPLTIDP instruction.
@item eI
A signed 34-bit integer constant if prefixed instructions are supported.
+@item eQ
+A constant that can be loaded with the LXVKQ instruction.
+
@ifset INTERNALS
@item G
A floating point constant that can be loaded into a register with one
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-constant.c b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
new file mode 100644
index 00000000000..23ee7e85d84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
@@ -0,0 +1,144 @@
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -mlxvkq -O2" } */
+
+/* Test whether the LXVKQ instruction is generated to load special IEEE 128-bit
+ constants. */
+
+_Float128
+return_0 (void)
+{
+ return 0.0f128; /* XXSPLTIB 34,0. */
+}
+
+_Float128
+return_1 (void)
+{
+ return 1.0f128; /* LXVKQ 34,1. */
+}
+
+_Float128
+return_2 (void)
+{
+ return 2.0f128; /* LXVKQ 34,2. */
+}
+
+_Float128
+return_3 (void)
+{
+ return 3.0f128; /* LXVKQ 34,3. */
+}
+
+_Float128
+return_4 (void)
+{
+ return 4.0f128; /* LXVKQ 34,4. */
+}
+
+_Float128
+return_5 (void)
+{
+ return 5.0f128; /* LXVKQ 34,5. */
+}
+
+_Float128
+return_6 (void)
+{
+ return 6.0f128; /* LXVKQ 34,6. */
+}
+
+_Float128
+return_7 (void)
+{
+ return 7.0f128; /* LXVKQ 34,7. */
+}
+
+_Float128
+return_m0 (void)
+{
+ return -0.0f128; /* LXVKQ 34,16. */
+}
+
+_Float128
+return_m1 (void)
+{
+ return -1.0f128; /* LXVKQ 34,17. */
+}
+
+_Float128
+return_m2 (void)
+{
+ return -2.0f128; /* LXVKQ 34,18. */
+}
+
+_Float128
+return_m3 (void)
+{
+ return -3.0f128; /* LXVKQ 34,19. */
+}
+
+_Float128
+return_m4 (void)
+{
+ return -4.0f128; /* LXVKQ 34,20. */
+}
+
+_Float128
+return_m5 (void)
+{
+ return -5.0f128; /* LXVKQ 34,21. */
+}
+
+_Float128
+return_m6 (void)
+{
+ return -6.0f128; /* LXVKQ 34,22. */
+}
+
+_Float128
+return_m7 (void)
+{
+ return -7.0f128; /* LXVKQ 34,23. */
+}
+
+_Float128
+return_inf (void)
+{
+ return __builtin_inff128 (); /* LXVKQ 34,8. */
+}
+
+_Float128
+return_minf (void)
+{
+ return - __builtin_inff128 (); /* LXVKQ 34,24. */
+}
+
+_Float128
+return_nan (void)
+{
+ return __builtin_nanf128 (""); /* LXVKQ 34,9. */
+}
+
+/* Note, the following NaNs should not generate a LXVKQ instruction. */
+_Float128
+return_mnan (void)
+{
+ return - __builtin_nanf128 (""); /* PLXV 34,... */
+}
+
+_Float128
+return_nan2 (void)
+{
+ return __builtin_nanf128 ("1"); /* PLXV 34,... */
+}
+
+_Float128
+return_nans (void)
+{
+ return __builtin_nansf128 (""); /* PLXV 34,... */
+}
+
+/* { dg-final { scan-assembler-times {\mlxvkq\M} 18 } } */
+/* { dg-final { scan-assembler-times {\mplxv\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 1 } } */
+
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2021-10-21 2:54 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-18 19:12 [gcc(refs/users/meissner/heads/work071)] Add LXVKQ support Michael Meissner
-- strict thread matches above, loose matches on Subject: below --
2021-10-21 2:54 Michael Meissner
2021-10-21 2:39 Michael Meissner
2021-10-21 2:20 Michael Meissner
2021-10-15 3:39 Michael Meissner
2021-10-14 16:51 Michael Meissner
2021-10-14 16:50 Michael Meissner
2021-10-14 16:46 Michael Meissner
2021-10-14 15:32 Michael Meissner
2021-10-14 3:39 Michael Meissner
2021-10-14 1:56 Michael Meissner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).