public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work081)] Optimize extendditi2 GPR to VSX register.
@ 2022-03-12 6:36 Michael Meissner
0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2022-03-12 6:36 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:d4ff248ef1e0788ca0e9ccfceb0d094ac584e2c8
commit d4ff248ef1e0788ca0e9ccfceb0d094ac584e2c8
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Sat Mar 12 01:35:42 2022 -0500
Optimize extendditi2 GPR to VSX register.
2022-03-12 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/vsx.md (extendditi2): Optimize extendditi2 GPR to
VSX register.
Diff:
---
gcc/config/rs6000/vsx.md | 82 +++++++++++++++++++++++++-----------------------
1 file changed, 43 insertions(+), 39 deletions(-)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 25ef1702f49..c9d94032580 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5035,11 +5035,12 @@
;; We also need the GPR code for power9 so that we can optimize to use the
;; multiply-add instructions.
(define_insn_and_split "extendditi2"
- [(set (match_operand:TI 0 "register_operand" "=r,r,wa,v,v")
- (sign_extend:TI (match_operand:DI 1 "input_operand" "r,m,b, v,Z")))
- (clobber (match_scratch:DI 2 "=&X,X,r, X,X"))
+ [(set (match_operand:TI 0 "register_operand" "=r, wa, v, r, v")
+ (sign_extend:TI
+ (match_operand:DI 1 "input_operand" "r, b, v, m, Z")))
+ (clobber (match_scratch:DI 2 "=&X, r, X, X, X"))
(clobber (reg:DI CA_REGNO))]
- "TARGET_POWERPC64 && TARGET_MADDLD"
+ "TARGET_POWERPC64 && TARGET_P9_VECTOR"
"#"
"&& reload_completed"
[(pc)]
@@ -5047,27 +5048,10 @@
rtx dest = operands[0];
rtx src = operands[1];
int dest_regno = reg_or_subregno (dest);
- int src_regno = ((REG_P (src) || SUBREG_P (src))
- ? reg_or_subregno (src)
- : -1);
-
- /* If we are converting from a GPR to a vector register, do the sign
- extension in a scratch GPR register, and then do the mtvsrdd. */
- if (VSX_REGNO_P (dest_regno) && INT_REGNO_P (src_regno))
- {
- rtx tmp = operands[2];
- rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
- emit_insn (gen_ashrdi3 (tmp, src, GEN_INT (63)));
- if (BYTES_BIG_ENDIAN)
- emit_insn (gen_vsx_concat_v2di (dest_v2di, tmp, src));
- else
- emit_insn (gen_vsx_concat_v2di (dest_v2di, src, tmp));
- DONE;
- }
/* Handle conversion to GPR registers. Load up the low part and then do
a sign extension to the upper part. */
- else if (INT_REGNO_P (dest_regno))
+ if (INT_REGNO_P (dest_regno))
{
rtx dest_hi = gen_highpart (DImode, dest);
rtx dest_lo = gen_lowpart (DImode, dest);
@@ -5075,36 +5059,56 @@
emit_move_insn (dest_lo, src);
/* In case src is a MEM, we have to use the destination, which is a
register, instead of re-using the source. */
- rtx src2 = (REG_P (src) || SUBREG_P (src)) ? src : dest_lo;
+ rtx src2 = int_reg_operand (src, DImode) ? src : dest_lo;
emit_insn (gen_ashrdi3 (dest_hi, src2, GEN_INT (63)));
DONE;
}
- /* For conversion to an Altivec register, generate either a splat operation
- or a load rightmost double word instruction. Both instructions gets the
- DImode value into the lower 64 bits, and then do the vextsd2q
- instruction. */
-
- else if (ALTIVEC_REGNO_P (dest_regno))
+ /* For memory, use lxvrdx to load the value into the bottom of the
+ register and do the sign extension. */
+ else if (ALTIVEC_REGNO_P (dest_regno) && MEM_P (src))
{
- if (MEM_P (src))
- emit_insn (gen_vsx_lxvrdx (dest, src));
- else
+ emit_insn (gen_vsx_lxvrdx (dest, src));
+ emit_insn (gen_extendditi2_vector (dest, dest));
+ DONE;
+ }
+
+ else
+ {
+ int src_regno = reg_or_subregno (src);
+
+ /* If we are converting from a GPR to a vector register, do the
+ sign extension in a scratch GPR register, and then do the
+ mtvsrdd. */
+ if (VSX_REGNO_P (dest_regno) && INT_REGNO_P (src_regno))
+ {
+ rtx tmp = operands[2];
+ rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
+ emit_insn (gen_ashrdi3 (tmp, src, GEN_INT (63)));
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_vsx_concat_v2di (dest_v2di, tmp, src));
+ else
+ emit_insn (gen_vsx_concat_v2di (dest_v2di, src, tmp));
+ DONE;
+ }
+
+ /* For conversion to an Altivec register, generate a splat operation to
+ get the value in the bottom 64-bits. */
+ else if (ALTIVEC_REGNO_P (dest_regno) && ALTIVEC_REGNO_P (src_regno))
{
rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
emit_insn (gen_vsx_splat_v2di (dest_v2di, src));
+ emit_insn (gen_extendditi2_vector (dest, dest));
+ DONE;
}
- emit_insn (gen_extendditi2_vector (dest, dest));
- DONE;
+ else
+ gcc_unreachable ();
}
-
- else
- gcc_unreachable ();
}
[(set_attr "length" "8")
- (set_attr "type" "shift,load,mtvsr,vecperm,load")
- (set_attr "isa" "p9,p9,p9,p10,p10")])
+ (set_attr "type" "shift,mtvsr,vecperm,load,vecload")
+ (set_attr "isa" "*, *, p10, *, p10")])
;; Sign extend 64-bit value in TI reg, word 1, to 128-bit value in TI reg
(define_insn "extendditi2_vector"
^ permalink raw reply [flat|nested] 2+ messages in thread
* [gcc(refs/users/meissner/heads/work081)] Optimize extendditi2 GPR to VSX register.
@ 2022-03-15 21:15 Michael Meissner
0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2022-03-15 21:15 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:fad5716a6579578052024626445b6a645a1c8a76
commit fad5716a6579578052024626445b6a645a1c8a76
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Tue Mar 15 17:14:42 2022 -0400
Optimize extendditi2 GPR to VSX register.
2022-03-15 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/vsx.md (extendditi2): Optimize extendditi2 GPR to
VSX register.
gcc/testsuite/
* gcc.target/powerpc/pr104698-2.c: Update instruction counts.
Diff:
---
gcc/config/rs6000/vsx.md | 84 ++++++++++++++-------------
gcc/testsuite/gcc.target/powerpc/pr104698-2.c | 34 +++++++----
2 files changed, 68 insertions(+), 50 deletions(-)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 25ef1702f49..2fab6843c5f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5035,11 +5035,12 @@
;; We also need the GPR code for power9 so that we can optimize to use the
;; multiply-add instructions.
(define_insn_and_split "extendditi2"
- [(set (match_operand:TI 0 "register_operand" "=r,r,wa,v,v")
- (sign_extend:TI (match_operand:DI 1 "input_operand" "r,m,b, v,Z")))
- (clobber (match_scratch:DI 2 "=&X,X,r, X,X"))
+ [(set (match_operand:TI 0 "register_operand" "=r, wa, v, r, v")
+ (sign_extend:TI
+ (match_operand:DI 1 "input_operand" "r, b, v, m, Z")))
+ (clobber (match_scratch:DI 2 "=X, &b, X, X, X"))
(clobber (reg:DI CA_REGNO))]
- "TARGET_POWERPC64 && TARGET_MADDLD"
+ "TARGET_POWERPC64 && TARGET_P9_VECTOR"
"#"
"&& reload_completed"
[(pc)]
@@ -5047,27 +5048,10 @@
rtx dest = operands[0];
rtx src = operands[1];
int dest_regno = reg_or_subregno (dest);
- int src_regno = ((REG_P (src) || SUBREG_P (src))
- ? reg_or_subregno (src)
- : -1);
-
- /* If we are converting from a GPR to a vector register, do the sign
- extension in a scratch GPR register, and then do the mtvsrdd. */
- if (VSX_REGNO_P (dest_regno) && INT_REGNO_P (src_regno))
- {
- rtx tmp = operands[2];
- rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
- emit_insn (gen_ashrdi3 (tmp, src, GEN_INT (63)));
- if (BYTES_BIG_ENDIAN)
- emit_insn (gen_vsx_concat_v2di (dest_v2di, tmp, src));
- else
- emit_insn (gen_vsx_concat_v2di (dest_v2di, src, tmp));
- DONE;
- }
/* Handle conversion to GPR registers. Load up the low part and then do
a sign extension to the upper part. */
- else if (INT_REGNO_P (dest_regno))
+ if (INT_REGNO_P (dest_regno))
{
rtx dest_hi = gen_highpart (DImode, dest);
rtx dest_lo = gen_lowpart (DImode, dest);
@@ -5075,36 +5059,58 @@
emit_move_insn (dest_lo, src);
/* In case src is a MEM, we have to use the destination, which is a
register, instead of re-using the source. */
- rtx src2 = (REG_P (src) || SUBREG_P (src)) ? src : dest_lo;
+ rtx src2 = int_reg_operand (src, DImode) ? src : dest_lo;
emit_insn (gen_ashrdi3 (dest_hi, src2, GEN_INT (63)));
DONE;
}
- /* For conversion to an Altivec register, generate either a splat operation
- or a load rightmost double word instruction. Both instructions gets the
- DImode value into the lower 64 bits, and then do the vextsd2q
- instruction. */
-
- else if (ALTIVEC_REGNO_P (dest_regno))
+ /* For memory, use lxvrdx to load the value into the bottom of the
+ register and do the sign extension. */
+ else if (ALTIVEC_REGNO_P (dest_regno) && MEM_P (src))
{
- if (MEM_P (src))
- emit_insn (gen_vsx_lxvrdx (dest, src));
- else
+ emit_insn (gen_vsx_lxvrdx (dest, src));
+ emit_insn (gen_extendditi2_vector (dest, dest));
+ DONE;
+ }
+
+ else
+ {
+ int src_regno = reg_or_subregno (src);
+
+ /* If we are converting from a GPR to a vector register, do the
+ sign extension in a scratch GPR register, and then do the
+ mtvsrdd. Note, the vsx_concat_v2di function will reverse
+ the arguments on little endian systems, so we need to build
+ the insn so the appropriate registers are generated. */
+ if (VSX_REGNO_P (dest_regno) && INT_REGNO_P (src_regno))
+ {
+ rtx tmp = operands[2];
+ rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
+ emit_insn (gen_ashrdi3 (tmp, src, GEN_INT (63)));
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_vsx_concat_v2di (dest_v2di, tmp, src));
+ else
+ emit_insn (gen_vsx_concat_v2di (dest_v2di, src, tmp));
+ DONE;
+ }
+
+ /* For conversion to an Altivec register, generate a splat operation to
+ get the value in the bottom 64-bits. */
+ else if (ALTIVEC_REGNO_P (dest_regno) && ALTIVEC_REGNO_P (src_regno))
{
rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
emit_insn (gen_vsx_splat_v2di (dest_v2di, src));
+ emit_insn (gen_extendditi2_vector (dest, dest));
+ DONE;
}
- emit_insn (gen_extendditi2_vector (dest, dest));
- DONE;
+ else
+ gcc_unreachable ();
}
-
- else
- gcc_unreachable ();
}
[(set_attr "length" "8")
- (set_attr "type" "shift,load,mtvsr,vecperm,load")
- (set_attr "isa" "p9,p9,p9,p10,p10")])
+ (set_attr "type" "shift,mtvsr,vecperm,load,vecload")
+ (set_attr "isa" "*, *, p10, *, p10")])
;; Sign extend 64-bit value in TI reg, word 1, to 128-bit value in TI reg
(define_insn "extendditi2_vector"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr104698-2.c b/gcc/testsuite/gcc.target/powerpc/pr104698-2.c
index 6966fce2ba9..10132d10e8e 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr104698-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr104698-2.c
@@ -3,31 +3,43 @@
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
/* PR target/104694 involved GCC generating vextsd2q to convent long long to
- __int128_t when the long long value was in the GPR register. This test
- verifies that if the result is in the Altivec registers, we still want to
- generate vextsd2q. We use __int128_t to indicate that we want the result of
- the conversion to be in an Altivec register. */
+ __int128_t when the long long value was in the GPR register.
+
+ This test verifies that when converting a GPR to a vector register, we
+ generate the sign extend in the GPR register, and then do a mtvsrdd to get
+ it into the vector register. */
void do_div_1 (__int128_t *p, __int128_t *q, long long r)
{
- *p = *q / r; /* mtvsrdd, vextsd2q, vdivsq. */
+ *p = *q / r; /* sradi, mtvsrdd, vdivsq. */
}
-/* Test the optimization in vsx.md to use lxvrdx instead of ld and mtvsrdd if
- the value is coming from memory. */
+/* Test the optimization in vsx.md to use lxvrdx instead of ld and then do
+ vextsd2q to sign extend it. */
void do_div_2 (__int128_t *p, __int128_t *q, long long *r)
{
*p = *q / r[2]; /* lxvrdx, vextsd2q, vdivsq. */
}
+/* If the 64-bit integer is in a vector register (after conversion from
+ double), don't move it back to a GPR register to do the sign extension.
+ Instead use vextsd2q. */
+
+void do_div_3 (__int128_t *p, __int128_t *q, double r)
+{
+ *p = *q / (long long)r; /* xscvdpsxds, xxpermdi, vextsd2q, vdivsq. */
+}
+
+
/* { dg-final { scan-assembler-not {\mld\M} } } */
/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
/* { dg-final { scan-assembler-not {\mmfvsrld\M} } } */
-/* { dg-final { scan-assembler-not {\msradi\M} } } */
-/* { dg-final { scan-assembler-times {\mlxv\M} 2 } } */
+/* { dg-final { scan-assembler-times {\msradi\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mlxv\M} 3 } } */
/* { dg-final { scan-assembler-times {\mlxvrdx\M} 1 } } */
/* { dg-final { scan-assembler-times {\mmtvsrdd\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mstxv\M} 2 } } */
-/* { dg-final { scan-assembler-times {\mvdivsq\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstxv\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mvdivsq\M} 3 } } */
/* { dg-final { scan-assembler-times {\mvextsd2q\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2022-03-15 21:15 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-12 6:36 [gcc(refs/users/meissner/heads/work081)] Optimize extendditi2 GPR to VSX register Michael Meissner
2022-03-15 21:15 Michael Meissner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).