public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work087)] Optimize multiply/add of DImode extended to TImode, PR target/103109.
@ 2022-04-28 20:59 Michael Meissner
0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2022-04-28 20:59 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:4449a625e9d3ddb8d77f8f4ccc7b39761390f4a5
commit 4449a625e9d3ddb8d77f8f4ccc7b39761390f4a5
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Thu Apr 28 16:59:07 2022 -0400
Optimize multiply/add of DImode extended to TImode, PR target/103109.
On power9 and power10 systems, we have instructions that support doing
64-bit integers converted to 128-bit integers and producing 128-bit
results. This patch adds support to generate these instructions.
Previously GCC had define_expands to handle conversion of the 64-bit
extend to 128-bit and multiply. This patch changes these define_expands
to define_insn_and_split and then it provides combiner patterns to
generate these multiply/add instructions.
To support using this optimization on power9, this patch extends the sign
extend DImode to TImode to also run on power9 (added for PR
target/104698).
This patch needs the previous patch to add unsigned DImode to TImode
conversion so that the combiner can combine the extend, multiply, and add
instructions.
2022-04-28 Michael Meissner <meissner@linux.ibm.com>
gcc/
PR target/103109
* config/rs6000/rs6000.md (su_int32): New code attribute.
(<u>mul<mode><dmode>3): Convert from define_expand to
define_insn_and_split.
(maddld<mode>4): Add generator function.
(<u>mulditi3_<u>adddi3): New insn.
(<u>mulditi3_add_const): New insn.
(<u>mulditi3_<u>adddi3_upper): New insn.
gcc/testsuite/
PR target/103109
* gcc.target/powerpc/pr103109.c: New test.
Diff:
---
gcc/config/rs6000/rs6000.md | 128 +++++++++++++++++++++++++++++++++++++++++---
1 file changed, 122 insertions(+), 6 deletions(-)
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index c9e5b5f10e0..a78f419f5f3 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -687,6 +687,9 @@
(float "")
(unsigned_float "uns")])
+(define_code_attr su_int32 [(sign_extend "s32bit_cint_operand")
+ (zero_extend "c32bit_cint_operand")])
+
; Various instructions that come in SI and DI forms.
; A generic w/d attribute, for things like cmpw/cmpd.
(define_mode_attr wd [(QI "b")
@@ -3210,13 +3213,16 @@
"mulhw<u> %0,%1,%2"
[(set_attr "type" "mul")])
-(define_expand "<u>mul<mode><dmode>3"
- [(set (match_operand:<DMODE> 0 "gpc_reg_operand")
+(define_insn_and_split "<u>mul<mode><dmode>3"
+ [(set (match_operand:<DMODE> 0 "gpc_reg_operand" "=&r")
(mult:<DMODE> (any_extend:<DMODE>
- (match_operand:GPR 1 "gpc_reg_operand"))
+ (match_operand:GPR 1 "gpc_reg_operand" "r"))
(any_extend:<DMODE>
- (match_operand:GPR 2 "gpc_reg_operand"))))]
+ (match_operand:GPR 2 "gpc_reg_operand" "r"))))]
"!(<MODE>mode == SImode && TARGET_POWERPC64)"
+ "#"
+ "&& 1"
+ [(pc)]
{
rtx l = gen_reg_rtx (<MODE>mode);
rtx h = gen_reg_rtx (<MODE>mode);
@@ -3225,9 +3231,10 @@
emit_move_insn (gen_lowpart (<MODE>mode, operands[0]), l);
emit_move_insn (gen_highpart (<MODE>mode, operands[0]), h);
DONE;
-})
+}
+ [(set_attr "length" "8")])
-(define_insn "*maddld<mode>4"
+(define_insn "maddld<mode>4"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(plus:GPR (mult:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
(match_operand:GPR 2 "gpc_reg_operand" "r"))
@@ -3236,6 +3243,115 @@
"maddld %0,%1,%2,%3"
[(set_attr "type" "mul")])
+(define_insn_and_split "*<u>mulditi3_<u>adddi3"
+ [(set (match_operand:TI 0 "gpc_reg_operand" "=&r")
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r"))))]
+ "TARGET_MADDLD && TARGET_POWERPC64"
+ "#"
+ "&& 1"
+ [(pc)]
+{
+ rtx dest = operands[0];
+ rtx dest_hi = gen_highpart (DImode, dest);
+ rtx dest_lo = gen_lowpart (DImode, dest);
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
+ rtx tmp_hi, tmp_lo;
+
+ if (can_create_pseudo_p ())
+ {
+ tmp_hi = gen_reg_rtx (DImode);
+ tmp_lo = gen_reg_rtx (DImode);
+ }
+ else
+ {
+ tmp_hi = dest_hi;
+ tmp_lo = dest_lo;
+ }
+
+ emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3));
+ emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3));
+
+ if (can_create_pseudo_p ())
+ {
+ emit_move_insn (dest_hi, tmp_hi);
+ emit_move_insn (dest_lo, tmp_lo);
+ }
+ DONE;
+}
+ [(set_attr "length" "8")])
+
+;; Optimize 128-bit multiply with zero/sign extend and adding a constant. We
+;; force the constant into a register to generate li, maddhd, and maddld,
+;; instead of mulld, mulhd, addic, and addze. We can't combine this pattern
+;; with the pattern that handles registers, since constants don't have a sign
+;; or zero extend around them.
+(define_insn_and_split "*<u>mulditi3_add_const"
+ [(set (match_operand:TI 0 "gpc_reg_operand" "=&r")
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (match_operand 3 "<su_int32>" "r")))]
+ "TARGET_MADDLD && TARGET_POWERPC64
+"
+ "#"
+ "&& 1"
+ [(pc)]
+{
+ rtx dest = operands[0];
+ rtx dest_hi = gen_highpart (DImode, dest);
+ rtx dest_lo = gen_lowpart (DImode, dest);
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = force_reg (DImode, operands[3]);
+ rtx tmp_hi, tmp_lo;
+
+ if (can_create_pseudo_p ())
+ {
+ tmp_hi = gen_reg_rtx (DImode);
+ tmp_lo = gen_reg_rtx (DImode);
+ }
+ else
+ {
+ tmp_hi = dest_hi;
+ tmp_lo = dest_lo;
+ }
+
+ emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3));
+ emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3));
+
+ if (can_create_pseudo_p ())
+ {
+ emit_move_insn (dest_hi, tmp_hi);
+ emit_move_insn (dest_lo, tmp_lo);
+ }
+ DONE;
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "mul")
+ (set_attr "size" "64")])
+
+(define_insn "<u>mulditi3_<u>adddi3_upper"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (truncate:DI
+ (lshiftrt:TI
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r")))
+ (const_int 64))))]
+ "TARGET_MADDLD && TARGET_POWERPC64"
+ "maddhd<u> %0,%1,%2,%3"
+ [(set_attr "type" "mul")
+ (set_attr "size" "64")])
+
(define_insn "udiv<mode>3"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
^ permalink raw reply [flat|nested] 2+ messages in thread
* [gcc(refs/users/meissner/heads/work087)] Optimize multiply/add of DImode extended to TImode, PR target/103109.
@ 2022-04-28 21:03 Michael Meissner
0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2022-04-28 21:03 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:d649d771b8b436d7d8c8c5901b1c349f8351ca98
commit d649d771b8b436d7d8c8c5901b1c349f8351ca98
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Thu Apr 28 17:03:09 2022 -0400
Optimize multiply/add of DImode extended to TImode, PR target/103109.
On power9 and power10 systems, we have instructions that support doing
64-bit integers converted to 128-bit integers and producing 128-bit
results. This patch adds support to generate these instructions.
Previously GCC had define_expands to handle conversion of the 64-bit
extend to 128-bit and multiply. This patch changes these define_expands
to define_insn_and_split and then it provides combiner patterns to
generate these multiply/add instructions.
To support using this optimization on power9, this patch extends the sign
extend DImode to TImode to also run on power9 (added for PR
target/104698).
This patch needs the previous patch to add unsigned DImode to TImode
conversion so that the combiner can combine the extend, multiply, and add
instructions.
2022-04-28 Michael Meissner <meissner@linux.ibm.com>
gcc/
PR target/103109
* config/rs6000/rs6000.md (su_int32): New code attribute.
(<u>mul<mode><dmode>3): Convert from define_expand to
define_insn_and_split.
(maddld<mode>4): Add generator function.
(<u>mulditi3_<u>adddi3): New insn.
(<u>mulditi3_add_const): New insn.
(<u>mulditi3_<u>adddi3_upper): New insn.
gcc/testsuite/
PR target/103109
* gcc.target/powerpc/pr103109.c: New test.
Diff:
---
gcc/config/rs6000/rs6000.md | 128 ++++++++++++++++++++++++++--
gcc/testsuite/gcc.target/powerpc/pr103109.c | 62 ++++++++++++++
2 files changed, 184 insertions(+), 6 deletions(-)
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index c9e5b5f10e0..a78f419f5f3 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -687,6 +687,9 @@
(float "")
(unsigned_float "uns")])
+(define_code_attr su_int32 [(sign_extend "s32bit_cint_operand")
+ (zero_extend "c32bit_cint_operand")])
+
; Various instructions that come in SI and DI forms.
; A generic w/d attribute, for things like cmpw/cmpd.
(define_mode_attr wd [(QI "b")
@@ -3210,13 +3213,16 @@
"mulhw<u> %0,%1,%2"
[(set_attr "type" "mul")])
-(define_expand "<u>mul<mode><dmode>3"
- [(set (match_operand:<DMODE> 0 "gpc_reg_operand")
+(define_insn_and_split "<u>mul<mode><dmode>3"
+ [(set (match_operand:<DMODE> 0 "gpc_reg_operand" "=&r")
(mult:<DMODE> (any_extend:<DMODE>
- (match_operand:GPR 1 "gpc_reg_operand"))
+ (match_operand:GPR 1 "gpc_reg_operand" "r"))
(any_extend:<DMODE>
- (match_operand:GPR 2 "gpc_reg_operand"))))]
+ (match_operand:GPR 2 "gpc_reg_operand" "r"))))]
"!(<MODE>mode == SImode && TARGET_POWERPC64)"
+ "#"
+ "&& 1"
+ [(pc)]
{
rtx l = gen_reg_rtx (<MODE>mode);
rtx h = gen_reg_rtx (<MODE>mode);
@@ -3225,9 +3231,10 @@
emit_move_insn (gen_lowpart (<MODE>mode, operands[0]), l);
emit_move_insn (gen_highpart (<MODE>mode, operands[0]), h);
DONE;
-})
+}
+ [(set_attr "length" "8")])
-(define_insn "*maddld<mode>4"
+(define_insn "maddld<mode>4"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(plus:GPR (mult:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
(match_operand:GPR 2 "gpc_reg_operand" "r"))
@@ -3236,6 +3243,115 @@
"maddld %0,%1,%2,%3"
[(set_attr "type" "mul")])
+(define_insn_and_split "*<u>mulditi3_<u>adddi3"
+ [(set (match_operand:TI 0 "gpc_reg_operand" "=&r")
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r"))))]
+ "TARGET_MADDLD && TARGET_POWERPC64"
+ "#"
+ "&& 1"
+ [(pc)]
+{
+ rtx dest = operands[0];
+ rtx dest_hi = gen_highpart (DImode, dest);
+ rtx dest_lo = gen_lowpart (DImode, dest);
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
+ rtx tmp_hi, tmp_lo;
+
+ if (can_create_pseudo_p ())
+ {
+ tmp_hi = gen_reg_rtx (DImode);
+ tmp_lo = gen_reg_rtx (DImode);
+ }
+ else
+ {
+ tmp_hi = dest_hi;
+ tmp_lo = dest_lo;
+ }
+
+ emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3));
+ emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3));
+
+ if (can_create_pseudo_p ())
+ {
+ emit_move_insn (dest_hi, tmp_hi);
+ emit_move_insn (dest_lo, tmp_lo);
+ }
+ DONE;
+}
+ [(set_attr "length" "8")])
+
+;; Optimize 128-bit multiply with zero/sign extend and adding a constant. We
+;; force the constant into a register to generate li, maddhd, and maddld,
+;; instead of mulld, mulhd, addic, and addze. We can't combine this pattern
+;; with the pattern that handles registers, since constants don't have a sign
+;; or zero extend around them.
+(define_insn_and_split "*<u>mulditi3_add_const"
+ [(set (match_operand:TI 0 "gpc_reg_operand" "=&r")
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (match_operand 3 "<su_int32>" "r")))]
+ "TARGET_MADDLD && TARGET_POWERPC64
+"
+ "#"
+ "&& 1"
+ [(pc)]
+{
+ rtx dest = operands[0];
+ rtx dest_hi = gen_highpart (DImode, dest);
+ rtx dest_lo = gen_lowpart (DImode, dest);
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = force_reg (DImode, operands[3]);
+ rtx tmp_hi, tmp_lo;
+
+ if (can_create_pseudo_p ())
+ {
+ tmp_hi = gen_reg_rtx (DImode);
+ tmp_lo = gen_reg_rtx (DImode);
+ }
+ else
+ {
+ tmp_hi = dest_hi;
+ tmp_lo = dest_lo;
+ }
+
+ emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3));
+ emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3));
+
+ if (can_create_pseudo_p ())
+ {
+ emit_move_insn (dest_hi, tmp_hi);
+ emit_move_insn (dest_lo, tmp_lo);
+ }
+ DONE;
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "mul")
+ (set_attr "size" "64")])
+
+(define_insn "<u>mulditi3_<u>adddi3_upper"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (truncate:DI
+ (lshiftrt:TI
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r")))
+ (const_int 64))))]
+ "TARGET_MADDLD && TARGET_POWERPC64"
+ "maddhd<u> %0,%1,%2,%3"
+ [(set_attr "type" "mul")
+ (set_attr "size" "64")])
+
(define_insn "udiv<mode>3"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103109.c b/gcc/testsuite/gcc.target/powerpc/pr103109.c
new file mode 100644
index 00000000000..ae2cfb9eda7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103109.c
@@ -0,0 +1,62 @@
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* This test makes sure that GCC generates the maddhd, maddhdu, and maddld
+ power9 instructions when doing some forms of 64-bit integers converted to
+ 128-bit integers and used with multiply/add operations. */
+
+__int128_t
+s_mult_add (long long a,
+ long long b,
+ long long c)
+{
+ /* maddhd, maddld. */
+ return ((__int128_t)a * (__int128_t)b) + (__int128_t)c;
+}
+
+/* Test 32-bit constants that are loaded into GPRs instead of doing the
+ mulld/mulhd and then addic/addme or addc/addze. */
+__int128_t
+s_mult_add_m10 (long long a,
+ long long b)
+{
+ /* maddhd, maddld. */
+ return ((__int128_t)a * (__int128_t)b) - 10;
+}
+
+__int128_t
+s_mult_add_70000 (long long a,
+ long long b)
+{
+ /* maddhd, maddld. */
+ return ((__int128_t)a * (__int128_t)b) + 70000;
+}
+
+__uint128_t
+u_mult_add (unsigned long long a,
+ unsigned long long b,
+ unsigned long long c)
+{
+ /* maddhdu, maddld. */
+ return ((__uint128_t)a * (__uint128_t)b) + (__uint128_t)c;
+}
+
+__uint128_t
+u_mult_add_0x80000000 (unsigned long long a,
+ unsigned long long b)
+{
+ /* maddhdu, maddld. */
+ return ((__uint128_t)a * (__uint128_t)b) + 0x80000000UL;
+}
+
+/* { dg-final { scan-assembler-not {\maddc\M} } } */
+/* { dg-final { scan-assembler-not {\madde\M} } } */
+/* { dg-final { scan-assembler-not {\maddid\M} } } */
+/* { dg-final { scan-assembler-not {\maddme\M} } } */
+/* { dg-final { scan-assembler-not {\maddze\M} } } */
+/* { dg-final { scan-assembler-not {\mmulhd\M} } } */
+/* { dg-final { scan-assembler-not {\mmulld\M} } } */
+/* { dg-final { scan-assembler-times {\mmaddhd\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mmaddhdu\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mmaddld\M} 5 } } */
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2022-04-28 21:03 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-28 20:59 [gcc(refs/users/meissner/heads/work087)] Optimize multiply/add of DImode extended to TImode, PR target/103109 Michael Meissner
2022-04-28 21:03 Michael Meissner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).