public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work082)] Optimize multiply/add of DImode extended to TImode.
@ 2022-03-23 1:04 Michael Meissner
0 siblings, 0 replies; 4+ messages in thread
From: Michael Meissner @ 2022-03-23 1:04 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:4003ac62c170c5edfdbbc2114926f5661fb20a60
commit 4003ac62c170c5edfdbbc2114926f5661fb20a60
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Tue Mar 22 21:04:01 2022 -0400
Optimize multiply/add of DImode extended to TImode.
On power9 and power10 systems, we have instructions that support doing
64-bit integers converted to 128-bit integers and producing 128-bit
results. This patch adds support to generate these instructions.
Previously we had define_expands to handle conversion of the 64-bit extend
to 128-bit and multiply. This patch changes these define_expands to
define_insn_and_split and then it provides combiner patterns to generate
these multiply/add instructions.
To support using this optimization on power9, we extend the sign extend
DImode to TImode to also run on power9 (added for PR target/104698).
We add support for doing an unsigned DImode to TImode conversion. We need
these conversions to exist on power9 so that the combiner can properly
combine the extend, multiply, and add instructions.
2022-03-22 Michael Meissner <meissner@linux.ibm.com>
gcc/
PR target/103109
* config/rs6000/rs6000.md (su_int32): New code attribute.
(<u>mul<mode><dmode>3): Convert from define_expand to
define_insn_and_split.
(maddld<mode>4): Add generator function.
(<u>mulditi3_<u>adddi3): New insn.
(<u>mulditi3_add_const): New insn.
(<u>mulditi3_<u>adddi3_upper): New insn.
gcc/testsuite/
PR target/103109
* gcc.target/powerpc/pr103109.c: New test.
Diff:
---
gcc/config/rs6000/rs6000.md | 128 ++++++++++++++++++++++++++--
gcc/testsuite/gcc.target/powerpc/pr103109.c | 62 ++++++++++++++
2 files changed, 184 insertions(+), 6 deletions(-)
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index df8a750d945..be907ab3518 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -676,6 +676,9 @@
(float "")
(unsigned_float "uns")])
+(define_code_attr su_int32 [(sign_extend "s32bit_cint_operand")
+ (zero_extend "c32bit_cint_operand")])
+
; Various instructions that come in SI and DI forms.
; A generic w/d attribute, for things like cmpw/cmpd.
(define_mode_attr wd [(QI "b")
@@ -3199,13 +3202,16 @@
"mulhw<u> %0,%1,%2"
[(set_attr "type" "mul")])
-(define_expand "<u>mul<mode><dmode>3"
- [(set (match_operand:<DMODE> 0 "gpc_reg_operand")
+(define_insn_and_split "<u>mul<mode><dmode>3"
+ [(set (match_operand:<DMODE> 0 "gpc_reg_operand" "=&r")
(mult:<DMODE> (any_extend:<DMODE>
- (match_operand:GPR 1 "gpc_reg_operand"))
+ (match_operand:GPR 1 "gpc_reg_operand" "r"))
(any_extend:<DMODE>
- (match_operand:GPR 2 "gpc_reg_operand"))))]
+ (match_operand:GPR 2 "gpc_reg_operand" "r"))))]
"!(<MODE>mode == SImode && TARGET_POWERPC64)"
+ "#"
+ "&& 1"
+ [(pc)]
{
rtx l = gen_reg_rtx (<MODE>mode);
rtx h = gen_reg_rtx (<MODE>mode);
@@ -3214,9 +3220,10 @@
emit_move_insn (gen_lowpart (<MODE>mode, operands[0]), l);
emit_move_insn (gen_highpart (<MODE>mode, operands[0]), h);
DONE;
-})
+}
+ [(set_attr "length" "8")])
-(define_insn "*maddld<mode>4"
+(define_insn "maddld<mode>4"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(plus:GPR (mult:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
(match_operand:GPR 2 "gpc_reg_operand" "r"))
@@ -3225,6 +3232,115 @@
"maddld %0,%1,%2,%3"
[(set_attr "type" "mul")])
+(define_insn_and_split "*<u>mulditi3_<u>adddi3"
+ [(set (match_operand:TI 0 "gpc_reg_operand" "=&r")
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r"))))]
+ "TARGET_MADDLD && TARGET_POWERPC64"
+ "#"
+ "&& 1"
+ [(pc)]
+{
+ rtx dest = operands[0];
+ rtx dest_hi = gen_highpart (DImode, dest);
+ rtx dest_lo = gen_lowpart (DImode, dest);
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
+ rtx tmp_hi, tmp_lo;
+
+ if (can_create_pseudo_p ())
+ {
+ tmp_hi = gen_reg_rtx (DImode);
+ tmp_lo = gen_reg_rtx (DImode);
+ }
+ else
+ {
+ tmp_hi = dest_hi;
+ tmp_lo = dest_lo;
+ }
+
+ emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3));
+ emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3));
+
+ if (can_create_pseudo_p ())
+ {
+ emit_move_insn (dest_hi, tmp_hi);
+ emit_move_insn (dest_lo, tmp_lo);
+ }
+ DONE;
+}
+ [(set_attr "length" "8")])
+
+;; Optimize 128-bit multiply with zero/sign extend and adding a constant. We
+;; force the constant into a register to generate li, maddhd, and maddld,
+;; instead of mulld, mulhd, addic, and addze. We can't combine this pattern
+;; with the pattern that handles registers, since constants don't have a sign
+;; or zero extend around them.
+(define_insn_and_split "*<u>mulditi3_add_const"
+ [(set (match_operand:TI 0 "gpc_reg_operand" "=&r")
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (match_operand 3 "<su_int32>" "r")))]
+ "TARGET_MADDLD && TARGET_POWERPC64
+"
+ "#"
+ "&& 1"
+ [(pc)]
+{
+ rtx dest = operands[0];
+ rtx dest_hi = gen_highpart (DImode, dest);
+ rtx dest_lo = gen_lowpart (DImode, dest);
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = force_reg (DImode, operands[3]);
+ rtx tmp_hi, tmp_lo;
+
+ if (can_create_pseudo_p ())
+ {
+ tmp_hi = gen_reg_rtx (DImode);
+ tmp_lo = gen_reg_rtx (DImode);
+ }
+ else
+ {
+ tmp_hi = dest_hi;
+ tmp_lo = dest_lo;
+ }
+
+ emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3));
+ emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3));
+
+ if (can_create_pseudo_p ())
+ {
+ emit_move_insn (dest_hi, tmp_hi);
+ emit_move_insn (dest_lo, tmp_lo);
+ }
+ DONE;
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "mul")
+ (set_attr "size" "64")])
+
+(define_insn "<u>mulditi3_<u>adddi3_upper"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (truncate:DI
+ (lshiftrt:TI
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r")))
+ (const_int 64))))]
+ "TARGET_MADDLD && TARGET_POWERPC64"
+ "maddhd<u> %0,%1,%2,%3"
+ [(set_attr "type" "mul")
+ (set_attr "size" "64")])
+
(define_insn "udiv<mode>3"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103109.c b/gcc/testsuite/gcc.target/powerpc/pr103109.c
new file mode 100644
index 00000000000..ae2cfb9eda7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103109.c
@@ -0,0 +1,62 @@
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* This test makes sure that GCC generates the maddhd, maddhdu, and maddld
+ power9 instructions when doing some forms of 64-bit integers converted to
+ 128-bit integers and used with multiply/add operations. */
+
+__int128_t
+s_mult_add (long long a,
+ long long b,
+ long long c)
+{
+ /* maddhd, maddld. */
+ return ((__int128_t)a * (__int128_t)b) + (__int128_t)c;
+}
+
+/* Test 32-bit constants that are loaded into GPRs instead of doing the
+ mulld/mulhd and then addic/addime or addc/addze. */
+__int128_t
+s_mult_add_m10 (long long a,
+ long long b)
+{
+ /* maddhd, maddld. */
+ return ((__int128_t)a * (__int128_t)b) - 10;
+}
+
+__int128_t
+s_mult_add_70000 (long long a,
+ long long b)
+{
+ /* maddhd, maddld. */
+ return ((__int128_t)a * (__int128_t)b) + 70000;
+}
+
+__uint128_t
+u_mult_add (unsigned long long a,
+ unsigned long long b,
+ unsigned long long c)
+{
+ /* maddhd, maddld. */
+ return ((__uint128_t)a * (__uint128_t)b) + (__uint128_t)c;
+}
+
+__uint128_t
+u_mult_add_0x80000000 (unsigned long long a,
+ unsigned long long b)
+{
+ /* maddhd, maddld. */
+ return ((__uint128_t)a * (__uint128_t)b) + 0x80000000UL;
+}
+
+/* { dg-final { scan-assembler-not {\maddc\M} } } */
+/* { dg-final { scan-assembler-not {\madde\M} } } */
+/* { dg-final { scan-assembler-not {\maddid\M} } } */
+/* { dg-final { scan-assembler-not {\maddme\M} } } */
+/* { dg-final { scan-assembler-not {\maddze\M} } } */
+/* { dg-final { scan-assembler-not {\mmulhd\M} } } */
+/* { dg-final { scan-assembler-not {\mmulld\M} } } */
+/* { dg-final { scan-assembler-times {\mmaddhd\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mmaddhdu\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mmaddld\M} 5 } } */
^ permalink raw reply [flat|nested] 4+ messages in thread
* [gcc(refs/users/meissner/heads/work082)] Optimize multiply/add of DImode extended to TImode.
@ 2022-03-22 19:56 Michael Meissner
0 siblings, 0 replies; 4+ messages in thread
From: Michael Meissner @ 2022-03-22 19:56 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:80b76da5c4520f4bd6fc89d87860a9c5e47f3929
commit 80b76da5c4520f4bd6fc89d87860a9c5e47f3929
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Tue Mar 22 15:55:47 2022 -0400
Optimize multiply/add of DImode extended to TImode.
On power9 and power10 systems, we have instructions that support doing
64-bit integers converted to 128-bit integers and producing 128-bit
results. This patch adds support to generate these instructions.
Previously we had define_expands to handle conversion of the 64-bit extend
to 128-bit and multiply. This patch changes these define_expands to
define_insn_and_split and then it provides combiner patterns to generate
these multiply/add instructions.
To support using this optimization on power9, we extend the sign extend
DImode to TImode to also run on power9 (added for PR target/104698).
We add support for doing an unsigned DImode to TImode conversion. We need
these conversions to exist on power9 so that the combiner can properly
combine the extend, multiply, and add instructions.
2022-03-22 Michael Meissner <meissner@linux.ibm.com>
gcc/
PR target/103109
* config/rs6000/rs6000.md (su_int32): New code attribute.
(<u>mul<mode><dmode>3): Convert from define_expand to
define_insn_and_split.
(maddld<mode>4): Add generator function.
(<u>mulditi3_<u>adddi3): New insn.
(<u>mulditi3_add_const): New insn.
(<u>mulditi3_<u>adddi3_upper): New insn.
gcc/testsuite/
PR target/103109
* gcc.target/powerpc/pr103109.c: New test.
Diff:
---
gcc/config/rs6000/rs6000.md | 128 ++++++++++++++++++++++++++--
gcc/testsuite/gcc.target/powerpc/pr103109.c | 62 ++++++++++++++
2 files changed, 184 insertions(+), 6 deletions(-)
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index a74c48efae7..da7367ee642 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -676,6 +676,9 @@
(float "")
(unsigned_float "uns")])
+(define_code_attr su_int32 [(sign_extend "s32bit_cint_operand")
+ (zero_extend "c32bit_cint_operand")])
+
; Various instructions that come in SI and DI forms.
; A generic w/d attribute, for things like cmpw/cmpd.
(define_mode_attr wd [(QI "b")
@@ -3199,13 +3202,16 @@
"mulhw<u> %0,%1,%2"
[(set_attr "type" "mul")])
-(define_expand "<u>mul<mode><dmode>3"
- [(set (match_operand:<DMODE> 0 "gpc_reg_operand")
+(define_insn_and_split "<u>mul<mode><dmode>3"
+ [(set (match_operand:<DMODE> 0 "gpc_reg_operand" "=&r")
(mult:<DMODE> (any_extend:<DMODE>
- (match_operand:GPR 1 "gpc_reg_operand"))
+ (match_operand:GPR 1 "gpc_reg_operand" "r"))
(any_extend:<DMODE>
- (match_operand:GPR 2 "gpc_reg_operand"))))]
+ (match_operand:GPR 2 "gpc_reg_operand" "r"))))]
"!(<MODE>mode == SImode && TARGET_POWERPC64)"
+ "#"
+ "&& 1"
+ [(pc)]
{
rtx l = gen_reg_rtx (<MODE>mode);
rtx h = gen_reg_rtx (<MODE>mode);
@@ -3214,9 +3220,10 @@
emit_move_insn (gen_lowpart (<MODE>mode, operands[0]), l);
emit_move_insn (gen_highpart (<MODE>mode, operands[0]), h);
DONE;
-})
+}
+ [(set_attr "length" "8")])
-(define_insn "*maddld<mode>4"
+(define_insn "maddld<mode>4"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(plus:GPR (mult:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
(match_operand:GPR 2 "gpc_reg_operand" "r"))
@@ -3225,6 +3232,115 @@
"maddld %0,%1,%2,%3"
[(set_attr "type" "mul")])
+(define_insn_and_split "*<u>mulditi3_<u>adddi3"
+ [(set (match_operand:TI 0 "gpc_reg_operand" "=&r")
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r"))))]
+ "TARGET_MADDLD && TARGET_POWERPC64"
+ "#"
+ "&& 1"
+ [(pc)]
+{
+ rtx dest = operands[0];
+ rtx dest_hi = gen_highpart (DImode, dest);
+ rtx dest_lo = gen_lowpart (DImode, dest);
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
+ rtx tmp_hi, tmp_lo;
+
+ if (can_create_pseudo_p ())
+ {
+ tmp_hi = gen_reg_rtx (DImode);
+ tmp_lo = gen_reg_rtx (DImode);
+ }
+ else
+ {
+ tmp_hi = dest_hi;
+ tmp_lo = dest_lo;
+ }
+
+ emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3));
+ emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3));
+
+ if (can_create_pseudo_p ())
+ {
+ emit_move_insn (dest_hi, tmp_hi);
+ emit_move_insn (dest_lo, tmp_lo);
+ }
+ DONE;
+}
+ [(set_attr "length" "8")])
+
+;; Optimize 128-bit multiply with zero/sign extend and adding a constant. We
+;; force the constant into a register to generate li, maddhd, and maddld,
+;; instead of mulld, mulhd, addic, and addze. We can't combine this pattern
+;; with the pattern that handles registers, since constants don't have a sign
+;; or zero extend around them.
+(define_insn_and_split "*<u>mulditi3_add_const"
+ [(set (match_operand:TI 0 "gpc_reg_operand" "=&r")
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (match_operand 3 "<su_int32>" "r")))]
+ "TARGET_MADDLD && TARGET_POWERPC64
+"
+ "#"
+ "&& 1"
+ [(pc)]
+{
+ rtx dest = operands[0];
+ rtx dest_hi = gen_highpart (DImode, dest);
+ rtx dest_lo = gen_lowpart (DImode, dest);
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = force_reg (DImode, operands[3]);
+ rtx tmp_hi, tmp_lo;
+
+ if (can_create_pseudo_p ())
+ {
+ tmp_hi = gen_reg_rtx (DImode);
+ tmp_lo = gen_reg_rtx (DImode);
+ }
+ else
+ {
+ tmp_hi = dest_hi;
+ tmp_lo = dest_lo;
+ }
+
+ emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3));
+ emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3));
+
+ if (can_create_pseudo_p ())
+ {
+ emit_move_insn (dest_hi, tmp_hi);
+ emit_move_insn (dest_lo, tmp_lo);
+ }
+ DONE;
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "mul")
+ (set_attr "size" "64")])
+
+(define_insn "<u>mulditi3_<u>adddi3_upper"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (truncate:DI
+ (lshiftrt:TI
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r")))
+ (const_int 64))))]
+ "TARGET_MADDLD && TARGET_POWERPC64"
+ "maddhd<u> %0,%1,%2,%3"
+ [(set_attr "type" "mul")
+ (set_attr "size" "64")])
+
(define_insn "udiv<mode>3"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103109.c b/gcc/testsuite/gcc.target/powerpc/pr103109.c
new file mode 100644
index 00000000000..ae2cfb9eda7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103109.c
@@ -0,0 +1,62 @@
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* This test makes sure that GCC generates the maddhd, maddhdu, and maddld
+ power9 instructions when doing some forms of 64-bit integers converted to
+ 128-bit integers and used with multiply/add operations. */
+
+__int128_t
+s_mult_add (long long a,
+ long long b,
+ long long c)
+{
+ /* maddhd, maddld. */
+ return ((__int128_t)a * (__int128_t)b) + (__int128_t)c;
+}
+
+/* Test 32-bit constants that are loaded into GPRs instead of doing the
+ mulld/mulhd and then addic/addime or addc/addze. */
+__int128_t
+s_mult_add_m10 (long long a,
+ long long b)
+{
+ /* maddhd, maddld. */
+ return ((__int128_t)a * (__int128_t)b) - 10;
+}
+
+__int128_t
+s_mult_add_70000 (long long a,
+ long long b)
+{
+ /* maddhd, maddld. */
+ return ((__int128_t)a * (__int128_t)b) + 70000;
+}
+
+__uint128_t
+u_mult_add (unsigned long long a,
+ unsigned long long b,
+ unsigned long long c)
+{
+ /* maddhd, maddld. */
+ return ((__uint128_t)a * (__uint128_t)b) + (__uint128_t)c;
+}
+
+__uint128_t
+u_mult_add_0x80000000 (unsigned long long a,
+ unsigned long long b)
+{
+ /* maddhd, maddld. */
+ return ((__uint128_t)a * (__uint128_t)b) + 0x80000000UL;
+}
+
+/* { dg-final { scan-assembler-not {\maddc\M} } } */
+/* { dg-final { scan-assembler-not {\madde\M} } } */
+/* { dg-final { scan-assembler-not {\maddid\M} } } */
+/* { dg-final { scan-assembler-not {\maddme\M} } } */
+/* { dg-final { scan-assembler-not {\maddze\M} } } */
+/* { dg-final { scan-assembler-not {\mmulhd\M} } } */
+/* { dg-final { scan-assembler-not {\mmulld\M} } } */
+/* { dg-final { scan-assembler-times {\mmaddhd\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mmaddhdu\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mmaddld\M} 5 } } */
^ permalink raw reply [flat|nested] 4+ messages in thread
* [gcc(refs/users/meissner/heads/work082)] Optimize multiply/add of DImode extended to TImode.
@ 2022-03-22 17:41 Michael Meissner
0 siblings, 0 replies; 4+ messages in thread
From: Michael Meissner @ 2022-03-22 17:41 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:b900b65b8bf619e807911f38a0a6d9336aae82b9
commit b900b65b8bf619e807911f38a0a6d9336aae82b9
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Tue Mar 22 13:37:51 2022 -0400
Optimize multiply/add of DImode extended to TImode.
On power9 and power10 systems, we have instructions that support doing
64-bit integers converted to 128-bit integers and producing 128-bit
results. This patch adds support to generate these instructions.
Previously we had define_expands to handle conversion of the 64-bit extend
to 128-bit and multiply. This patch changes these define_expands to
define_insn_and_split and then it provides combiner patterns to generate
these multiply/add instructions.
To support using this optimization on power9, we extend the sign extend
DImode to TImode to also run on power9 (added for PR target/104698).
We add support for doing an unsigned DImode to TImode conversion. We need
these conversions to exist on power9 so that the combiner can properly
combine the extend, multiply, and add instructions.
2022-03-22 Michael Meissner <meissner@linux.ibm.com>
gcc/
PR target/103109
* config/rs6000/rs6000.md (su_int32): New code attribute.
(<u>mul<mode><dmode>3): Convert from define_expand to
define_insn_and_split.
(maddld<mode>4): Add generator function.
(<u>mulditi3_<u>adddi3): New insn.
(<u>mulditi3_add_const): New insn.
(<u>mulditi3_<u>adddi3_upper): New insn.
gcc/testsuite/
PR target/103109
* gcc.target/powerpc/pr103109.c: New test.
Diff:
---
gcc/config/rs6000/rs6000.md | 128 ++++++++++++++++++++++++++--
gcc/testsuite/gcc.target/powerpc/pr103109.c | 62 ++++++++++++++
2 files changed, 184 insertions(+), 6 deletions(-)
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index a74c48efae7..da7367ee642 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -676,6 +676,9 @@
(float "")
(unsigned_float "uns")])
+(define_code_attr su_int32 [(sign_extend "s32bit_cint_operand")
+ (zero_extend "c32bit_cint_operand")])
+
; Various instructions that come in SI and DI forms.
; A generic w/d attribute, for things like cmpw/cmpd.
(define_mode_attr wd [(QI "b")
@@ -3199,13 +3202,16 @@
"mulhw<u> %0,%1,%2"
[(set_attr "type" "mul")])
-(define_expand "<u>mul<mode><dmode>3"
- [(set (match_operand:<DMODE> 0 "gpc_reg_operand")
+(define_insn_and_split "<u>mul<mode><dmode>3"
+ [(set (match_operand:<DMODE> 0 "gpc_reg_operand" "=&r")
(mult:<DMODE> (any_extend:<DMODE>
- (match_operand:GPR 1 "gpc_reg_operand"))
+ (match_operand:GPR 1 "gpc_reg_operand" "r"))
(any_extend:<DMODE>
- (match_operand:GPR 2 "gpc_reg_operand"))))]
+ (match_operand:GPR 2 "gpc_reg_operand" "r"))))]
"!(<MODE>mode == SImode && TARGET_POWERPC64)"
+ "#"
+ "&& 1"
+ [(pc)]
{
rtx l = gen_reg_rtx (<MODE>mode);
rtx h = gen_reg_rtx (<MODE>mode);
@@ -3214,9 +3220,10 @@
emit_move_insn (gen_lowpart (<MODE>mode, operands[0]), l);
emit_move_insn (gen_highpart (<MODE>mode, operands[0]), h);
DONE;
-})
+}
+ [(set_attr "length" "8")])
-(define_insn "*maddld<mode>4"
+(define_insn "maddld<mode>4"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(plus:GPR (mult:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
(match_operand:GPR 2 "gpc_reg_operand" "r"))
@@ -3225,6 +3232,115 @@
"maddld %0,%1,%2,%3"
[(set_attr "type" "mul")])
+(define_insn_and_split "*<u>mulditi3_<u>adddi3"
+ [(set (match_operand:TI 0 "gpc_reg_operand" "=&r")
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r"))))]
+ "TARGET_MADDLD && TARGET_POWERPC64"
+ "#"
+ "&& 1"
+ [(pc)]
+{
+ rtx dest = operands[0];
+ rtx dest_hi = gen_highpart (DImode, dest);
+ rtx dest_lo = gen_lowpart (DImode, dest);
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
+ rtx tmp_hi, tmp_lo;
+
+ if (can_create_pseudo_p ())
+ {
+ tmp_hi = gen_reg_rtx (DImode);
+ tmp_lo = gen_reg_rtx (DImode);
+ }
+ else
+ {
+ tmp_hi = dest_hi;
+ tmp_lo = dest_lo;
+ }
+
+ emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3));
+ emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3));
+
+ if (can_create_pseudo_p ())
+ {
+ emit_move_insn (dest_hi, tmp_hi);
+ emit_move_insn (dest_lo, tmp_lo);
+ }
+ DONE;
+}
+ [(set_attr "length" "8")])
+
+;; Optimize 128-bit multiply with zero/sign extend and adding a constant. We
+;; force the constant into a register to generate li, maddhd, and maddld,
+;; instead of mulld, mulhd, addic, and addze. We can't combine this pattern
+;; with the pattern that handles registers, since constants don't have a sign
+;; or zero extend around them.
+(define_insn_and_split "*<u>mulditi3_add_const"
+ [(set (match_operand:TI 0 "gpc_reg_operand" "=&r")
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (match_operand 3 "<su_int32>" "r")))]
+ "TARGET_MADDLD && TARGET_POWERPC64
+"
+ "#"
+ "&& 1"
+ [(pc)]
+{
+ rtx dest = operands[0];
+ rtx dest_hi = gen_highpart (DImode, dest);
+ rtx dest_lo = gen_lowpart (DImode, dest);
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = force_reg (DImode, operands[3]);
+ rtx tmp_hi, tmp_lo;
+
+ if (can_create_pseudo_p ())
+ {
+ tmp_hi = gen_reg_rtx (DImode);
+ tmp_lo = gen_reg_rtx (DImode);
+ }
+ else
+ {
+ tmp_hi = dest_hi;
+ tmp_lo = dest_lo;
+ }
+
+ emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3));
+ emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3));
+
+ if (can_create_pseudo_p ())
+ {
+ emit_move_insn (dest_hi, tmp_hi);
+ emit_move_insn (dest_lo, tmp_lo);
+ }
+ DONE;
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "mul")
+ (set_attr "size" "64")])
+
+(define_insn "<u>mulditi3_<u>adddi3_upper"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (truncate:DI
+ (lshiftrt:TI
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r")))
+ (const_int 64))))]
+ "TARGET_MADDLD && TARGET_POWERPC64"
+ "maddhd<u> %0,%1,%2,%3"
+ [(set_attr "type" "mul")
+ (set_attr "size" "64")])
+
(define_insn "udiv<mode>3"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103109.c b/gcc/testsuite/gcc.target/powerpc/pr103109.c
new file mode 100644
index 00000000000..7f67816edda
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103109.c
@@ -0,0 +1,62 @@
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* This test makes sure that GCC generates the maddhd, maddhdu, and maddld
+ power9 instructions when doing some forms of 64-bit integers converted to
+ 128-bit integers and used with multiply/add operations. */
+
+__int128_t
+s_mult_add (long long a,
+ long long b,
+ long long c)
+{
+ /* maddhd, maddld. */
+ return ((__int128_t)a * (__int128_t)b) + (__int128_t)c;
+}
+
+/* Test 32-bit constants that are loaded into GPRs instead of doing the
+ mulld/mulhd and then addic/addime or addc/addze. */
+__int128_t
+s_mult_add_m10 (long long a,
+ long long b)
+{
+ /* maddhd, maddld. */
+ return ((__int128_t)a * (__int128_t)b) - 10;
+}
+
+__int128_t
+s_mult_add_70000 (long long a,
+ long long b)
+{
+ /* maddhd, maddld. */
+ return ((__int128_t)a * (__int128_t)b) + 70000;
+}
+
+__uint128_t
+u_mult_add (unsigned long long a,
+ unsigned long long b,
+ unsigned long long c)
+{
+ /* maddhd, maddld. */
+ return ((__uint128_t)a * (__uint128_t)b) + (__uint128_t)c;
+}
+
+__uint128_t
+u_mult_add_0x80000000 (unsigned long long a,
+ unsigned long long b)
+{
+ /* maddhd, maddld. */
+ return ((__uint128_t)a * (__uint128_t)b) + 0x80000000UL;
+}
+
+/* { dg-final { scan-assembler-not {\maddc\M} } } */
+/* { dg-final { scan-assembler-not {\madde\M} } } */
+/* { dg-final { scan-assembler-not {\maddid\M} } } */
+/* { dg-final { scan-assembler-not {\maddme\M} } } */
+/* { dg-final { scan-assembler-not {\maddze\M} } } */
+/* { dg-final { scan-assembler-not {\mmulhd\M} } } */
+/* { dg-final { scan-assembler-not {\mmulld\M} } } */
+/* { dg-final { scan-assembler-times {\mmaddhd\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mmaddld\M} 3 } } */
+
^ permalink raw reply [flat|nested] 4+ messages in thread
* [gcc(refs/users/meissner/heads/work082)] Optimize multiply/add of DImode extended to TImode.
@ 2022-03-22 17:38 Michael Meissner
0 siblings, 0 replies; 4+ messages in thread
From: Michael Meissner @ 2022-03-22 17:38 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:bb4394b0cd698f0ac7bdc16aa68b09948e1d3858
commit bb4394b0cd698f0ac7bdc16aa68b09948e1d3858
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Tue Mar 22 13:37:51 2022 -0400
Optimize multiply/add of DImode extended to TImode.
On power9 and power10 systems, we have instructions that support doing
64-bit integers converted to 128-bit integers and producing 128-bit
results. This patch adds support to generate these instructions.
Previously we had define_expands to handle conversion of the 64-bit extend
to 128-bit and multiply. This patch changes these define_expands to
define_insn_and_split and then it provides combiner patterns to generate
these multiply/add instructions.
To support using this optimization on power9, we extend the sign extend
DImode to TImode to also run on power9 (added for PR target/104698).
We add support for doing an unsigned DImode to TImode conversion. We need
these conversions to exist on power9 so that the combiner can properly
combine the extend, multiply, and add instructions.
2022-03-22 Michael Meissner <meissner@linux.ibm.com>
gcc/
PR target/103109
* config/rs6000/rs6000.md (su_int32): New code attribute.
(<u>mul<mode><dmode>3): Convert from define_expand to
define_insn_and_split.
(maddld<mode>4): Add generator function.
(<u>mulditi3_<u>adddi3): New insn.
(<u>mulditi3_add_const): New insn.
(<u>mulditi3_<u>adddi3_upper): New insn.
gcc/testsuite/
PR target/103109
* gcc.target/powerpc/pr103109.c: New test.
Diff:
---
gcc/config/rs6000/rs6000.md | 128 ++++++++++++++++++++++++++--
gcc/testsuite/gcc.target/powerpc/pr103109.c | 62 ++++++++++++++
2 files changed, 184 insertions(+), 6 deletions(-)
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index a74c48efae7..da7367ee642 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -676,6 +676,9 @@
(float "")
(unsigned_float "uns")])
+(define_code_attr su_int32 [(sign_extend "s32bit_cint_operand")
+ (zero_extend "c32bit_cint_operand")])
+
; Various instructions that come in SI and DI forms.
; A generic w/d attribute, for things like cmpw/cmpd.
(define_mode_attr wd [(QI "b")
@@ -3199,13 +3202,16 @@
"mulhw<u> %0,%1,%2"
[(set_attr "type" "mul")])
-(define_expand "<u>mul<mode><dmode>3"
- [(set (match_operand:<DMODE> 0 "gpc_reg_operand")
+(define_insn_and_split "<u>mul<mode><dmode>3"
+ [(set (match_operand:<DMODE> 0 "gpc_reg_operand" "=&r")
(mult:<DMODE> (any_extend:<DMODE>
- (match_operand:GPR 1 "gpc_reg_operand"))
+ (match_operand:GPR 1 "gpc_reg_operand" "r"))
(any_extend:<DMODE>
- (match_operand:GPR 2 "gpc_reg_operand"))))]
+ (match_operand:GPR 2 "gpc_reg_operand" "r"))))]
"!(<MODE>mode == SImode && TARGET_POWERPC64)"
+ "#"
+ "&& 1"
+ [(pc)]
{
rtx l = gen_reg_rtx (<MODE>mode);
rtx h = gen_reg_rtx (<MODE>mode);
@@ -3214,9 +3220,10 @@
emit_move_insn (gen_lowpart (<MODE>mode, operands[0]), l);
emit_move_insn (gen_highpart (<MODE>mode, operands[0]), h);
DONE;
-})
+}
+ [(set_attr "length" "8")])
-(define_insn "*maddld<mode>4"
+(define_insn "maddld<mode>4"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(plus:GPR (mult:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
(match_operand:GPR 2 "gpc_reg_operand" "r"))
@@ -3225,6 +3232,115 @@
"maddld %0,%1,%2,%3"
[(set_attr "type" "mul")])
+(define_insn_and_split "*<u>mulditi3_<u>adddi3"
+ [(set (match_operand:TI 0 "gpc_reg_operand" "=&r")
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r"))))]
+ "TARGET_MADDLD && TARGET_POWERPC64"
+ "#"
+ "&& 1"
+ [(pc)]
+{
+ rtx dest = operands[0];
+ rtx dest_hi = gen_highpart (DImode, dest);
+ rtx dest_lo = gen_lowpart (DImode, dest);
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
+ rtx tmp_hi, tmp_lo;
+
+ if (can_create_pseudo_p ())
+ {
+ tmp_hi = gen_reg_rtx (DImode);
+ tmp_lo = gen_reg_rtx (DImode);
+ }
+ else
+ {
+ tmp_hi = dest_hi;
+ tmp_lo = dest_lo;
+ }
+
+ emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3));
+ emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3));
+
+ if (can_create_pseudo_p ())
+ {
+ emit_move_insn (dest_hi, tmp_hi);
+ emit_move_insn (dest_lo, tmp_lo);
+ }
+ DONE;
+}
+ [(set_attr "length" "8")])
+
+;; Optimize 128-bit multiply with zero/sign extend and adding a constant. We
+;; force the constant into a register to generate li, maddhd, and maddld,
+;; instead of mulld, mulhd, addic, and addze. We can't combine this pattern
+;; with the pattern that handles registers, since constants don't have a sign
+;; or zero extend around them.
+(define_insn_and_split "*<u>mulditi3_add_const"
+ [(set (match_operand:TI 0 "gpc_reg_operand" "=&r")
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (match_operand 3 "<su_int32>" "r")))]
+ "TARGET_MADDLD && TARGET_POWERPC64
+"
+ "#"
+ "&& 1"
+ [(pc)]
+{
+ rtx dest = operands[0];
+ rtx dest_hi = gen_highpart (DImode, dest);
+ rtx dest_lo = gen_lowpart (DImode, dest);
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = force_reg (DImode, operands[3]);
+ rtx tmp_hi, tmp_lo;
+
+ if (can_create_pseudo_p ())
+ {
+ tmp_hi = gen_reg_rtx (DImode);
+ tmp_lo = gen_reg_rtx (DImode);
+ }
+ else
+ {
+ tmp_hi = dest_hi;
+ tmp_lo = dest_lo;
+ }
+
+ emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3));
+ emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3));
+
+ if (can_create_pseudo_p ())
+ {
+ emit_move_insn (dest_hi, tmp_hi);
+ emit_move_insn (dest_lo, tmp_lo);
+ }
+ DONE;
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "mul")
+ (set_attr "size" "64")])
+
+(define_insn "<u>mulditi3_<u>adddi3_upper"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (truncate:DI
+ (lshiftrt:TI
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r")))
+ (const_int 64))))]
+ "TARGET_MADDLD && TARGET_POWERPC64"
+ "maddhd<u> %0,%1,%2,%3"
+ [(set_attr "type" "mul")
+ (set_attr "size" "64")])
+
(define_insn "udiv<mode>3"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103109.c b/gcc/testsuite/gcc.target/powerpc/pr103109.c
new file mode 100644
index 00000000000..7f67816edda
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103109.c
@@ -0,0 +1,62 @@
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* This test makes sure that GCC generates the maddhd, maddhdu, and maddld
+ power9 instructions when doing some forms of 64-bit integers converted to
+ 128-bit integers and used with multiply/add operations. */
+
+__int128_t
+s_mult_add (long long a,
+ long long b,
+ long long c)
+{
+ /* maddhd, maddld. */
+ return ((__int128_t)a * (__int128_t)b) + (__int128_t)c;
+}
+
+/* Test 32-bit constants that are loaded into GPRs instead of doing the
+ mulld/mulhd and then addic/addime or addc/addze. */
+__int128_t
+s_mult_add_m10 (long long a,
+ long long b)
+{
+ /* maddhd, maddld. */
+ return ((__int128_t)a * (__int128_t)b) - 10;
+}
+
+__int128_t
+s_mult_add_70000 (long long a,
+ long long b)
+{
+ /* maddhd, maddld. */
+ return ((__int128_t)a * (__int128_t)b) + 70000;
+}
+
+__uint128_t
+u_mult_add (unsigned long long a,
+ unsigned long long b,
+ unsigned long long c)
+{
+ /* maddhd, maddld. */
+ return ((__uint128_t)a * (__uint128_t)b) + (__uint128_t)c;
+}
+
+__uint128_t
+u_mult_add_0x80000000 (unsigned long long a,
+ unsigned long long b)
+{
+ /* maddhd, maddld. */
+ return ((__uint128_t)a * (__uint128_t)b) + 0x80000000UL;
+}
+
+/* { dg-final { scan-assembler-not {\maddc\M} } } */
+/* { dg-final { scan-assembler-not {\madde\M} } } */
+/* { dg-final { scan-assembler-not {\maddid\M} } } */
+/* { dg-final { scan-assembler-not {\maddme\M} } } */
+/* { dg-final { scan-assembler-not {\maddze\M} } } */
+/* { dg-final { scan-assembler-not {\mmulhd\M} } } */
+/* { dg-final { scan-assembler-not {\mmulld\M} } } */
+/* { dg-final { scan-assembler-times {\mmaddhd\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mmaddld\M} 3 } } */
+
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2022-03-23 1:04 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-23 1:04 [gcc(refs/users/meissner/heads/work082)] Optimize multiply/add of DImode extended to TImode Michael Meissner
-- strict thread matches above, loose matches on Subject: below --
2022-03-22 19:56 Michael Meissner
2022-03-22 17:41 Michael Meissner
2022-03-22 17:38 Michael Meissner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).