* [PATCH v1] LoongArch: Optimized multiply instruction generation.
@ 2023-09-05 6:03 Lulu Cheng
0 siblings, 0 replies; only message in thread
From: Lulu Cheng @ 2023-09-05 6:03 UTC (permalink / raw)
To: gcc-patches; +Cc: xry111, i, xuchenghua, Lulu Cheng
1. Can generate mulh.w[u] instruction.
2. Can generate mulw.d.wu instruction.
gcc/ChangeLog:
* config/loongarch/loongarch.md (mulsidi3_64bit):
(<u>muldi3_highpart): Modify template name.
(<u>mulsi3_highpart): Likewise.
(<u>mulsidi3_64bit): Field unsigned extension support.
(<su>muldi3_highpart): Modify muldi3_highpart to
smuldi3_highpart.
(<su>mulsi3_highpart): Modify mulsi3_highpart to
smulsi3_highpart.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/mulw_d_wu.c: New test.
* gcc.target/loongarch/smuldi3_highpart.c: New test.
* gcc.target/loongarch/smulsi3_highpart.c: New test.
* gcc.target/loongarch/umulsi3_highpart.c: New test.
---
gcc/config/loongarch/loongarch.md | 66 ++++++++++++-------
.../gcc.target/loongarch/mulw_d_wu.c | 9 +++
.../gcc.target/loongarch/smuldi3_highpart.c | 13 ++++
.../gcc.target/loongarch/smulsi3_highpart.c | 15 +++++
.../gcc.target/loongarch/umulsi3_highpart.c | 14 ++++
5 files changed, 94 insertions(+), 23 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/mulw_d_wu.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/smuldi3_highpart.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/smulsi3_highpart.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/umulsi3_highpart.c
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 25e2e1e0597..5e9a3ec15e0 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -721,15 +721,6 @@ (define_insn "mul<mode>3"
[(set_attr "type" "imul")
(set_attr "mode" "<MODE>")])
-(define_insn "mulsidi3_64bit"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
- (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
- "TARGET_64BIT"
- "mulw.d.w\t%0,%1,%2"
- [(set_attr "type" "imul")
- (set_attr "mode" "DI")])
-
(define_insn "*mulsi3_extended"
[(set (match_operand:DI 0 "register_operand" "=r")
(sign_extend:DI
@@ -758,14 +749,14 @@ (define_expand "<u>mulditi3"
emit_insn (gen_muldi3 (low, operands[1], operands[2]));
rtx high = gen_reg_rtx (DImode);
- emit_insn (gen_<u>muldi3_highpart (high, operands[1], operands[2]));
+ emit_insn (gen_<su>muldi3_highpart (high, operands[1], operands[2]));
emit_move_insn (gen_lowpart (DImode, operands[0]), low);
emit_move_insn (gen_highpart (DImode, operands[0]), high);
DONE;
})
-(define_insn "<u>muldi3_highpart"
+(define_insn "<su>muldi3_highpart"
[(set (match_operand:DI 0 "register_operand" "=r")
(truncate:DI
(lshiftrt:TI
@@ -780,22 +771,34 @@ (define_insn "<u>muldi3_highpart"
(set_attr "mode" "DI")])
(define_expand "<u>mulsidi3"
- [(set (match_operand:DI 0 "register_operand" "=r")
+ [(set (match_operand:DI 0 "register_operand")
(mult:DI (any_extend:DI
- (match_operand:SI 1 "register_operand" " r"))
+ (match_operand:SI 1 "register_operand"))
(any_extend:DI
- (match_operand:SI 2 "register_operand" " r"))))]
- "!TARGET_64BIT"
+ (match_operand:SI 2 "register_operand"))))]
+ ""
{
- rtx temp = gen_reg_rtx (SImode);
- emit_insn (gen_mulsi3 (temp, operands[1], operands[2]));
- emit_insn (gen_<u>mulsi3_highpart (loongarch_subword (operands[0], true),
- operands[1], operands[2]));
- emit_insn (gen_movsi (loongarch_subword (operands[0], false), temp));
- DONE;
+ if (!TARGET_64BIT)
+ {
+ rtx temp = gen_reg_rtx (SImode);
+ emit_insn (gen_mulsi3 (temp, operands[1], operands[2]));
+ emit_insn (gen_<su>mulsi3_highpart (loongarch_subword (operands[0], true),
+ operands[1], operands[2]));
+ emit_insn (gen_movsi (loongarch_subword (operands[0], false), temp));
+ DONE;
+ }
})
-(define_insn "<u>mulsi3_highpart"
+(define_insn "<u>mulsidi3_64bit"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand" "r"))
+ (any_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
+ "TARGET_64BIT"
+ "mulw.d.w<u>\t%0,%1,%2"
+ [(set_attr "type" "imul")
+ (set_attr "mode" "DI")])
+
+(define_insn "<su>mulsi3_highpart"
[(set (match_operand:SI 0 "register_operand" "=r")
(truncate:SI
(lshiftrt:DI
@@ -804,11 +807,28 @@ (define_insn "<u>mulsi3_highpart"
(any_extend:DI
(match_operand:SI 2 "register_operand" " r")))
(const_int 32))))]
- "!TARGET_64BIT"
+ ""
"mulh.w<u>\t%0,%1,%2"
[(set_attr "type" "imul")
(set_attr "mode" "SI")])
+;; Under the LoongArch architecture, the mulh.w[u] instruction performs
+;; sign extension by default, so the sign extension instruction can be
+;; eliminated.
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (any_extend:DI
+ (match_operand:SI 1 "register_operand"))
+ (any_extend:DI
+ (match_operand:SI 2 "register_operand")))
+ (const_int 32))))
+ (set (match_operand:DI 3 "register_operand")
+ (sign_extend:DI (match_dup 0)))]
+ "TARGET_64BIT && REGNO (operands[0]) == REGNO (operands[3])"
+ "mulh.w<u>\t%0,%1,%2")
+
;;
;; ....................
;;
diff --git a/gcc/testsuite/gcc.target/loongarch/mulw_d_wu.c b/gcc/testsuite/gcc.target/loongarch/mulw_d_wu.c
new file mode 100644
index 00000000000..16163d6675d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/mulw_d_wu.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=lp64d" } */
+/* { dg-final { scan-assembler "mulw.d.wu" } } */
+
+__attribute__((noipa, noinline)) unsigned long
+f(unsigned long a, unsigned long b)
+{
+ return (unsigned long)(unsigned int)a * (unsigned long)(unsigned int)b;
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/smuldi3_highpart.c b/gcc/testsuite/gcc.target/loongarch/smuldi3_highpart.c
new file mode 100644
index 00000000000..6f5c686ca38
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/smuldi3_highpart.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O2 -fdump-rtl-expand-all" } */
+
+typedef int TI __attribute ((mode(TI)));
+typedef int DI __attribute__((mode(DI)));
+
+DI
+test (DI x, DI y)
+{
+ return ((TI)x * y) >> 64;
+}
+
+/* { dg-final { scan-rtl-dump "highparttmp" "expand" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/smulsi3_highpart.c b/gcc/testsuite/gcc.target/loongarch/smulsi3_highpart.c
new file mode 100644
index 00000000000..c4dbf8afc24
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/smulsi3_highpart.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-expand-all" } */
+
+typedef unsigned int DI __attribute__((mode(DI)));
+typedef unsigned int SI __attribute__((mode(SI)));
+
+SI
+f (SI x, SI y)
+{
+ return ((DI) x * y) >> 32;
+}
+
+/* { dg-final { scan-rtl-dump "highparttmp" "expand" } } */
+/* { dg-final { scan-assembler "mulh\\.w" } } */
+/* { dg-final { scan-assembler-not "slli\\.w" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/umulsi3_highpart.c b/gcc/testsuite/gcc.target/loongarch/umulsi3_highpart.c
new file mode 100644
index 00000000000..e208803e2d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/umulsi3_highpart.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef unsigned int DI __attribute__((mode(DI)));
+typedef unsigned int SI __attribute__((mode(SI)));
+
+SI
+f (SI x, SI y)
+{
+ return ((DI) x * y) >> 32;
+}
+
+/* { dg-final { scan-assembler "mulh\\.wu" } } */
+/* { dg-final { scan-assembler-not "slli\\.w" } } */
--
2.31.1
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-09-05 6:04 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-05 6:03 [PATCH v1] LoongArch: Optimized multiply instruction generation Lulu Cheng
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).