public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v1] LoongArch: Optimized multiply instruction generation.
@ 2023-09-05  6:03 Lulu Cheng
  0 siblings, 0 replies; only message in thread
From: Lulu Cheng @ 2023-09-05  6:03 UTC (permalink / raw)
  To: gcc-patches; +Cc: xry111, i, xuchenghua, Lulu Cheng

	1. Generate the mulh.w[u] instructions on 64-bit targets as well.
	2. Generate the mulw.d.wu instruction.

gcc/ChangeLog:

	* config/loongarch/loongarch.md (mulsidi3_64bit): Remove; replaced
	by <u>mulsidi3_64bit.
	(<u>muldi3_highpart): Modify template name.
	(<u>mulsi3_highpart): Likewise.
	(<u>mulsidi3_64bit): Add unsigned extension support.
	(<su>muldi3_highpart): Modify muldi3_highpart to
	smuldi3_highpart.
	(<su>mulsi3_highpart): Modify mulsi3_highpart to
	smulsi3_highpart.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/mulw_d_wu.c: New test.
	* gcc.target/loongarch/smuldi3_highpart.c: New test.
	* gcc.target/loongarch/smulsi3_highpart.c: New test.
	* gcc.target/loongarch/umulsi3_highpart.c: New test.
---
 gcc/config/loongarch/loongarch.md             | 66 ++++++++++++-------
 .../gcc.target/loongarch/mulw_d_wu.c          |  9 +++
 .../gcc.target/loongarch/smuldi3_highpart.c   | 13 ++++
 .../gcc.target/loongarch/smulsi3_highpart.c   | 15 +++++
 .../gcc.target/loongarch/umulsi3_highpart.c   | 14 ++++
 5 files changed, 94 insertions(+), 23 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/mulw_d_wu.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/smuldi3_highpart.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/smulsi3_highpart.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/umulsi3_highpart.c

diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 25e2e1e0597..5e9a3ec15e0 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -721,15 +721,6 @@ (define_insn "mul<mode>3"
   [(set_attr "type" "imul")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "mulsidi3_64bit"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
-		 (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
-  "TARGET_64BIT"
-  "mulw.d.w\t%0,%1,%2"
-  [(set_attr "type" "imul")
-   (set_attr "mode" "DI")])
-
 (define_insn "*mulsi3_extended"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(sign_extend:DI
@@ -758,14 +749,14 @@ (define_expand "<u>mulditi3"
   emit_insn (gen_muldi3 (low, operands[1], operands[2]));
 
   rtx high = gen_reg_rtx (DImode);
-  emit_insn (gen_<u>muldi3_highpart (high, operands[1], operands[2]));
+  emit_insn (gen_<su>muldi3_highpart (high, operands[1], operands[2]));
 
   emit_move_insn (gen_lowpart (DImode, operands[0]), low);
   emit_move_insn (gen_highpart (DImode, operands[0]), high);
   DONE;
 })
 
-(define_insn "<u>muldi3_highpart"
+(define_insn "<su>muldi3_highpart"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(truncate:DI
 	  (lshiftrt:TI
@@ -780,22 +771,34 @@ (define_insn "<u>muldi3_highpart"
    (set_attr "mode" "DI")])
 
 (define_expand "<u>mulsidi3"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "register_operand")
 	(mult:DI (any_extend:DI
-		   (match_operand:SI 1 "register_operand" " r"))
+		   (match_operand:SI 1 "register_operand"))
 		 (any_extend:DI
-		   (match_operand:SI 2 "register_operand" " r"))))]
-  "!TARGET_64BIT"
+		   (match_operand:SI 2 "register_operand"))))]
+  ""
 {
-  rtx temp = gen_reg_rtx (SImode);
-  emit_insn (gen_mulsi3 (temp, operands[1], operands[2]));
-  emit_insn (gen_<u>mulsi3_highpart (loongarch_subword (operands[0], true),
-				     operands[1], operands[2]));
-  emit_insn (gen_movsi (loongarch_subword (operands[0], false), temp));
-  DONE;
+  if (!TARGET_64BIT)
+  {
+    rtx temp = gen_reg_rtx (SImode);
+    emit_insn (gen_mulsi3 (temp, operands[1], operands[2]));
+    emit_insn (gen_<su>mulsi3_highpart (loongarch_subword (operands[0], true),
+				       operands[1], operands[2]));
+    emit_insn (gen_movsi (loongarch_subword (operands[0], false), temp));
+    DONE;
+  }
 })
 
-(define_insn "<u>mulsi3_highpart"
+(define_insn "<u>mulsidi3_64bit"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(mult:DI (any_extend:DI (match_operand:SI 1 "register_operand" "r"))
+		 (any_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
+  "TARGET_64BIT"
+  "mulw.d.w<u>\t%0,%1,%2"
+  [(set_attr "type" "imul")
+   (set_attr "mode" "DI")])
+
+(define_insn "<su>mulsi3_highpart"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(truncate:SI
 	  (lshiftrt:DI
@@ -804,11 +807,28 @@ (define_insn "<u>mulsi3_highpart"
 		     (any_extend:DI
 		       (match_operand:SI 2 "register_operand" " r")))
 	    (const_int 32))))]
-  "!TARGET_64BIT"
+  ""
   "mulh.w<u>\t%0,%1,%2"
   [(set_attr "type" "imul")
    (set_attr "mode" "SI")])
 
+;; On LoongArch64, mulh.w[u] sign-extends its 32-bit result into the full
+;; 64-bit destination register, so a following sign_extend of that result
+;; into the same register is redundant and can be eliminated.
+(define_peephole
+  [(set (match_operand:SI 0 "register_operand")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (mult:DI (any_extend:DI
+		       (match_operand:SI 1 "register_operand"))
+		     (any_extend:DI
+		       (match_operand:SI 2 "register_operand")))
+	    (const_int 32))))
+   (set (match_operand:DI 3 "register_operand")
+	(sign_extend:DI (match_dup 0)))]
+   "TARGET_64BIT && REGNO (operands[0]) == REGNO (operands[3])"
+   "mulh.w<u>\t%0,%1,%2")
+
 ;;
 ;;  ....................
 ;;
diff --git a/gcc/testsuite/gcc.target/loongarch/mulw_d_wu.c b/gcc/testsuite/gcc.target/loongarch/mulw_d_wu.c
new file mode 100644
index 00000000000..16163d6675d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/mulw_d_wu.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=lp64d" } */
+/* { dg-final { scan-assembler "mulw.d.wu" } } */
+
+__attribute__((noipa, noinline)) unsigned long
+f(unsigned long a, unsigned long b)
+{
+  return (unsigned long)(unsigned int)a * (unsigned long)(unsigned int)b;
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/smuldi3_highpart.c b/gcc/testsuite/gcc.target/loongarch/smuldi3_highpart.c
new file mode 100644
index 00000000000..6f5c686ca38
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/smuldi3_highpart.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O2 -fdump-rtl-expand-all" } */
+
+typedef int TI __attribute ((mode(TI)));
+typedef int DI __attribute__((mode(DI)));
+
+DI
+test (DI x, DI y)
+{
+  return ((TI)x * y) >> 64;
+}
+
+/* { dg-final { scan-rtl-dump "highparttmp" "expand" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/smulsi3_highpart.c b/gcc/testsuite/gcc.target/loongarch/smulsi3_highpart.c
new file mode 100644
index 00000000000..c4dbf8afc24
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/smulsi3_highpart.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-expand-all" } */
+
+typedef unsigned int DI __attribute__((mode(DI)));
+typedef unsigned int SI __attribute__((mode(SI)));
+
+SI
+f (SI x, SI y)
+{
+  return ((DI) x * y) >> 32;
+}
+
+/* { dg-final { scan-rtl-dump "highparttmp" "expand" } } */
+/* { dg-final { scan-assembler "mulh\\.w" } } */
+/* { dg-final { scan-assembler-not "slli\\.w" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/umulsi3_highpart.c b/gcc/testsuite/gcc.target/loongarch/umulsi3_highpart.c
new file mode 100644
index 00000000000..e208803e2d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/umulsi3_highpart.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef unsigned int DI __attribute__((mode(DI)));
+typedef unsigned int SI __attribute__((mode(SI)));
+
+SI
+f (SI x, SI y)
+{
+  return ((DI) x * y) >> 32;
+}
+
+/* { dg-final { scan-assembler "mulh\\.wu" } } */
+/* { dg-final { scan-assembler-not "slli\\.w" } } */
-- 
2.31.1


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-09-05  6:04 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-05  6:03 [PATCH v1] LoongArch: Optimized multiply instruction generation Lulu Cheng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).