public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH]middle-end Use subregs to expand COMPLEX_EXPR to set the lowpart.
@ 2022-06-09  7:52 Tamar Christina
  2022-06-12 17:27 ` Jeff Law
  2022-06-13  8:40 ` Richard Sandiford
  0 siblings, 2 replies; 16+ messages in thread
From: Tamar Christina @ 2022-06-09  7:52 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, richard.sandiford, rguenther

[-- Attachment #1: Type: text/plain, Size: 4103 bytes --]

Hi All,

When lowering COMPLEX_EXPR we currently emit two VEC_EXTRACTs.  One for the
lowpart and one for the highpart.

The problem with this is that in RTL the lvalue of the RTX is the only thing
tying the two instructions together.

This means that e.g. combine is unable to try to combine the two instructions
for setting the lowpart and highpart.

For ISAs that have bit extract instructions we can eliminate one of the extracts
if, and only if we're setting the entire complex number.

This change changes the expand code when we're setting the entire complex number
to generate a subreg for the lowpart instead of a vec_extract.

This allows us to optimize sequences such as:

_Complex int f(int a, int b) {
    _Complex int t = a + b * 1i;
    return t;
}

from:

f:
	bfi     x2, x0, 0, 32
	bfi     x2, x1, 32, 32
	mov     x0, x2
	ret

into:

f:
	bfi	x0, x1, 32, 32
	ret

I have also confirmed the codegen for x86_64 did not change.

Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* emit-rtl.cc (validate_subreg): Accept subregs of complex modes.
	* expr.cc (emit_move_complex_parts): Emit subreg of lowpart if possible.

gcc/testsuite/ChangeLog:

	* g++.target/aarch64/complex-init.C: New test.

--- inline copy of patch -- 
diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
index f4404d7abe33b565358b7f609a91114c75ecf4e7..15ffca2ffe986bca56c1fae9381bd33f5d6b012d 100644
--- a/gcc/emit-rtl.cc
+++ b/gcc/emit-rtl.cc
@@ -947,9 +947,11 @@ validate_subreg (machine_mode omode, machine_mode imode,
 	   && GET_MODE_INNER (omode) == GET_MODE_INNER (imode))
     ;
   /* Subregs involving floating point modes are not allowed to
-     change size.  Therefore (subreg:DI (reg:DF) 0) is fine, but
+     change size unless it's an insert into a complex mode.
+     Therefore (subreg:DI (reg:DF) 0) and (subreg:CS (reg:SF) 0) are fine, but
      (subreg:SI (reg:DF) 0) isn't.  */
-  else if (FLOAT_MODE_P (imode) || FLOAT_MODE_P (omode))
+  else if ((FLOAT_MODE_P (imode) || FLOAT_MODE_P (omode))
+	   && !COMPLEX_MODE_P (omode))
     {
       if (! (known_eq (isize, osize)
 	     /* LRA can use subreg to store a floating point value in
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 5f7142b975ada2cd8b00663d35ba1e0004b8e28d..fce672c236fdbc4d40adb6e2614c234c02a61933 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -3740,7 +3740,17 @@ emit_move_complex_parts (rtx x, rtx y)
       && REG_P (x) && !reg_overlap_mentioned_p (x, y))
     emit_clobber (x);
 
-  write_complex_part (x, read_complex_part (y, false), false);
+  /* If we're writing the entire value using a concat into a register
+     then emit the lower part as a simple mov followed by an insert
+     into the top part.  */
+  if (GET_CODE (y) == CONCAT && !reload_completed && REG_P (x))
+    {
+      rtx val = XEXP (y, false);
+      rtx dest = lowpart_subreg (GET_MODE (val), x, GET_MODE (x));
+      emit_move_insn (dest, val);
+    }
+  else
+    write_complex_part (x, read_complex_part (y, false), false);
   write_complex_part (x, read_complex_part (y, true), true);
 
   return get_last_insn ();
diff --git a/gcc/testsuite/g++.target/aarch64/complex-init.C b/gcc/testsuite/g++.target/aarch64/complex-init.C
new file mode 100644
index 0000000000000000000000000000000000000000..497cc4bca3e2c59da95c871ceb5cc96216fc302d
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/complex-init.C
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+/*
+** _Z1fii:
+** ...
+** 	bfi	x0, x1, 32, 32
+** 	ret
+** ...
+*/
+_Complex int f(int a, int b) {
+    _Complex int t = a + b * 1i;
+    return t;
+}
+
+/*
+** _Z2f2ii:
+** ...
+** 	bfi	x0, x1, 32, 32
+** 	ret
+** ...
+*/
+_Complex int f2(int a, int b) {
+    _Complex int t = {a, b};
+    return t;
+}
+
+/*
+** _Z12f_convolutedii:
+** ...
+** 	bfi	x0, x1, 32, 32
+** 	ret
+** ...
+*/
+_Complex int f_convoluted(int a, int b) {
+    _Complex int t = (_Complex int)a;
+    __imag__ t = b;
+    return t;
+}




-- 

[-- Attachment #2: rb15778.patch --]
[-- Type: text/plain, Size: 2778 bytes --]

diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
index f4404d7abe33b565358b7f609a91114c75ecf4e7..15ffca2ffe986bca56c1fae9381bd33f5d6b012d 100644
--- a/gcc/emit-rtl.cc
+++ b/gcc/emit-rtl.cc
@@ -947,9 +947,11 @@ validate_subreg (machine_mode omode, machine_mode imode,
 	   && GET_MODE_INNER (omode) == GET_MODE_INNER (imode))
     ;
   /* Subregs involving floating point modes are not allowed to
-     change size.  Therefore (subreg:DI (reg:DF) 0) is fine, but
+     change size unless it's an insert into a complex mode.
+     Therefore (subreg:DI (reg:DF) 0) and (subreg:CS (reg:SF) 0) are fine, but
      (subreg:SI (reg:DF) 0) isn't.  */
-  else if (FLOAT_MODE_P (imode) || FLOAT_MODE_P (omode))
+  else if ((FLOAT_MODE_P (imode) || FLOAT_MODE_P (omode))
+	   && !COMPLEX_MODE_P (omode))
     {
       if (! (known_eq (isize, osize)
 	     /* LRA can use subreg to store a floating point value in
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 5f7142b975ada2cd8b00663d35ba1e0004b8e28d..fce672c236fdbc4d40adb6e2614c234c02a61933 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -3740,7 +3740,17 @@ emit_move_complex_parts (rtx x, rtx y)
       && REG_P (x) && !reg_overlap_mentioned_p (x, y))
     emit_clobber (x);
 
-  write_complex_part (x, read_complex_part (y, false), false);
+  /* If we're writing the entire value using a concat into a register
+     then emit the lower part as a simple mov followed by an insert
+     into the top part.  */
+  if (GET_CODE (y) == CONCAT && !reload_completed && REG_P (x))
+    {
+      rtx val = XEXP (y, false);
+      rtx dest = lowpart_subreg (GET_MODE (val), x, GET_MODE (x));
+      emit_move_insn (dest, val);
+    }
+  else
+    write_complex_part (x, read_complex_part (y, false), false);
   write_complex_part (x, read_complex_part (y, true), true);
 
   return get_last_insn ();
diff --git a/gcc/testsuite/g++.target/aarch64/complex-init.C b/gcc/testsuite/g++.target/aarch64/complex-init.C
new file mode 100644
index 0000000000000000000000000000000000000000..497cc4bca3e2c59da95c871ceb5cc96216fc302d
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/complex-init.C
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+/*
+** _Z1fii:
+** ...
+** 	bfi	x0, x1, 32, 32
+** 	ret
+** ...
+*/
+_Complex int f(int a, int b) {
+    _Complex int t = a + b * 1i;
+    return t;
+}
+
+/*
+** _Z2f2ii:
+** ...
+** 	bfi	x0, x1, 32, 32
+** 	ret
+** ...
+*/
+_Complex int f2(int a, int b) {
+    _Complex int t = {a, b};
+    return t;
+}
+
+/*
+** _Z12f_convolutedii:
+** ...
+** 	bfi	x0, x1, 32, 32
+** 	ret
+** ...
+*/
+_Complex int f_convoluted(int a, int b) {
+    _Complex int t = (_Complex int)a;
+    __imag__ t = b;
+    return t;
+}




^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2022-08-29 10:52 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-09  7:52 [PATCH]middle-end Use subregs to expand COMPLEX_EXPR to set the lowpart Tamar Christina
2022-06-12 17:27 ` Jeff Law
2022-06-13 10:19   ` Tamar Christina
2022-06-13 17:42     ` Jeff Law
2022-06-13 11:54   ` Richard Biener
2022-06-13 17:34     ` Jeff Law
2022-06-15 11:36       ` Richard Sandiford
2022-06-16 11:22         ` Tamar Christina
2022-06-24 21:54         ` Jeff Law
2022-06-13  8:40 ` Richard Sandiford
2022-06-16 11:28   ` Tamar Christina
2022-06-17 17:13     ` Richard Sandiford
2022-06-20  8:00       ` Richard Sandiford
2022-07-05 15:05         ` Tamar Christina
2022-07-05 16:11           ` Richard Sandiford
2022-08-29 10:52             ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).