[RFC]rs6000: split complicated constant to memory

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

From: Jiufu Guo <guojiufu@linux.ibm.com>
To: gcc-patches@gcc.gnu.org
Cc: segher@kernel.crashing.org, dje.gcc@gmail.com, linkw@gcc.gnu.org,
	guojiufu@linux.ibm.com
Subject: [RFC]rs6000: split complicated constant to memory
Date: Mon, 15 Aug 2022 13:25:19 +0800	[thread overview]
Message-ID: <20220815052519.194582-1-guojiufu@linux.ibm.com> (raw)

Hi,

This patch tries to put a constant into the constant pool if building the
constant requires 3 or more instructions.

But there is a concern: I'm wondering if this patch is really profitable.

In my tests: 1. for simple cases, where the instructions are not run in
parallel, loading the constant from memory may be faster; but 2. if some
instructions can run in parallel, loading the constant from memory is not
a win compared with building the constant.  See the examples below.

For f1.c and f3.c, 'loading' the constant is acceptable in terms of runtime;
for f2.c and f4.c, 'loading' the constant is visibly slower.

For real-world cases, both kinds of code sequences exist.

So, I'm not sure if we need to push this patch.

To check the runtime, each function below was run many times (1000000000).
f1.c:
long foo (long *arg, long*, long *)
{
  *arg = 0x1234567800000000;
}
asm building constant:
	lis 10,0x1234
	ori 10,10,0x5678
	sldi 10,10,32
vs.  asm loading
	addis 10,2,.LC0@toc@ha
	ld 10,.LC0@toc@l(10)
The runtimes of 'building' and 'loading' are similar: sometimes 'building'
is faster; sometimes 'loading' is faster.  The difference is slight.

f2.c
long foo (long *arg, long *arg2, long *arg3)
{
  *arg = 0x1234567800000000;
  *arg2 = 0x7965234700000000;
  *arg3 = 0x4689123700000000;
}
asm building constant:
	lis 7,0x1234
	lis 10,0x7965
	lis 9,0x4689
	ori 7,7,0x5678
	ori 10,10,0x2347
	ori 9,9,0x1237
	sldi 7,7,32
	sldi 10,10,32
	sldi 9,9,32
vs. loading
	addis 7,2,.LC0@toc@ha
	addis 10,2,.LC1@toc@ha
	addis 9,2,.LC2@toc@ha
	ld 7,.LC0@toc@l(7)
	ld 10,.LC1@toc@l(10)
	ld 9,.LC2@toc@l(9)
For this case, 'loading' is always slower than 'building' (>15%).

f3.c
long foo (long *arg, long *, long *)
{
  *arg = 384307168202282325;
}
	lis 10,0x555
	ori 10,10,0x5555
	sldi 10,10,32
	oris 10,10,0x5555
	ori 10,10,0x5555
For this case, 'building' (through 5 instructions) is slower, and 'loading'
is ~5% faster.

f4.c
long foo (long *arg, long *arg2, long *arg3)
{
  *arg = 384307168202282325;
  *arg2 = -6148914691236517205;
  *arg3 = 768614336404564651;
}
	lis 7,0x555
	lis 10,0xaaaa
	lis 9,0xaaa
	ori 7,7,0x5555
	ori 10,10,0xaaaa
	ori 9,9,0xaaaa
	sldi 7,7,32
	sldi 10,10,32
	sldi 9,9,32
	oris 7,7,0x5555
	oris 10,10,0xaaaa
	oris 9,9,0xaaaa
	ori 7,7,0x5555
	ori 10,10,0xaaab
	ori 9,9,0xaaab
For this case, since the 'building' instructions can run in parallel,
'loading' is ~8% slower.  On p10, 'loading' (through 'pld') is also >4% slower.


BR,
Jeff(Jiufu)

---
 gcc/config/rs6000/rs6000.cc                | 14 ++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr63281.c | 11 +++++++++++
 2 files changed, 25 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr63281.c

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 4b727d2a500..3798e11bdbc 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10098,6 +10098,20 @@ rs6000_emit_set_const (rtx dest, rtx source)
 	  c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
 	  emit_move_insn (lo, GEN_INT (c));
 	}
+      else if (base_reg_operand (dest, mode)
+	       && num_insns_constant (source, mode) > 2)
+	{
+	  rtx sym = force_const_mem (mode, source);
+	  if (TARGET_TOC && SYMBOL_REF_P (XEXP (sym, 0))
+	      && use_toc_relative_ref (XEXP (sym, 0), mode))
+	    {
+	      rtx toc = create_TOC_reference (XEXP (sym, 0), copy_rtx (dest));
+	      sym = gen_const_mem (mode, toc);
+	      set_mem_alias_set (sym, get_TOC_alias_set ());
+	    }
+
+	  emit_insn (gen_rtx_SET (dest, sym));
+	}
       else
 	rs6000_emit_set_long_const (dest, c);
       break;
diff --git a/gcc/testsuite/gcc.target/powerpc/pr63281.c b/gcc/testsuite/gcc.target/powerpc/pr63281.c
new file mode 100644
index 00000000000..469a8f64400
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr63281.c
@@ -0,0 +1,11 @@
+/* PR target/63281 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -std=c99" } */
+
+void
+foo (unsigned long long *a)
+{
+  *a = 0x020805006106003;
+}
+
+/* { dg-final { scan-assembler-times {\mp?ld\M} 1 } } */
-- 
2.17.1

next             reply	other threads:[~2022-08-15  5:25 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-15  5:25 Jiufu Guo [this message]
2022-08-15  8:07 ` Richard Biener
2022-08-16  3:50   ` Jiufu Guo
2022-08-16  6:45     ` Jiufu Guo
2022-08-15 21:12 ` Segher Boessenkool
2022-08-17  2:32   ` Jiufu Guo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220815052519.194582-1-guojiufu@linux.ibm.com \
    --to=guojiufu@linux.ibm.com \
    --cc=dje.gcc@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=linkw@gcc.gnu.org \
    --cc=segher@kernel.crashing.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).