public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 1/2] Using pli(paddi) and rotate to build 64bit constants
@ 2022-09-01  3:24 Jiufu Guo
  2022-09-01 21:52 ` Segher Boessenkool
  2022-09-02  4:07 ` Kewen.Lin
  0 siblings, 2 replies; 11+ messages in thread
From: Jiufu Guo @ 2022-09-01  3:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: segher, dje.gcc, linkw, guojiufu, meissner

Hi,

As mentioned in PR106550, since pli could support 34bits immediate, we could
use less instructions(3insn would be ok) to build 64bits constant with pli.

For example, for constant 0x020805006106003, we could generate it with:
asm code1:
pli 9,101736451 (0x6106003)
sldi 9,9,32
paddi 9,9, 2130000 (0x0208050)

or asm code2:
pli 10, 2130000
pli 9, 101736451
rldimi 9, 10, 32, 0

Testing with simple cases as below, run them a lot of times:
f1.c
long __attribute__ ((noinline)) foo (long *arg,long *,long*)
{
  *arg = 0x2351847027482577;
}
5insns: base
pli+sldi+paddi: similar -0.08%
pli+pli+rldimi: faster +0.66%

f2.c
long __attribute__ ((noinline)) foo (long *arg, long *arg2, long *arg3)
{
  *arg = 0x2351847027482577;
  *arg2 = 0x3257845024384680;
  *arg3 = 0x1245abcef9240dec;
}
5nisns: base
pli+sldi+paddi: faster +1.35%
pli+pli+rldimi: faster +5.49%

f2.c would be more meaningful.  Because 'sched passes' are effective for
f2.c, but 'scheds' do less thing for f1.c.

Compare with previous patch:
https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599525.html
This one updates code slightly and extracts changes on rs6000.md to a
seperate patch.

This patch pass boostrap and regtest on ppc64le(includes p10).
Is it ok for trunk?

BR,
Jeff(Jiufu)


	PR target/106550

gcc/ChangeLog:

	* config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add 'pli' for
	constant building.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/pr106550.c: New test.

---
 gcc/config/rs6000/rs6000.cc                 | 39 +++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr106550.c | 14 ++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106550.c

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index df491bee2ea..1ccb2ff30a1 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10181,6 +10181,45 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
 			gen_rtx_IOR (DImode, copy_rtx (temp),
 				     GEN_INT (ud1)));
     }
+  else if (TARGET_PREFIXED)
+    {
+      /* pli 9,high32 + pli 10,low32 + rldimi 9,10,32,0.  */
+      if (can_create_pseudo_p ())
+	{
+	  temp = gen_reg_rtx (DImode);
+	  rtx temp1 = gen_reg_rtx (DImode);
+	  emit_move_insn (copy_rtx (temp), GEN_INT ((ud4 << 16) | ud3));
+	  emit_move_insn (copy_rtx (temp1), GEN_INT ((ud2 << 16) | ud1));
+
+	  emit_insn (gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1,
+					   GEN_INT (0xffffffff)));
+	}
+
+      /* pli 9,high32 + sldi 9,32 + paddi 9,9,low32.  */
+      else
+	{
+	  emit_move_insn (copy_rtx (dest), GEN_INT ((ud4 << 16) | ud3));
+
+	  emit_move_insn (copy_rtx (dest),
+			  gen_rtx_ASHIFT (DImode, copy_rtx (dest),
+					  GEN_INT (32)));
+
+	  bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO;
+
+	  /* Use paddi for the low32 bits.  */
+	  if (ud2 != 0 && ud1 != 0 && can_use_paddi)
+	    emit_move_insn (dest, gen_rtx_PLUS (DImode, copy_rtx (dest),
+						GEN_INT ((ud2 << 16) | ud1)));
+	  /* Use oris, ori for low32 bits.  */
+	  if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
+	    emit_move_insn (ud1 != 0 ? copy_rtx (dest) : dest,
+			    gen_rtx_IOR (DImode, copy_rtx (dest),
+					 GEN_INT (ud2 << 16)));
+	  if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
+	    emit_move_insn (dest, gen_rtx_IOR (DImode, copy_rtx (dest),
+					       GEN_INT (ud1)));
+	}
+    }
   else
     {
       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550.c b/gcc/testsuite/gcc.target/powerpc/pr106550.c
new file mode 100644
index 00000000000..c6f4116bb9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106550.c
@@ -0,0 +1,14 @@
+/* PR target/106550 */
+/* { dg-options "-O2 -std=c99 -mdejagnu-cpu=power10" } */
+
+void
+foo (unsigned long long *a)
+{
+  *a++ = 0x020805006106003;
+  *a++ = 0x2351847027482577;  
+}
+
+/* 3 insns for each constant: pli+sldi+paddi or pli+pli+rldimi.
+   And 3 additional insns: std+std+blr: 9 insns totally.  */
+/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 9 } } */
+
-- 
2.17.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2022-09-06 12:34 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-01  3:24 [PATCH 1/2] Using pli(paddi) and rotate to build 64bit constants Jiufu Guo
2022-09-01 21:52 ` Segher Boessenkool
2022-09-02  6:56   ` Jiufu Guo
2022-09-02 16:12     ` Segher Boessenkool
2022-09-05  6:25       ` Jiufu Guo
2022-09-05 13:42         ` Segher Boessenkool
2022-09-06 12:34           ` Jiufu Guo
2022-09-02 15:29   ` Peter Bergner
2022-09-02 16:20     ` Segher Boessenkool
2022-09-02  4:07 ` Kewen.Lin
2022-09-05  6:22   ` Jiufu Guo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).