public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Using pli(paddi) and rotate to build 64bit constants
@ 2022-09-06 12:27 Jiufu Guo
  2022-09-06 16:35 ` Segher Boessenkool
  2022-09-07  1:26 ` Kewen.Lin
  0 siblings, 2 replies; 5+ messages in thread
From: Jiufu Guo @ 2022-09-06 12:27 UTC (permalink / raw)
  To: gcc-patches; +Cc: segher, dje.gcc, linkw, guojiufu

Hi,

Test cases are updated/added, and the code is refined per the comments in
the review of the previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2022-September/600768.html

As mentioned in PR106550, since pli supports a 34-bit immediate, we can use
fewer instructions (3 insns are enough) to build a 64-bit constant with pli.

For example, for constant 0x020805006106003, we could generate it with:
asm code1:
pli 9, 2130000 (0x208050)
sldi 9, 9, 32
paddi 9, 9, 101736451 (0x6106003)

or asm code2:
pli 10, 2130000
pli 9, 101736451
rldimi 9, 10, 32, 0

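The asm code2 is better: its two pli instructions do not depend on each
other, while asm code1 is a chain of three dependent instructions.
This patch generates asm code2 in the split1 pass; it also supports
generating asm code1 when the splitter only runs after RA.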

This patch passes bootstrap and regtest on ppc64. P10 testing is running.
Thanks for any comments!

BR,
Jeff(Jiufu)

	PR target/106550

gcc/ChangeLog:

	* config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Use pli.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/pr106550.c: New test.
	* gcc.target/powerpc/pr106550_1.c: New test.

---
 gcc/config/rs6000/rs6000.cc                   | 35 +++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr106550.c   | 14 ++++++++
 gcc/testsuite/gcc.target/powerpc/pr106550_1.c | 22 ++++++++++++
 3 files changed, 71 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106550.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106550_1.c

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 2f3146e56f8..c05b7869141 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10181,6 +10181,41 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
 			gen_rtx_IOR (DImode, copy_rtx (temp),
 				     GEN_INT (ud1)));
     }
+  else if (TARGET_PREFIXED)
+    {
+      if (can_create_pseudo_p ())
+	{
+	  /* pli A,H + pli B,L + rldimi A,B,32,0.  */
+	  temp = gen_reg_rtx (DImode);
+	  rtx temp1 = gen_reg_rtx (DImode);
+	  emit_move_insn (temp, GEN_INT ((ud4 << 16) | ud3));
+	  emit_move_insn (temp1, GEN_INT ((ud2 << 16) | ud1));
+
+	  emit_insn (gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1,
+					   GEN_INT (0xffffffff)));
+	}
+      else
+	{
+	  /* pli A,H + sldi A,32 + paddi A,A,L.  */
+	  emit_move_insn (dest, GEN_INT ((ud4 << 16) | ud3));
+
+	  emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
+
+	  bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO;
+
+	  /* Use paddi for the low 32 bits.  */
+	  if (ud2 != 0 && ud1 != 0 && can_use_paddi)
+	    emit_move_insn (dest, gen_rtx_PLUS (DImode, dest,
+						GEN_INT ((ud2 << 16) | ud1)));
+
+	  /* Use oris, ori for low 32 bits.  */
+	  if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
+	    emit_move_insn (ud1 != 0 ? dest : dest,
+			    gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
+	  if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
+	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
+	}
+    }
   else
     {
       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550.c b/gcc/testsuite/gcc.target/powerpc/pr106550.c
new file mode 100644
index 00000000000..d023fac4676
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106550.c
@@ -0,0 +1,14 @@
+/* PR target/106550 */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+/* { dg-require-effective-target power10_ok } */
+
+void
+foo (unsigned long long *a)
+{
+  *a++ = 0x020805006106003; /* pli+pli+rldimi */
+  *a++ = 0x2351847027482577;/* pli+pli+rldimi */  
+}
+
+/* { dg-final { scan-assembler-times {\mpli\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mrldimi\M} 2 } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
new file mode 100644
index 00000000000..48f76ca3da9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
@@ -0,0 +1,22 @@
+/* PR target/106550 */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -fdisable-rtl-split1" } */
+/* force the constant splitter run after RA: -fdisable-rtl-split1.  */
+
+void
+foo (unsigned long long *a)
+{
+  /* Test oris/ori is used where paddi does not work with 'r0'. */
+  register long long d asm("r0") = 0x1245abcef9240dec; /* pli+sldi+oris+ori */
+  long long n;
+  asm("cntlzd %0, %1" : "=r"(n) : "r"(d));
+  *a++ = n;
+
+  *a++ = 0x235a8470a7480000ULL; /* pli+sldi+oris*/
+  *a++ = 0x23a184700000b677ULL; /* pli+sldi+ori*/
+}
+
+/* { dg-final { scan-assembler-times {\mpli\M} 3 } } */
+/* { dg-final { scan-assembler-times {\msldi\M} 3 } } */
+/* { dg-final { scan-assembler-times {\moris\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mori\M} 2 } } */
-- 
2.17.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Using pli(paddi) and rotate to build 64bit constants
  2022-09-06 12:27 [PATCH] Using pli(paddi) and rotate to build 64bit constants Jiufu Guo
@ 2022-09-06 16:35 ` Segher Boessenkool
  2022-09-07  2:50   ` Jiufu Guo
  2022-09-07  1:26 ` Kewen.Lin
  1 sibling, 1 reply; 5+ messages in thread
From: Segher Boessenkool @ 2022-09-06 16:35 UTC (permalink / raw)
  To: Jiufu Guo; +Cc: gcc-patches, dje.gcc, linkw

Hi!

On Tue, Sep 06, 2022 at 08:27:56PM +0800, Jiufu Guo wrote:
> +      if (can_create_pseudo_p ())
> +	{
> +	  /* pli A,H + pli B,L + rldimi A,B,32,0.  */

A is the low one and B is the high one (the one that needs to be
shifted).  rl[wd]imi is one of our very few insns that has the output
register as one of the input registers as well.  "Good" to see not just
the compiler has problems with this, but humans do as well!  ;-)

Otherwise looks good to me.  Okay for trunk with that fixed somehow.
Thanks!


Segher

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Using pli(paddi) and rotate to build 64bit constants
  2022-09-06 12:27 [PATCH] Using pli(paddi) and rotate to build 64bit constants Jiufu Guo
  2022-09-06 16:35 ` Segher Boessenkool
@ 2022-09-07  1:26 ` Kewen.Lin
  2022-09-07  2:58   ` Jiufu Guo
  1 sibling, 1 reply; 5+ messages in thread
From: Kewen.Lin @ 2022-09-07  1:26 UTC (permalink / raw)
  To: Jiufu Guo; +Cc: dje.gcc, segher, linkw, gcc-patches

Hi!

> +
> +	  /* Use paddi for the low 32 bits.  */
> +	  if (ud2 != 0 && ud1 != 0 && can_use_paddi)
> +	    emit_move_insn (dest, gen_rtx_PLUS (DImode, dest,
> +						GEN_INT ((ud2 << 16) | ud1)));
> +
> +	  /* Use oris, ori for low 32 bits.  */
> +	  if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
> +	    emit_move_insn (ud1 != 0 ? dest : dest,

Nit: "ud1 != 0 ? dest : dest" => dest

> +			    gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
> +	  if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
> +	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
> +	}
> +    }
>    else
>      {
>        temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550.c b/gcc/testsuite/gcc.target/powerpc/pr106550.c
> new file mode 100644
> index 00000000000..d023fac4676
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr106550.c
> @@ -0,0 +1,14 @@
> +/* PR target/106550 */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +/* { dg-require-effective-target power10_ok } */
> +
> +void
> +foo (unsigned long long *a)
> +{
> +  *a++ = 0x020805006106003; /* pli+pli+rldimi */
> +  *a++ = 0x2351847027482577;/* pli+pli+rldimi */  
> +}
> +
> +/* { dg-final { scan-assembler-times {\mpli\M} 4 } } */
> +/* { dg-final { scan-assembler-times {\mrldimi\M} 2 } } */
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
> new file mode 100644
> index 00000000000..48f76ca3da9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
> @@ -0,0 +1,22 @@
> +/* PR target/106550 */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10 -fdisable-rtl-split1" } */
> +/* force the constant splitter run after RA: -fdisable-rtl-split1.  */
> +
> +void
> +foo (unsigned long long *a)
> +{
> +  /* Test oris/ori is used where paddi does not work with 'r0'. */
> +  register long long d asm("r0") = 0x1245abcef9240dec; /* pli+sldi+oris+ori */
> +  long long n;
> +  asm("cntlzd %0, %1" : "=r"(n) : "r"(d));
> +  *a++ = n;
> +
> +  *a++ = 0x235a8470a7480000ULL; /* pli+sldi+oris*/
> +  *a++ = 0x23a184700000b677ULL; /* pli+sldi+ori*/

Nit: I guess you want one space at the separated end of these two comment lines
since the comment lines at the other places have.  :)

BR,
Kewen

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Using pli(paddi) and rotate to build 64bit constants
  2022-09-06 16:35 ` Segher Boessenkool
@ 2022-09-07  2:50   ` Jiufu Guo
  0 siblings, 0 replies; 5+ messages in thread
From: Jiufu Guo @ 2022-09-07  2:50 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: gcc-patches, dje.gcc, linkw

Segher Boessenkool <segher@kernel.crashing.org> writes:

> Hi!
>
> On Tue, Sep 06, 2022 at 08:27:56PM +0800, Jiufu Guo wrote:
>> +      if (can_create_pseudo_p ())
>> +	{
>> +	  /* pli A,H + pli B,L + rldimi A,B,32,0.  */
>
> A is the low one and B is the high one (the one that needs to be
> shifted).  rl[wd]imi is one of our very few insns that has the output
> register as one of the input registers as well.  "Good" to see not just
> the compiler has problems with this, but humans do as well!  ;-)
Yes, A is the low part and B is the high.
Thanks for catching this!

>
> Otherwise looks good to me.  Okay for trunk with that fixed somehow.
> Thanks!

Thanks again!

BR,
Jeff(Jiufu)
>
>
> Segher

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Using pli(paddi) and rotate to build 64bit constants
  2022-09-07  1:26 ` Kewen.Lin
@ 2022-09-07  2:58   ` Jiufu Guo
  0 siblings, 0 replies; 5+ messages in thread
From: Jiufu Guo @ 2022-09-07  2:58 UTC (permalink / raw)
  To: Kewen.Lin; +Cc: dje.gcc, segher, linkw, gcc-patches

"Kewen.Lin" <linkw@linux.ibm.com> writes:

> Hi!
>
>> +
>> +	  /* Use paddi for the low 32 bits.  */
>> +	  if (ud2 != 0 && ud1 != 0 && can_use_paddi)
>> +	    emit_move_insn (dest, gen_rtx_PLUS (DImode, dest,
>> +						GEN_INT ((ud2 << 16) | ud1)));
>> +
>> +	  /* Use oris, ori for low 32 bits.  */
>> +	  if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
>> +	    emit_move_insn (ud1 != 0 ? dest : dest,
>
> Nit: "ud1 != 0 ? dest : dest" => dest
Thanks! Yes, only 'dest' is ok.
>
>> +			    gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
>> +	  if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
>> +	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
>> +	}
>> +    }
>>    else
>>      {
>>        temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550.c b/gcc/testsuite/gcc.target/powerpc/pr106550.c
>> new file mode 100644
>> index 00000000000..d023fac4676
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr106550.c
>> @@ -0,0 +1,14 @@
>> +/* PR target/106550 */
>> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
>> +/* { dg-require-effective-target power10_ok } */
>> +
>> +void
>> +foo (unsigned long long *a)
>> +{
>> +  *a++ = 0x020805006106003; /* pli+pli+rldimi */
>> +  *a++ = 0x2351847027482577;/* pli+pli+rldimi */  
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {\mpli\M} 4 } } */
>> +/* { dg-final { scan-assembler-times {\mrldimi\M} 2 } } */
>> +
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
>> new file mode 100644
>> index 00000000000..48f76ca3da9
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
>> @@ -0,0 +1,22 @@
>> +/* PR target/106550 */
>> +/* { dg-require-effective-target power10_ok } */
>> +/* { dg-options "-O2 -mdejagnu-cpu=power10 -fdisable-rtl-split1" } */
>> +/* force the constant splitter run after RA: -fdisable-rtl-split1.  */
>> +
>> +void
>> +foo (unsigned long long *a)
>> +{
>> +  /* Test oris/ori is used where paddi does not work with 'r0'. */
>> +  register long long d asm("r0") = 0x1245abcef9240dec; /* pli+sldi+oris+ori */
>> +  long long n;
>> +  asm("cntlzd %0, %1" : "=r"(n) : "r"(d));
>> +  *a++ = n;
>> +
>> +  *a++ = 0x235a8470a7480000ULL; /* pli+sldi+oris*/
>> +  *a++ = 0x23a184700000b677ULL; /* pli+sldi+ori*/
>
> Nit: I guess you want one space at the separated end of these two comment lines
> since the comment lines at the other places have.  :)
Yeap, thanks for your careful review!

BR,
Jeff(Jiufu)
>
> BR,
> Kewen

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-09-07  2:58 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-06 12:27 [PATCH] Using pli(paddi) and rotate to build 64bit constants Jiufu Guo
2022-09-06 16:35 ` Segher Boessenkool
2022-09-07  2:50   ` Jiufu Guo
2022-09-07  1:26 ` Kewen.Lin
2022-09-07  2:58   ` Jiufu Guo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).