public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 2/2] rs6000: building const with lis/li/pli+rlwinm
@ 2022-09-08  2:26 Jiufu Guo
  0 siblings, 0 replies; only message in thread
From: Jiufu Guo @ 2022-09-08  2:26 UTC (permalink / raw)
  To: gcc-patches; +Cc: segher, dje.gcc, linkw, guojiufu

Hi,

We may use two instructions (li/lis followed by rlwinm with a mask) to build
a 64-bit constant.  For example, 'li 9,16383' followed by
'rlwinm 9,9,0,29,25' builds 0x00003fff00003fc7LL.

This patch updates rs6000_emit_set_long_const to build such constants with rlwinm.

Bootstrap & regtest pass on ppc64 and ppc64le.
Is this ok for trunk?

BR,
Jeff(Jiufu)


	PR target/94395

gcc/ChangeLog:

	* config/rs6000/rs6000.cc (from_rotate32): New function to check whether
	a 32-bit value is a 32-bit rotation of an li/lis constant.
	(check_rotate32_mask): New function to check sh/mb/me for rlwinm.
	(rs6000_emit_set_long_const): Use rlwinm to build constant.
	* config/rs6000/rs6000.md (rlwinm<mode>3): New define_insn.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/pr93012.c: Update insn count.
	* gcc.target/powerpc/pr94395_rlwinm.c: New test.
	* gcc.target/powerpc/pr94395_rlwinm.h: New file.
	* gcc.target/powerpc/pr94395_rlwinm_1.c: New test.

---
 gcc/config/rs6000/rs6000.cc                   | 83 ++++++++++++++++++-
 gcc/config/rs6000/rs6000.md                   | 11 +++
 gcc/testsuite/gcc.target/powerpc/pr93012.c    |  3 +-
 .../gcc.target/powerpc/pr94395_rlwinm.c       |  6 ++
 .../gcc.target/powerpc/pr94395_rlwinm.h       |  8 ++
 .../gcc.target/powerpc/pr94395_rlwinm_1.c     | 16 ++++
 6 files changed, 123 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm.h
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm_1.c

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 93438b4da07..3b5a2f5a16e 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10110,7 +10110,8 @@ rs6000_emit_set_const (rtx dest, rtx source)
    Return -1 if C can not be rotated as from.  */
 
 static int
-rotate_from_leading_zeros_const (unsigned HOST_WIDE_INT c, int clz)
+rotate_from_leading_zeros_const (unsigned HOST_WIDE_INT c, int clz,
+				 bool rotl32 = false)
 {
   /* case a. 0..0xxx: already at least clz zeros.  */
   int lz = clz_hwi (c);
@@ -10126,7 +10127,9 @@ rotate_from_leading_zeros_const (unsigned HOST_WIDE_INT c, int clz)
 	       ^bit -> Vbit, then zeros are at head or tail.
 	     00...00xxx100, 'clz + 1' >= 'bits of xxxx'.  */
   const int rot_bits = HOST_BITS_PER_WIDE_INT - clz + 1;
-  unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
+  unsigned HOST_WIDE_INT rc;
+  rc = rotl32 ? (((c >> rot_bits) | (c << (32 - rot_bits))) & 0xFFFFFFFFULL)
+	      : (c >> rot_bits) | (c << (clz - 1));
   lz = clz_hwi (rc);
   tz = ctz_hwi (rc);
   if (lz + tz >= clz)
@@ -10319,6 +10322,71 @@ check_rotate_mask (unsigned HOST_WIDE_INT c, HOST_WIDE_INT *val, int *shift,
   return true;
 }
 
+/* Check whether the low 32 bits of C are a 32-bit rotation of a constant
+   that li or lis can load.  Return the rotate count, or -1 if not.  */
+
+static int
+from_rotate32 (unsigned HOST_WIDE_INT c)
+{
+  /* rotate32 from a positive li value: 17 leading zeros (17 + 32 = 49).  */
+  int n = rotate_from_leading_zeros_const (c & 0xFFFFFFFFULL, 49, true);
+
+  /* rotate32 from li negative.  */
+  if (n < 0)
+    n = rotate_from_leading_zeros_const ((~c) & 0xFFFFFFFFULL, 49, true);
+
+  /* rotate32 from lis negative.  */
+  if (n < 0)
+    {
+      n = rotate_from_leading_zeros_const (c & 0xFFFFFFFFULL, 48, true);
+      if (n >= 0)
+	n += 16;
+    }
+
+  return n < 0 ? -1 : (n % 32);
+}
+
+/* Check if value C can be generated by 2 instructions, one instruction
+   is li/lis or pli, another instruction is rlwinm.  */
+
+static bool
+check_rotate32_mask (unsigned HOST_WIDE_INT c, HOST_WIDE_INT *val, int *shift,
+		     int *mb, int *me, bool for_pli)
+{
+  unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL;
+  unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL;
+  unsigned HOST_WIDE_INT v;
+  int b, e;
+
+  /* diff of high and low (high ^ low) should be the mask position.  */
+  unsigned HOST_WIDE_INT m = low ^ high;
+  int tz = ctz_hwi (m);
+  int lz = clz_hwi (m);
+  b = m == 0 ? 1 : (high != 0 ? 32 - tz : lz - 32);
+  e = m == 0 ? 0 : (high != 0 ? lz - 33 : 31 - tz);
+  if (m != 0)
+    m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz);
+  if (high != 0)
+    m = ~m;
+  v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF);
+
+  if ((high != 0) && ((v & m) != low || e < 0 || b > 31))
+    return false;
+
+  int n = for_pli ? 0 : from_rotate32 (v);
+  if (n < 0)
+    return false;
+
+  v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF;
+  if (v & 0x80000000ULL)
+    v |= HOST_WIDE_INT_M1U << 32;
+  *me = e;
+  *mb = b;
+  *val = v;
+  *shift = n;
+  return true;
+}
+
 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
    Output insns to set DEST equal to the constant C as a series of
    lis, ori and shl instructions.  */
@@ -10330,7 +10398,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
   HOST_WIDE_INT ud1, ud2, ud3, ud4;
   HOST_WIDE_INT orig_c = c;
   HOST_WIDE_INT val = c;
-  int shift;
+  int shift, mb, me;
   unsigned HOST_WIDE_INT mask;
 
   ud1 = c & 0xffff;
@@ -10391,6 +10459,15 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
 	x = gen_rtx_AND (DImode, x, GEN_INT (mask));
       emit_move_insn (dest, x);
     }
+  else if (check_rotate32_mask (orig_c, &val, &shift, &mb, &me, false)
+	   || (TARGET_PREFIXED
+	       && check_rotate32_mask (orig_c, &val, &shift, &mb, &me, true)))
+    {
+      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+      emit_move_insn (temp, GEN_INT (val));
+      emit_insn (gen_rlwinmdi3 (dest, copy_rtx (temp), GEN_INT (shift),
+				GEN_INT (mb), GEN_INT (me)));
+    }
   else if (ud3 == 0 && ud4 == 0)
     {
       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index e9e5cd1e54d..ae60d2c958f 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -4101,6 +4101,17 @@ (define_insn "*rotl<mode>3_mask"
   [(set_attr "type" "shift")
    (set_attr "maybe_var_shift" "yes")])
 
+(define_insn "rlwinm<mode>3"
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+	 (and:GPR (rotate:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
+		             (match_operand:SI 2 "immediate_operand" "n"))
+		 (unspec:DI [(match_operand:SI 3 "immediate_operand" "n")
+			     (match_operand:SI 4 "immediate_operand" "n")]
+	UNSPEC_AND)))]
+  "UINTVAL (operands[3]) < 32 && UINTVAL (operands[4]) < 32"
+  "rlwinm %0,%1,%2,%3,%4"
+  [(set_attr "type" "shift")])
+
 (define_insn_and_split "*rotl<mode>3_mask_dot"
   [(set (match_operand:CC 5 "cc_reg_operand" "=x,?y")
 	(compare:CC
diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c
index 4f764d0576f..aaad9ede831 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
@@ -10,4 +10,5 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; }
 unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
 unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
 
-/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */
+/* { dg-final { scan-assembler-times {\mrldimi\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mrlwinm\M} 4 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm.c b/gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm.c
new file mode 100644
index 00000000000..80b0c4ebd64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm.c
@@ -0,0 +1,6 @@
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+/* { dg-do compile { target has_arch_ppc64 } } */
+#include "pr94395_rlwinm.h"
+
+/* { dg-final { scan-assembler-times {\mli\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mrlwinm\M} 3 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm.h b/gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm.h
new file mode 100644
index 00000000000..6edadd261ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm.h
@@ -0,0 +1,8 @@
+/* Use 2 instructions (li/lis + rlwinm) to build constants.  */
+void  __attribute__ ((__noinline__, __noclone__))
+foo (long long *arg)
+{
+  *arg++ = 0x00000000faaabf80ULL;
+  *arg++ = 0x0002aaa80002aaa8ULL;
+  *arg++ = 0x00003fff00003fc7ULL;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm_1.c b/gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm_1.c
new file mode 100644
index 00000000000..f8a5f69bf3e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm_1.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include "pr94395_rlwinm.h"
+
+long long arr1[] = {0xfaaabf80ULL, 0x2aaa80002aaa8ULL, 0x3fff00003fc7ULL};
+int
+main ()
+{
+  long long a[3];
+
+  foo (a);
+  if (__builtin_memcmp (a, arr1, sizeof (arr1)) != 0)
+    __builtin_abort ();
+  return 0;
+}
-- 
2.17.1


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-09-08  2:26 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-08  2:26 [PATCH 2/2] rs6000: building const with lis/li/pli+rlwinm Jiufu Guo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).