public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Peter Bergner <bergner@linux.ibm.com>
To: Segher Boessenkool <segher@kernel.crashing.org>
Cc: GCC Patches <gcc-patches@gcc.gnu.org>,
	Bill Schmidt <wschmidt@linux.ibm.com>
Subject: [PATCH] rs6000: inefficient 64-bit constant generation for consecutive 1-bits
Date: Thu, 10 Sep 2020 16:58:03 -0500	[thread overview]
Message-ID: <838b2e97-dfa9-3ca0-c3c6-1767d60ddf05@linux.ibm.com> (raw)

Generating arbitrary 64-bit constants on POWER can take up to 5 instructions.
However, some special constants can be generated in fewer instructions.
One special class of constants we don't handle is those that consist of a
single run of consecutive 1-bits.  These can be generated with a "li rT,-1"
followed by a "rldic rX,rT,SH,MB" instruction.  The following patch
implements this idea.

This has passed bootstrap and regtesting on powerpc64le-linux with no
regressions.  Testing on powerpc64-linux is still running.
Ok for trunk if the BE testing comes back clean too?

Peter


gcc/
	PR target/93176
	* config/rs6000/rs6000.c (has_consecutive_ones): New function.
	(num_insns_constant_gpr): Use it.
	(rs6000_emit_set_long_const): Likewise.
	* config/rs6000/rs6000.md (UNSPEC_RLDIC): New unspec.
	(rldic): New.

gcc/testsuite/
	PR target/93176
	* gcc.target/powerpc/pr93176.c: New test.


diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index ca5b71ecdd3..273cab14138 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5728,6 +5728,47 @@ direct_return (void)
   return 0;
 }
 
+/* Helper for num_insns_constant_gpr and rs6000_emit_set_long_const.
+   Return TRUE if VALUE contains one set of consecutive 1-bits.  Also set
+   *SH and *MB to values needed to generate VALUE with the rldic instruction.
+   We accept consecutive 1-bits that wrap from MSB to LSB, ex: 0xff00...00ff.
+   Otherwise, return FALSE.  */
+
+static bool
+has_consecutive_ones (unsigned HOST_WIDE_INT value, int *sh, int *mb)
+{
+  unsigned HOST_WIDE_INT nlz, ntz, mask;
+  unsigned HOST_WIDE_INT allones = -1;
+
+  ntz = ctz_hwi (value);
+  nlz = clz_hwi (value);
+  mask = (allones >> nlz) & (allones << ntz);
+  if (value == mask)
+    {
+      /* Compute beginning and ending bit numbers, using IBM bit numbering.  */
+      *mb = nlz;
+      *sh = ntz;
+      return true;
+    }
+
+  /* Check if the inverted value contains consecutive ones.  We can create
+     that constant by basically swapping the MB and ME bit numbers.  */
+  value = ~value;
+  ntz = ctz_hwi (value);
+  nlz = clz_hwi (value);
+  mask = (allones >> nlz) & (allones << ntz);
+  if (value == mask)
+    {
+      /* Compute beginning and ending bit numbers, using IBM bit numbering.  */
+      *mb = GET_MODE_BITSIZE (DImode) - ntz;
+      *sh = GET_MODE_BITSIZE (DImode) - nlz;
+      return true;
+    }
+
+  *sh = *mb = 0;
+  return false;
+}
+
 /* Helper for num_insns_constant.  Calculate number of instructions to
    load VALUE to a single gpr using combinations of addi, addis, ori,
    oris and sldi instructions.  */
@@ -5752,10 +5793,14 @@ num_insns_constant_gpr (HOST_WIDE_INT value)
     {
       HOST_WIDE_INT low  = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
       HOST_WIDE_INT high = value >> 31;
+      int sh, mb;
 
       if (high == 0 || high == -1)
 	return 2;
 
+      if (has_consecutive_ones (value, &sh, &mb))
+	return 2;
+
       high >>= 1;
 
       if (low == 0)
@@ -9427,7 +9472,8 @@ static void
 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
 {
   rtx temp;
-  HOST_WIDE_INT ud1, ud2, ud3, ud4;
+  HOST_WIDE_INT ud1, ud2, ud3, ud4, value = c;
+  int sh, mb;
 
   ud1 = c & 0xffff;
   c = c >> 16;
@@ -9453,6 +9499,12 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
 			gen_rtx_IOR (DImode, copy_rtx (temp),
 				     GEN_INT (ud1)));
     }
+  else if (has_consecutive_ones (value, &sh, &mb))
+    {
+      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+      emit_insn (gen_rtx_SET (copy_rtx (temp), CONSTM1_RTX (DImode)));
+      emit_insn (gen_rldic (dest, copy_rtx (temp), GEN_INT (sh), GEN_INT (mb)));
+    }
   else if (ud3 == 0 && ud4 == 0)
     {
       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 43b620ae1c0..feb5884505c 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -154,6 +154,7 @@
    UNSPEC_CNTTZDM
    UNSPEC_PDEPD
    UNSPEC_PEXTD
+   UNSPEC_RLDIC
   ])
 
 ;;
@@ -9173,6 +9174,14 @@
   DONE;
 })
 
+(define_insn "rldic"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r")
+		    (match_operand:DI 2 "u6bit_cint_operand" "n")
+		    (match_operand:DI 3 "u6bit_cint_operand" "n")]
+		   UNSPEC_RLDIC))]
+  "TARGET_POWERPC64"
+  "rldic %0,%1,%2,%3")
 \f
 ;; TImode/PTImode is similar, except that we usually want to compute the
 ;; address into a register and use lsi/stsi (the exception is during reload).
diff --git a/gcc/testsuite/gcc.target/powerpc/pr93176.c b/gcc/testsuite/gcc.target/powerpc/pr93176.c
new file mode 100644
index 00000000000..d4d93f8f1b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr93176.c
@@ -0,0 +1,49 @@
+/* PR target/93176 */
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2" } */
+
+/* Verify we generate the correct 2 instruction sequence:
+   li rT,-1; rldic rX,rT,SH,MB for the constants below.  */
+
+unsigned long
+test0 (void)
+{
+   return 0x00ffffffffffff00UL;
+}
+
+unsigned long
+test1 (void)
+{
+   return 0x00ffffffff000000UL;
+}
+
+unsigned long
+test2 (void)
+{
+   return 0x00ffff0000000000UL;
+}
+
+unsigned long
+test3 (void)
+{
+   return 0xffffff0000000000UL;
+}
+
+unsigned long
+test4 (void)
+{
+   return 0xffffff000000ffffUL;
+}
+
+unsigned long
+test5 (void)
+{
+   return 0x0000010000000000UL;
+}
+
+/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,8,8" } } */
+/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,24,8" } } */
+/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,40,8" } } */
+/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,40,48" } } */
+/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,40,23" } } */

             reply	other threads:[~2020-09-10 21:58 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-10 21:58 Peter Bergner [this message]
2020-09-15  4:53 ` Alan Modra
2020-09-15 14:01   ` Segher Boessenkool
2020-09-15 13:56 ` Segher Boessenkool
2020-09-15 15:48   ` Peter Bergner
2020-09-15 19:54     ` Segher Boessenkool

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=838b2e97-dfa9-3ca0-c3c6-1767d60ddf05@linux.ibm.com \
    --to=bergner@linux.ibm.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=segher@kernel.crashing.org \
    --cc=wschmidt@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).