* [PATCH] add rlwinm pattern for DImode for constant building
@ 2024-04-22 2:35 Jiufu Guo
2024-05-17 3:17 ` Jiufu Guo
0 siblings, 1 reply; 4+ messages in thread
From: Jiufu Guo @ 2024-04-22 2:35 UTC (permalink / raw)
To: gcc-patches; +Cc: segher, dje.gcc, linkw, bergner, guojiufu
Hi,
The 'rlwinm' pattern is already widely used for SImode. Because this
instruction can affect the whole 64-bit register, some 64-bit (DImode)
constants can be built via 'lis/li + rlwinm'. To achieve this, a new
DImode pattern for 'rlwinm' is added, and 'rs6000_emit_set_long_const' is
updated to check whether a constant can be built by 'lis/li; rlwinm'.
Bootstrap and regtest pass on ppc64{,le}.
Is this patch ok for trunk (when stage1 is open)?
Jeff (Jiufu Guo).
gcc/ChangeLog:
* config/rs6000/rs6000-protos.h (can_be_rotated_to_lowbits): Add new
parameter.
* config/rs6000/rs6000.cc (can_be_built_by_li_lis_and_rlwinm): New function.
(rs6000_emit_set_long_const): Generate 'lis/li+rlwinm'.
(can_be_rotated_to_lowbits): Add new parameter.
* config/rs6000/rs6000.md (rlwinm_di_mask): New pattern.
gcc/testsuite/ChangeLog:
* gcc.target/powerpc/pr93012.c: Update to match 'rlwinm'.
* gcc.target/powerpc/rlwinm4di-1.c: New test.
* gcc.target/powerpc/rlwinm4di-2.c: New test.
* gcc.target/powerpc/rlwinm4di.c: New test.
* gcc.target/powerpc/rlwinm4di.h: New test.
---
gcc/config/rs6000/rs6000-protos.h | 2 +-
gcc/config/rs6000/rs6000.cc | 65 ++++++++++++++++++-
gcc/config/rs6000/rs6000.md | 18 +++++
gcc/testsuite/gcc.target/powerpc/pr93012.c | 2 +-
.../gcc.target/powerpc/rlwinm4di-1.c | 25 +++++++
.../gcc.target/powerpc/rlwinm4di-2.c | 19 ++++++
gcc/testsuite/gcc.target/powerpc/rlwinm4di.c | 6 ++
gcc/testsuite/gcc.target/powerpc/rlwinm4di.h | 25 +++++++
8 files changed, 158 insertions(+), 4 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 09a57a806fa..10505a8061a 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -36,7 +36,7 @@ extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr);
extern int vspltis_shifted (rtx);
extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
-extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *);
+extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *, bool = false);
extern bool can_be_rotated_to_positive_16bits (HOST_WIDE_INT);
extern bool can_be_rotated_to_negative_15bits (HOST_WIDE_INT);
extern int num_insns_constant (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 6ba9df4f02e..853eaede673 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10454,6 +10454,51 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
return false;
}
+/* Check if value C can be generated by 2 instructions, one instruction
+ is li/lis, another instruction is rlwinm. */
+
+static bool
+can_be_built_by_li_lis_and_rlwinm (HOST_WIDE_INT c, HOST_WIDE_INT *val,
+ int *shift, HOST_WIDE_INT *mask)
+{
+ unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL;
+ unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL;
+ unsigned HOST_WIDE_INT v;
+
+ /* diff of high and low (high ^ low) should be the mask position. */
+ unsigned HOST_WIDE_INT m = low ^ high;
+ int tz = ctz_hwi (m);
+ int lz = clz_hwi (m);
+ if (m != 0)
+ m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz);
+ if (high != 0)
+ m = ~m;
+ v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF);
+
+ if ((high != 0) && ((v & m) != low || lz < 33 || tz < 1))
+ return false;
+
+ /* rotl32 on positive/negative value of 'li' 15/16bits. */
+ int n;
+ if (!can_be_rotated_to_lowbits (v, 15, &n, true)
+ && !can_be_rotated_to_lowbits ((~v) & 0xFFFFFFFFULL, 15, &n, true))
+ {
+ /* rotate32 from a negative value of 'lis'. */
+ if (!can_be_rotated_to_lowbits (v & 0xFFFFFFFFULL, 16, &n, true))
+ return false;
+ n += 16;
+ }
+ n = 32 - (n % 32);
+ n %= 32;
+ v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF;
+ if (v & 0x80000000ULL)
+ v |= HOST_WIDE_INT_M1U << 32;
+ *mask = m;
+ *val = v;
+ *shift = n;
+ return true;
+}
+
/* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
Output insns to set DEST equal to the constant C as a series of
lis, ori and shl instructions. If NUM_INSNS is not NULL, then
@@ -10553,6 +10598,18 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
return;
}
+ HOST_WIDE_INT val;
+ if (can_be_built_by_li_lis_and_rlwinm (c, &val, &shift, &mask))
+ {
+ /* li/lis; rlwinm */
+ count_or_emit_insn (temp, GEN_INT (val));
+ rtx low = temp ? gen_lowpart (SImode, temp) : nullptr;
+ rtx m = GEN_INT (mask);
+ rtx n = GEN_INT (shift);
+ count_or_emit_insn (gen_rlwinm_di_mask (dest, low, n, m));
+ return;
+ }
+
if (ud3 == 0 && ud4 == 0)
{
gcc_assert ((ud2 & 0x8000) && ud1 != 0);
@@ -15220,7 +15277,8 @@ rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
Return false otherwise. */
bool
-can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
+can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot,
+ bool rotl32)
{
int clz = HOST_BITS_PER_WIDE_INT - lowbits;
@@ -15244,7 +15302,10 @@ can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
^bit -> Vbit, , then zeros are at head or tail.
00...00xxx100, 'clz - 1' >= 'bits of xxxx'. */
const int rot_bits = lowbits + 1;
- unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
+ unsigned HOST_WIDE_INT rc;
+ rc = rotl32 ? ((((c & 0xFFFFFFFFULL) >> rot_bits)
+ | ((c << (32 - rot_bits)) & 0xFFFFFFFFULL)))
+ : (c >> rot_bits) | (c << (clz - 1));
tz = ctz_hwi (rc);
if (clz_hwi (rc) + tz >= clz)
{
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index bc8bc6ab060..8a82ba3e26c 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -4213,6 +4213,24 @@ (define_insn_and_split "*rotl<mode>3_mask_dot2"
(set_attr "dot" "yes")
(set_attr "length" "4,8")])
+; define an insn about rlwinm for DI mode (with high part content)
+(define_insn "rlwinm_di_mask"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (and:DI (plus:DI
+ (ashift:DI (subreg:DI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "n")) 0)
+ (const_int 32))
+ (zero_extend:DI (rotate:SI (match_dup 1) (match_dup 2))))
+ (match_operand:DI 3 "const_int_operand" "n")))]
+ "rs6000_is_valid_and_mask (operands[3], SImode)"
+{
+ return UINTVAL (operands[3]) == -1ULL ?
+ "rlwinm %0,%1,%h2,1,0" : "rlwinm %0,%1,%h2,%3";
+}
+ [(set_attr "type" "shift")
+ (set_attr "maybe_var_shift" "yes")])
+
; Special case for less-than-0. We can do it with just one machine
; instruction, but the generic optimizers do not realise it is cheap.
(define_insn "*lt0_<mode>di"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c
index 4f764d0576f..70ddfaa21da 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
@@ -10,4 +10,4 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; }
unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
-/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */
+/* { dg-final { scan-assembler-times {\mrlwinm\M|\mrldimi\M} 7 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
new file mode 100644
index 00000000000..8959578143b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include "rlwinm4di.h"
+
+long long arr1[] = {
+ 0x0000400100000001ULL, 0x0000000200000002ULL, 0xffff8000bfff8000ULL,
+ 0xffff8001ffff8001ULL, 0x0000800100000001ULL, 0x0000800100008001ULL,
+ 0x0000800200000002ULL, 0x0000800000008000ULL, 0x0000000080008000ULL,
+ 0xffff0001bfff0001ULL, 0xffff0001ffff0001ULL, 0x0001000200000002ULL,
+ 0x8001000080010000ULL, 0x0004000100000001ULL, 0x0004000100040001ULL,
+ 0x00000000bfffe001ULL, 0x0003fffe0001fffeULL, 0x0003fffe0003fffeULL,
+ 0x0002000100000001ULL, 0x0002000100020001ULL,
+};
+
+int
+main ()
+{
+ long long a[sizeof (arr1) / sizeof (arr1[0])];
+
+ foo (a);
+ if (__builtin_memcmp (a, arr1, sizeof (arr1)) != 0)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
new file mode 100644
index 00000000000..9494d0327b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
@@ -0,0 +1,19 @@
+/* { dg-options "-O2 -mno-prefixed" } */
+/* { dg-do compile { target has_arch_ppc64 } } */
+
+#define N 5
+#define MASK 0xffffffffe0000003ULL
+
+typedef unsigned long long int64;
+
+int64
+foo (int64 v)
+{
+ unsigned int v1 = v;
+ unsigned int v2 = ((v1 << N) | (v1 >> (32 - N)));
+ return ((int64) v2 | ((int64) v2 << 32)) & MASK;
+}
+
+/* { dg-final { scan-assembler-not {\mor\M} } } */
+/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
+/* { dg-final { scan-assembler-times {\mrlwinm\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
new file mode 100644
index 00000000000..fcbc8f8d742
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
@@ -0,0 +1,6 @@
+/* { dg-options "-O2 -mno-prefixed" } */
+/* { dg-do compile { target has_arch_ppc64 } } */
+#include "rlwinm4di.h"
+
+/* { dg-final { scan-assembler-times {\mrlwinm\M} 20 } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
new file mode 100644
index 00000000000..59fe739ca85
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
@@ -0,0 +1,25 @@
+/* using 2 instructions(rlwinm) to build constants. */
+void __attribute__ ((__noinline__, __noclone__))
+foo (long long *arg)
+{
+ *arg++ = 0x0000400100000001ULL;
+ *arg++ = 0x0000000200000002ULL;
+ *arg++ = 0xffff8000bfff8000ULL;
+ *arg++ = 0xffff8001ffff8001ULL;
+ *arg++ = 0x0000800100000001ULL;
+ *arg++ = 0x0000800100008001ULL;
+ *arg++ = 0x0000800200000002ULL;
+ *arg++ = 0x0000800000008000ULL;
+ *arg++ = 0x0000000080008000ULL;
+ *arg++ = 0xffff0001bfff0001ULL;
+ *arg++ = 0xffff0001ffff0001ULL;
+ *arg++ = 0x0001000200000002ULL;
+ *arg++ = 0x8001000080010000ULL;
+ *arg++ = 0x0004000100000001ULL;
+ *arg++ = 0x0004000100040001ULL;
+ *arg++ = 0x00000000bfffe001ULL;
+ *arg++ = 0x0003fffe0001fffeULL;
+ *arg++ = 0x0003fffe0003fffeULL;
+ *arg++ = 0x0002000100000001ULL;
+ *arg++ = 0x0002000100020001ULL;
+}
--
2.25.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] add rlwinm pattern for DImode for constant building
2024-04-22 2:35 [PATCH] add rlwinm pattern for DImode for constant building Jiufu Guo
@ 2024-05-17 3:17 ` Jiufu Guo
2024-06-06 1:53 ` Jiufu Guo
0 siblings, 1 reply; 4+ messages in thread
From: Jiufu Guo @ 2024-05-17 3:17 UTC (permalink / raw)
To: gcc-patches; +Cc: segher, dje.gcc, linkw, bergner
Hi,
Gentle ping ...
BR,
Jeff(Jiufu) Guo
Jiufu Guo <guojiufu@linux.ibm.com> writes:
> Hi,
>
> 'rlwinm' pattern is already well used for SImode. As this instruction
> can touch the whole 64bit register, so some constants in 64bit(DImode)
> can be built via 'lis/li+rlwinm'. To achieve this, a new pattern for
> 'rlwinm' is added, and 'rs6000_emit_set_long_const' is updated to check
> if a constant is able to be built by 'lis/li; rlwinm'.
>
> Bootstrap and regtest pass on ppc64{,le}.
>
> Is this patch ok for trunk (when stage1 is open)?
>
> Jeff (Jiufu Guo).
>
> gcc/ChangeLog:
>
> * config/rs6000/rs6000-protos.h (can_be_rotated_to_lowbits): Add new
> parameter.
> * config/rs6000/rs6000.cc (can_be_built_by_li_lis_and_rlwinm): New function.
> (rs6000_emit_set_long_const): Generate 'lis/li+rlwinm'.
> (can_be_rotated_to_lowbits): Add new parameter.
> * config/rs6000/rs6000.md (rlwinm_di_mask): New pattern.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/powerpc/pr93012.c: Update to match 'rlwinm'.
> * gcc.target/powerpc/rlwinm4di-1.c: New test.
> * gcc.target/powerpc/rlwinm4di-2.c: New test.
> * gcc.target/powerpc/rlwinm4di.c: New test.
> * gcc.target/powerpc/rlwinm4di.h: New test.
>
> ---
> gcc/config/rs6000/rs6000-protos.h | 2 +-
> gcc/config/rs6000/rs6000.cc | 65 ++++++++++++++++++-
> gcc/config/rs6000/rs6000.md | 18 +++++
> gcc/testsuite/gcc.target/powerpc/pr93012.c | 2 +-
> .../gcc.target/powerpc/rlwinm4di-1.c | 25 +++++++
> .../gcc.target/powerpc/rlwinm4di-2.c | 19 ++++++
> gcc/testsuite/gcc.target/powerpc/rlwinm4di.c | 6 ++
> gcc/testsuite/gcc.target/powerpc/rlwinm4di.h | 25 +++++++
> 8 files changed, 158 insertions(+), 4 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>
> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
> index 09a57a806fa..10505a8061a 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -36,7 +36,7 @@ extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr);
> extern int vspltis_shifted (rtx);
> extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
> extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
> -extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *);
> +extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *, bool = false);
> extern bool can_be_rotated_to_positive_16bits (HOST_WIDE_INT);
> extern bool can_be_rotated_to_negative_15bits (HOST_WIDE_INT);
> extern int num_insns_constant (rtx, machine_mode);
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 6ba9df4f02e..853eaede673 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -10454,6 +10454,51 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
> return false;
> }
>
> +/* Check if value C can be generated by 2 instructions, one instruction
> + is li/lis, another instruction is rlwinm. */
> +
> +static bool
> +can_be_built_by_li_lis_and_rlwinm (HOST_WIDE_INT c, HOST_WIDE_INT *val,
> + int *shift, HOST_WIDE_INT *mask)
> +{
> + unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL;
> + unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL;
> + unsigned HOST_WIDE_INT v;
> +
> + /* diff of high and low (high ^ low) should be the mask position. */
> + unsigned HOST_WIDE_INT m = low ^ high;
> + int tz = ctz_hwi (m);
> + int lz = clz_hwi (m);
> + if (m != 0)
> + m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz);
> + if (high != 0)
> + m = ~m;
> + v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF);
> +
> + if ((high != 0) && ((v & m) != low || lz < 33 || tz < 1))
> + return false;
> +
> + /* rotl32 on positive/negative value of 'li' 15/16bits. */
> + int n;
> + if (!can_be_rotated_to_lowbits (v, 15, &n, true)
> + && !can_be_rotated_to_lowbits ((~v) & 0xFFFFFFFFULL, 15, &n, true))
> + {
> + /* rotate32 from a negative value of 'lis'. */
> + if (!can_be_rotated_to_lowbits (v & 0xFFFFFFFFULL, 16, &n, true))
> + return false;
> + n += 16;
> + }
> + n = 32 - (n % 32);
> + n %= 32;
> + v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF;
> + if (v & 0x80000000ULL)
> + v |= HOST_WIDE_INT_M1U << 32;
> + *mask = m;
> + *val = v;
> + *shift = n;
> + return true;
> +}
> +
> /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
> Output insns to set DEST equal to the constant C as a series of
> lis, ori and shl instructions. If NUM_INSNS is not NULL, then
> @@ -10553,6 +10598,18 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
> return;
> }
>
> + HOST_WIDE_INT val;
> + if (can_be_built_by_li_lis_and_rlwinm (c, &val, &shift, &mask))
> + {
> + /* li/lis; rlwinm */
> + count_or_emit_insn (temp, GEN_INT (val));
> + rtx low = temp ? gen_lowpart (SImode, temp) : nullptr;
> + rtx m = GEN_INT (mask);
> + rtx n = GEN_INT (shift);
> + count_or_emit_insn (gen_rlwinm_di_mask (dest, low, n, m));
> + return;
> + }
> +
> if (ud3 == 0 && ud4 == 0)
> {
> gcc_assert ((ud2 & 0x8000) && ud1 != 0);
> @@ -15220,7 +15277,8 @@ rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
> Return false otherwise. */
>
> bool
> -can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
> +can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot,
> + bool rotl32)
> {
> int clz = HOST_BITS_PER_WIDE_INT - lowbits;
>
> @@ -15244,7 +15302,10 @@ can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
> ^bit -> Vbit, , then zeros are at head or tail.
> 00...00xxx100, 'clz - 1' >= 'bits of xxxx'. */
> const int rot_bits = lowbits + 1;
> - unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
> + unsigned HOST_WIDE_INT rc;
> + rc = rotl32 ? ((((c & 0xFFFFFFFFULL) >> rot_bits)
> + | ((c << (32 - rot_bits)) & 0xFFFFFFFFULL)))
> + : (c >> rot_bits) | (c << (clz - 1));
> tz = ctz_hwi (rc);
> if (clz_hwi (rc) + tz >= clz)
> {
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index bc8bc6ab060..8a82ba3e26c 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -4213,6 +4213,24 @@ (define_insn_and_split "*rotl<mode>3_mask_dot2"
> (set_attr "dot" "yes")
> (set_attr "length" "4,8")])
>
> +; define an insn about rlwinm for DI mode (with high part content)
> +(define_insn "rlwinm_di_mask"
> + [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
> + (and:DI (plus:DI
> + (ashift:DI (subreg:DI
> + (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r")
> + (match_operand:SI 2 "const_int_operand" "n")) 0)
> + (const_int 32))
> + (zero_extend:DI (rotate:SI (match_dup 1) (match_dup 2))))
> + (match_operand:DI 3 "const_int_operand" "n")))]
> + "rs6000_is_valid_and_mask (operands[3], SImode)"
> +{
> + return UINTVAL (operands[3]) == -1ULL ?
> + "rlwinm %0,%1,%h2,1,0" : "rlwinm %0,%1,%h2,%3";
> +}
> + [(set_attr "type" "shift")
> + (set_attr "maybe_var_shift" "yes")])
> +
> ; Special case for less-than-0. We can do it with just one machine
> ; instruction, but the generic optimizers do not realise it is cheap.
> (define_insn "*lt0_<mode>di"
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c
> index 4f764d0576f..70ddfaa21da 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
> @@ -10,4 +10,4 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; }
> unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
> unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
>
> -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */
> +/* { dg-final { scan-assembler-times {\mrlwinm\M|\mrldimi\M} 7 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
> new file mode 100644
> index 00000000000..8959578143b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
> @@ -0,0 +1,25 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +#include "rlwinm4di.h"
> +
> +long long arr1[] = {
> + 0x0000400100000001ULL, 0x0000000200000002ULL, 0xffff8000bfff8000ULL,
> + 0xffff8001ffff8001ULL, 0x0000800100000001ULL, 0x0000800100008001ULL,
> + 0x0000800200000002ULL, 0x0000800000008000ULL, 0x0000000080008000ULL,
> + 0xffff0001bfff0001ULL, 0xffff0001ffff0001ULL, 0x0001000200000002ULL,
> + 0x8001000080010000ULL, 0x0004000100000001ULL, 0x0004000100040001ULL,
> + 0x00000000bfffe001ULL, 0x0003fffe0001fffeULL, 0x0003fffe0003fffeULL,
> + 0x0002000100000001ULL, 0x0002000100020001ULL,
> +};
> +
> +int
> +main ()
> +{
> + long long a[sizeof (arr1) / sizeof (arr1[0])];
> +
> + foo (a);
> + if (__builtin_memcmp (a, arr1, sizeof (arr1)) != 0)
> + __builtin_abort ();
> + return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
> new file mode 100644
> index 00000000000..9494d0327b4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
> @@ -0,0 +1,19 @@
> +/* { dg-options "-O2 -mno-prefixed" } */
> +/* { dg-do compile { target has_arch_ppc64 } } */
> +
> +#define N 5
> +#define MASK 0xffffffffe0000003ULL
> +
> +typedef unsigned long long int64;
> +
> +int64
> +foo (int64 v)
> +{
> + unsigned int v1 = v;
> + unsigned int v2 = ((v1 << N) | (v1 >> (32 - N)));
> + return ((int64) v2 | ((int64) v2 << 32)) & MASK;
> +}
> +
> +/* { dg-final { scan-assembler-not {\mor\M} } } */
> +/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
> new file mode 100644
> index 00000000000..fcbc8f8d742
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
> @@ -0,0 +1,6 @@
> +/* { dg-options "-O2 -mno-prefixed" } */
> +/* { dg-do compile { target has_arch_ppc64 } } */
> +#include "rlwinm4di.h"
> +
> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 20 } } */
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
> new file mode 100644
> index 00000000000..59fe739ca85
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
> @@ -0,0 +1,25 @@
> +/* using 2 instructions(rlwinm) to build constants. */
> +void __attribute__ ((__noinline__, __noclone__))
> +foo (long long *arg)
> +{
> + *arg++ = 0x0000400100000001ULL;
> + *arg++ = 0x0000000200000002ULL;
> + *arg++ = 0xffff8000bfff8000ULL;
> + *arg++ = 0xffff8001ffff8001ULL;
> + *arg++ = 0x0000800100000001ULL;
> + *arg++ = 0x0000800100008001ULL;
> + *arg++ = 0x0000800200000002ULL;
> + *arg++ = 0x0000800000008000ULL;
> + *arg++ = 0x0000000080008000ULL;
> + *arg++ = 0xffff0001bfff0001ULL;
> + *arg++ = 0xffff0001ffff0001ULL;
> + *arg++ = 0x0001000200000002ULL;
> + *arg++ = 0x8001000080010000ULL;
> + *arg++ = 0x0004000100000001ULL;
> + *arg++ = 0x0004000100040001ULL;
> + *arg++ = 0x00000000bfffe001ULL;
> + *arg++ = 0x0003fffe0001fffeULL;
> + *arg++ = 0x0003fffe0003fffeULL;
> + *arg++ = 0x0002000100000001ULL;
> + *arg++ = 0x0002000100020001ULL;
> +}
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] add rlwinm pattern for DImode for constant building
2024-05-17 3:17 ` Jiufu Guo
@ 2024-06-06 1:53 ` Jiufu Guo
2024-06-21 3:06 ` Ping^3 " Jiufu Guo
0 siblings, 1 reply; 4+ messages in thread
From: Jiufu Guo @ 2024-06-06 1:53 UTC (permalink / raw)
To: gcc-patches; +Cc: segher, dje.gcc, linkw, bergner
Hi,
Gentle ping ...
Jiufu Guo <guojiufu@linux.ibm.com> writes:
> Hi,
>
> Gentle ping ...
>
> BR,
> Jeff(Jiufu) Guo
>
> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>
>> Hi,
>>
>> 'rlwinm' pattern is already well used for SImode. As this instruction
>> can touch the whole 64bit register, so some constants in 64bit(DImode)
>> can be built via 'lis/li+rlwinm'. To achieve this, a new pattern for
>> 'rlwinm' is added, and 'rs6000_emit_set_long_const' is updated to check
>> if a constant is able to be built by 'lis/li; rlwinm'.
>>
>> Bootstrap and regtest pass on ppc64{,le}.
>>
>> Is this patch ok for trunk (when stage1 is open)?
Is this patch ok for trunk?
BR,
Jeff(Jiufu) Guo
>>
>> Jeff (Jiufu Guo).
>>
>> gcc/ChangeLog:
>>
>> * config/rs6000/rs6000-protos.h (can_be_rotated_to_lowbits): Add new
>> parameter.
>> * config/rs6000/rs6000.cc (can_be_built_by_li_lis_and_rlwinm): New function.
>> (rs6000_emit_set_long_const): Generate 'lis/li+rlwinm'.
>> (can_be_rotated_to_lowbits): Add new parameter.
>> * config/rs6000/rs6000.md (rlwinm_di_mask): New pattern.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.target/powerpc/pr93012.c: Update to match 'rlwinm'.
>> * gcc.target/powerpc/rlwinm4di-1.c: New test.
>> * gcc.target/powerpc/rlwinm4di-2.c: New test.
>> * gcc.target/powerpc/rlwinm4di.c: New test.
>> * gcc.target/powerpc/rlwinm4di.h: New test.
>>
>> ---
>> gcc/config/rs6000/rs6000-protos.h | 2 +-
>> gcc/config/rs6000/rs6000.cc | 65 ++++++++++++++++++-
>> gcc/config/rs6000/rs6000.md | 18 +++++
>> gcc/testsuite/gcc.target/powerpc/pr93012.c | 2 +-
>> .../gcc.target/powerpc/rlwinm4di-1.c | 25 +++++++
>> .../gcc.target/powerpc/rlwinm4di-2.c | 19 ++++++
>> gcc/testsuite/gcc.target/powerpc/rlwinm4di.c | 6 ++
>> gcc/testsuite/gcc.target/powerpc/rlwinm4di.h | 25 +++++++
>> 8 files changed, 158 insertions(+), 4 deletions(-)
>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>>
>> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
>> index 09a57a806fa..10505a8061a 100644
>> --- a/gcc/config/rs6000/rs6000-protos.h
>> +++ b/gcc/config/rs6000/rs6000-protos.h
>> @@ -36,7 +36,7 @@ extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr);
>> extern int vspltis_shifted (rtx);
>> extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
>> extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
>> -extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *);
>> +extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *, bool = false);
>> extern bool can_be_rotated_to_positive_16bits (HOST_WIDE_INT);
>> extern bool can_be_rotated_to_negative_15bits (HOST_WIDE_INT);
>> extern int num_insns_constant (rtx, machine_mode);
>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>> index 6ba9df4f02e..853eaede673 100644
>> --- a/gcc/config/rs6000/rs6000.cc
>> +++ b/gcc/config/rs6000/rs6000.cc
>> @@ -10454,6 +10454,51 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
>> return false;
>> }
>>
>> +/* Check if value C can be generated by 2 instructions, one instruction
>> + is li/lis, another instruction is rlwinm. */
>> +
>> +static bool
>> +can_be_built_by_li_lis_and_rlwinm (HOST_WIDE_INT c, HOST_WIDE_INT *val,
>> + int *shift, HOST_WIDE_INT *mask)
>> +{
>> + unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL;
>> + unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL;
>> + unsigned HOST_WIDE_INT v;
>> +
>> + /* diff of high and low (high ^ low) should be the mask position. */
>> + unsigned HOST_WIDE_INT m = low ^ high;
>> + int tz = ctz_hwi (m);
>> + int lz = clz_hwi (m);
>> + if (m != 0)
>> + m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz);
>> + if (high != 0)
>> + m = ~m;
>> + v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF);
>> +
>> + if ((high != 0) && ((v & m) != low || lz < 33 || tz < 1))
>> + return false;
>> +
>> + /* rotl32 on positive/negative value of 'li' 15/16bits. */
>> + int n;
>> + if (!can_be_rotated_to_lowbits (v, 15, &n, true)
>> + && !can_be_rotated_to_lowbits ((~v) & 0xFFFFFFFFULL, 15, &n, true))
>> + {
>> + /* rotate32 from a negative value of 'lis'. */
>> + if (!can_be_rotated_to_lowbits (v & 0xFFFFFFFFULL, 16, &n, true))
>> + return false;
>> + n += 16;
>> + }
>> + n = 32 - (n % 32);
>> + n %= 32;
>> + v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF;
>> + if (v & 0x80000000ULL)
>> + v |= HOST_WIDE_INT_M1U << 32;
>> + *mask = m;
>> + *val = v;
>> + *shift = n;
>> + return true;
>> +}
>> +
>> /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
>> Output insns to set DEST equal to the constant C as a series of
>> lis, ori and shl instructions. If NUM_INSNS is not NULL, then
>> @@ -10553,6 +10598,18 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
>> return;
>> }
>>
>> + HOST_WIDE_INT val;
>> + if (can_be_built_by_li_lis_and_rlwinm (c, &val, &shift, &mask))
>> + {
>> + /* li/lis; rlwinm */
>> + count_or_emit_insn (temp, GEN_INT (val));
>> + rtx low = temp ? gen_lowpart (SImode, temp) : nullptr;
>> + rtx m = GEN_INT (mask);
>> + rtx n = GEN_INT (shift);
>> + count_or_emit_insn (gen_rlwinm_di_mask (dest, low, n, m));
>> + return;
>> + }
>> +
>> if (ud3 == 0 && ud4 == 0)
>> {
>> gcc_assert ((ud2 & 0x8000) && ud1 != 0);
>> @@ -15220,7 +15277,8 @@ rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
>> Return false otherwise. */
>>
>> bool
>> -can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
>> +can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot,
>> + bool rotl32)
>> {
>> int clz = HOST_BITS_PER_WIDE_INT - lowbits;
>>
>> @@ -15244,7 +15302,10 @@ can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
>> ^bit -> Vbit, , then zeros are at head or tail.
>> 00...00xxx100, 'clz - 1' >= 'bits of xxxx'. */
>> const int rot_bits = lowbits + 1;
>> - unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
>> + unsigned HOST_WIDE_INT rc;
>> + rc = rotl32 ? ((((c & 0xFFFFFFFFULL) >> rot_bits)
>> + | ((c << (32 - rot_bits)) & 0xFFFFFFFFULL)))
>> + : (c >> rot_bits) | (c << (clz - 1));
>> tz = ctz_hwi (rc);
>> if (clz_hwi (rc) + tz >= clz)
>> {
>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
>> index bc8bc6ab060..8a82ba3e26c 100644
>> --- a/gcc/config/rs6000/rs6000.md
>> +++ b/gcc/config/rs6000/rs6000.md
>> @@ -4213,6 +4213,24 @@ (define_insn_and_split "*rotl<mode>3_mask_dot2"
>> (set_attr "dot" "yes")
>> (set_attr "length" "4,8")])
>>
>> +; define an insn about rlwinm for DI mode (with high part content)
>> +(define_insn "rlwinm_di_mask"
>> + [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
>> + (and:DI (plus:DI
>> + (ashift:DI (subreg:DI
>> + (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r")
>> + (match_operand:SI 2 "const_int_operand" "n")) 0)
>> + (const_int 32))
>> + (zero_extend:DI (rotate:SI (match_dup 1) (match_dup 2))))
>> + (match_operand:DI 3 "const_int_operand" "n")))]
>> + "rs6000_is_valid_and_mask (operands[3], SImode)"
>> +{
>> + return UINTVAL (operands[3]) == -1ULL ?
>> + "rlwinm %0,%1,%h2,1,0" : "rlwinm %0,%1,%h2,%3";
>> +}
>> + [(set_attr "type" "shift")
>> + (set_attr "maybe_var_shift" "yes")])
>> +
>> ; Special case for less-than-0. We can do it with just one machine
>> ; instruction, but the generic optimizers do not realise it is cheap.
>> (define_insn "*lt0_<mode>di"
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c
>> index 4f764d0576f..70ddfaa21da 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
>> @@ -10,4 +10,4 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; }
>> unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
>> unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
>>
>> -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */
>> +/* { dg-final { scan-assembler-times {\mrlwinm\M|\mrldimi\M} 7 } } */
>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>> new file mode 100644
>> index 00000000000..8959578143b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>> @@ -0,0 +1,25 @@
>> +/* { dg-do run } */
>> +/* { dg-options "-O2" } */
>> +
>> +#include "rlwinm4di.h"
>> +
>> +long long arr1[] = {
>> + 0x0000400100000001ULL, 0x0000000200000002ULL, 0xffff8000bfff8000ULL,
>> + 0xffff8001ffff8001ULL, 0x0000800100000001ULL, 0x0000800100008001ULL,
>> + 0x0000800200000002ULL, 0x0000800000008000ULL, 0x0000000080008000ULL,
>> + 0xffff0001bfff0001ULL, 0xffff0001ffff0001ULL, 0x0001000200000002ULL,
>> + 0x8001000080010000ULL, 0x0004000100000001ULL, 0x0004000100040001ULL,
>> + 0x00000000bfffe001ULL, 0x0003fffe0001fffeULL, 0x0003fffe0003fffeULL,
>> + 0x0002000100000001ULL, 0x0002000100020001ULL,
>> +};
>> +
>> +int
>> +main ()
>> +{
>> + long long a[sizeof (arr1) / sizeof (arr1[0])];
>> +
>> + foo (a);
>> + if (__builtin_memcmp (a, arr1, sizeof (arr1)) != 0)
>> + __builtin_abort ();
>> + return 0;
>> +}
>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>> new file mode 100644
>> index 00000000000..9494d0327b4
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>> @@ -0,0 +1,19 @@
>> +/* { dg-options "-O2 -mno-prefixed" } */
>> +/* { dg-do compile { target has_arch_ppc64 } } */
>> +
>> +#define N 5
>> +#define MASK 0xffffffffe0000003ULL
>> +
>> +typedef unsigned long long int64;
>> +
>> +int64
>> +foo (int64 v)
>> +{
>> + unsigned int v1 = v;
>> + unsigned int v2 = ((v1 << N) | (v1 >> (32 - N)));
>> + return ((int64) v2 | ((int64) v2 << 32)) & MASK;
>> +}
>> +
>> +/* { dg-final { scan-assembler-not {\mor\M} } } */
>> +/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 1 } } */
>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>> new file mode 100644
>> index 00000000000..fcbc8f8d742
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>> @@ -0,0 +1,6 @@
>> +/* { dg-options "-O2 -mno-prefixed" } */
>> +/* { dg-do compile { target has_arch_ppc64 } } */
>> +#include "rlwinm4di.h"
>> +
>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 20 } } */
>> +
>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>> new file mode 100644
>> index 00000000000..59fe739ca85
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>> @@ -0,0 +1,25 @@
>> +/* using 2 instructions(rlwinm) to build constants. */
>> +void __attribute__ ((__noinline__, __noclone__))
>> +foo (long long *arg)
>> +{
>> + *arg++ = 0x0000400100000001ULL;
>> + *arg++ = 0x0000000200000002ULL;
>> + *arg++ = 0xffff8000bfff8000ULL;
>> + *arg++ = 0xffff8001ffff8001ULL;
>> + *arg++ = 0x0000800100000001ULL;
>> + *arg++ = 0x0000800100008001ULL;
>> + *arg++ = 0x0000800200000002ULL;
>> + *arg++ = 0x0000800000008000ULL;
>> + *arg++ = 0x0000000080008000ULL;
>> + *arg++ = 0xffff0001bfff0001ULL;
>> + *arg++ = 0xffff0001ffff0001ULL;
>> + *arg++ = 0x0001000200000002ULL;
>> + *arg++ = 0x8001000080010000ULL;
>> + *arg++ = 0x0004000100000001ULL;
>> + *arg++ = 0x0004000100040001ULL;
>> + *arg++ = 0x00000000bfffe001ULL;
>> + *arg++ = 0x0003fffe0001fffeULL;
>> + *arg++ = 0x0003fffe0003fffeULL;
>> + *arg++ = 0x0002000100000001ULL;
>> + *arg++ = 0x0002000100020001ULL;
>> +}
^ permalink raw reply [flat|nested] 4+ messages in thread
* Ping^3 [PATCH] add rlwinm pattern for DImode for constant building
2024-06-06 1:53 ` Jiufu Guo
@ 2024-06-21 3:06 ` Jiufu Guo
0 siblings, 0 replies; 4+ messages in thread
From: Jiufu Guo @ 2024-06-21 3:06 UTC (permalink / raw)
To: gcc-patches; +Cc: segher, dje.gcc, linkw, bergner
Hi,
Gentle ping.
BR,
Jeff(Jiufu) Guo
Jiufu Guo <guojiufu@linux.ibm.com> writes:
> Hi,
>
> Gentle ping ...
>
> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>
>> Hi,
>>
>> Gentle ping ...
>>
>> BR,
>> Jeff(Jiufu) Guo
>>
>> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>>
>>> Hi,
>>>
>>> The 'rlwinm' pattern is already well used for SImode. Since this instruction
>>> can touch the whole 64-bit register, some 64-bit (DImode) constants
>>> can be built via 'lis/li + rlwinm'. To achieve this, a new pattern for
>>> 'rlwinm' is added, and 'rs6000_emit_set_long_const' is updated to check
>>> whether a constant can be built by 'lis/li; rlwinm'.
>>>
>>> Bootstrap and regtest pass on ppc64{,le}.
>>>
>>> Is this patch ok for trunk (when stage1 is open)?
>
> Is this patch ok for trunk?
>
> BR,
> Jeff(Jiufu) Guo
>
>>>
>>> Jeff (Jiufu Guo).
>>>
>>> gcc/ChangeLog:
>>>
>>> * config/rs6000/rs6000-protos.h (can_be_rotated_to_lowbits): Add new
>>> parameter.
>>> * config/rs6000/rs6000.cc (can_be_built_by_li_lis_and_rlwinm): New function.
>>> (rs6000_emit_set_long_const): Generate 'lis/li+rlwinm'.
>>> (can_be_rotated_to_lowbits): Add new parameter.
>>> * config/rs6000/rs6000.md (rlwinm_di_mask): New pattern.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> * gcc.target/powerpc/pr93012.c: Update to match 'rlwinm'.
>>> * gcc.target/powerpc/rlwinm4di-1.c: New test.
>>> * gcc.target/powerpc/rlwinm4di-2.c: New test.
>>> * gcc.target/powerpc/rlwinm4di.c: New test.
>>> * gcc.target/powerpc/rlwinm4di.h: New test.
>>>
>>> ---
>>> gcc/config/rs6000/rs6000-protos.h | 2 +-
>>> gcc/config/rs6000/rs6000.cc | 65 ++++++++++++++++++-
>>> gcc/config/rs6000/rs6000.md | 18 +++++
>>> gcc/testsuite/gcc.target/powerpc/pr93012.c | 2 +-
>>> .../gcc.target/powerpc/rlwinm4di-1.c | 25 +++++++
>>> .../gcc.target/powerpc/rlwinm4di-2.c | 19 ++++++
>>> gcc/testsuite/gcc.target/powerpc/rlwinm4di.c | 6 ++
>>> gcc/testsuite/gcc.target/powerpc/rlwinm4di.h | 25 +++++++
>>> 8 files changed, 158 insertions(+), 4 deletions(-)
>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>>>
>>> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
>>> index 09a57a806fa..10505a8061a 100644
>>> --- a/gcc/config/rs6000/rs6000-protos.h
>>> +++ b/gcc/config/rs6000/rs6000-protos.h
>>> @@ -36,7 +36,7 @@ extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int * = nullptr);
>>> extern int vspltis_shifted (rtx);
>>> extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
>>> extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
>>> -extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *);
>>> +extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *, bool = false);
>>> extern bool can_be_rotated_to_positive_16bits (HOST_WIDE_INT);
>>> extern bool can_be_rotated_to_negative_15bits (HOST_WIDE_INT);
>>> extern int num_insns_constant (rtx, machine_mode);
>>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>>> index 6ba9df4f02e..853eaede673 100644
>>> --- a/gcc/config/rs6000/rs6000.cc
>>> +++ b/gcc/config/rs6000/rs6000.cc
>>> @@ -10454,6 +10454,51 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
>>> return false;
>>> }
>>>
>>> +/* Check if value C can be generated by 2 instructions: li/lis to load
>>> + *VAL, then rlwinm with rotate count *SHIFT and mask *MASK. */
>>> +
>>> +static bool
>>> +can_be_built_by_li_lis_and_rlwinm (HOST_WIDE_INT c, HOST_WIDE_INT *val,
>>> + int *shift, HOST_WIDE_INT *mask)
>>> +{
>>> + unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL;
>>> + unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL;
>>> + unsigned HOST_WIDE_INT v;
>>> +
>>> + /* The bits where high and low differ (high ^ low) locate the mask. */
>>> + unsigned HOST_WIDE_INT m = low ^ high;
>>> + int tz = ctz_hwi (m);
>>> + int lz = clz_hwi (m);
>>> + if (m != 0)
>>> + m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz);
>>> + if (high != 0)
>>> + m = ~m;
>>> + v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF);
>>> +
>>> + if ((high != 0) && ((v & m) != low || lz < 33 || tz < 1))
>>> + return false;
>>> +
>>> + /* rotl32 on positive/negative value of 'li' 15/16bits. */
>>> + int n;
>>> + if (!can_be_rotated_to_lowbits (v, 15, &n, true)
>>> + && !can_be_rotated_to_lowbits ((~v) & 0xFFFFFFFFULL, 15, &n, true))
>>> + {
>>> + /* rotate32 from a negative value of 'lis'. */
>>> + if (!can_be_rotated_to_lowbits (v & 0xFFFFFFFFULL, 16, &n, true))
>>> + return false;
>>> + n += 16;
>>> + }
>>> + n = 32 - (n % 32);
>>> + n %= 32;
>>> + v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF;
>>> + if (v & 0x80000000ULL)
>>> + v |= HOST_WIDE_INT_M1U << 32;
>>> + *mask = m;
>>> + *val = v;
>>> + *shift = n;
>>> + return true;
>>> +}
>>> +
>>> /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
>>> Output insns to set DEST equal to the constant C as a series of
>>> lis, ori and shl instructions. If NUM_INSNS is not NULL, then
>>> @@ -10553,6 +10598,18 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
>>> return;
>>> }
>>>
>>> + HOST_WIDE_INT val;
>>> + if (can_be_built_by_li_lis_and_rlwinm (c, &val, &shift, &mask))
>>> + {
>>> + /* li/lis; rlwinm */
>>> + count_or_emit_insn (temp, GEN_INT (val));
>>> + rtx low = temp ? gen_lowpart (SImode, temp) : nullptr;
>>> + rtx m = GEN_INT (mask);
>>> + rtx n = GEN_INT (shift);
>>> + count_or_emit_insn (gen_rlwinm_di_mask (dest, low, n, m));
>>> + return;
>>> + }
>>> +
>>> if (ud3 == 0 && ud4 == 0)
>>> {
>>> gcc_assert ((ud2 & 0x8000) && ud1 != 0);
>>> @@ -15220,7 +15277,8 @@ rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
>>> Return false otherwise. */
>>>
>>> bool
>>> -can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
>>> +can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot,
>>> + bool rotl32)
>>> {
>>> int clz = HOST_BITS_PER_WIDE_INT - lowbits;
>>>
>>> @@ -15244,7 +15302,10 @@ can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
>>> ^bit -> Vbit, , then zeros are at head or tail.
>>> 00...00xxx100, 'clz - 1' >= 'bits of xxxx'. */
>>> const int rot_bits = lowbits + 1;
>>> - unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
>>> + unsigned HOST_WIDE_INT rc;
>>> + rc = rotl32 ? ((((c & 0xFFFFFFFFULL) >> rot_bits)
>>> + | ((c << (32 - rot_bits)) & 0xFFFFFFFFULL)))
>>> + : (c >> rot_bits) | (c << (clz - 1));
>>> tz = ctz_hwi (rc);
>>> if (clz_hwi (rc) + tz >= clz)
>>> {
>>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
>>> index bc8bc6ab060..8a82ba3e26c 100644
>>> --- a/gcc/config/rs6000/rs6000.md
>>> +++ b/gcc/config/rs6000/rs6000.md
>>> @@ -4213,6 +4213,24 @@ (define_insn_and_split "*rotl<mode>3_mask_dot2"
>>> (set_attr "dot" "yes")
>>> (set_attr "length" "4,8")])
>>>
>>> +; rlwinm for DImode: the rotated word is placed in both halves, then masked.
>>> +(define_insn "rlwinm_di_mask"
>>> + [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
>>> + (and:DI (plus:DI
>>> + (ashift:DI (subreg:DI
>>> + (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r")
>>> + (match_operand:SI 2 "const_int_operand" "n")) 0)
>>> + (const_int 32))
>>> + (zero_extend:DI (rotate:SI (match_dup 1) (match_dup 2))))
>>> + (match_operand:DI 3 "const_int_operand" "n")))]
>>> + "rs6000_is_valid_and_mask (operands[3], SImode)"
>>> +{
>>> + return UINTVAL (operands[3]) == -1ULL ?
>>> + "rlwinm %0,%1,%h2,1,0" : "rlwinm %0,%1,%h2,%3";
>>> +}
>>> + [(set_attr "type" "shift")
>>> + (set_attr "maybe_var_shift" "yes")])
>>> +
>>> ; Special case for less-than-0. We can do it with just one machine
>>> ; instruction, but the generic optimizers do not realise it is cheap.
>>> (define_insn "*lt0_<mode>di"
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c
>>> index 4f764d0576f..70ddfaa21da 100644
>>> --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
>>> @@ -10,4 +10,4 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; }
>>> unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
>>> unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
>>>
>>> -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */
>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M|\mrldimi\M} 7 } } */
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>>> new file mode 100644
>>> index 00000000000..8959578143b
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>>> @@ -0,0 +1,25 @@
>>> +/* { dg-do run } */
>>> +/* { dg-options "-O2" } */
>>> +
>>> +#include "rlwinm4di.h"
>>> +
>>> +long long arr1[] = {
>>> + 0x0000400100000001ULL, 0x0000000200000002ULL, 0xffff8000bfff8000ULL,
>>> + 0xffff8001ffff8001ULL, 0x0000800100000001ULL, 0x0000800100008001ULL,
>>> + 0x0000800200000002ULL, 0x0000800000008000ULL, 0x0000000080008000ULL,
>>> + 0xffff0001bfff0001ULL, 0xffff0001ffff0001ULL, 0x0001000200000002ULL,
>>> + 0x8001000080010000ULL, 0x0004000100000001ULL, 0x0004000100040001ULL,
>>> + 0x00000000bfffe001ULL, 0x0003fffe0001fffeULL, 0x0003fffe0003fffeULL,
>>> + 0x0002000100000001ULL, 0x0002000100020001ULL,
>>> +};
>>> +
>>> +int
>>> +main ()
>>> +{
>>> + long long a[sizeof (arr1) / sizeof (arr1[0])];
>>> +
>>> + foo (a);
>>> + if (__builtin_memcmp (a, arr1, sizeof (arr1)) != 0)
>>> + __builtin_abort ();
>>> + return 0;
>>> +}
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>>> new file mode 100644
>>> index 00000000000..9494d0327b4
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>>> @@ -0,0 +1,19 @@
>>> +/* { dg-options "-O2 -mno-prefixed" } */
>>> +/* { dg-do compile { target has_arch_ppc64 } } */
>>> +
>>> +#define N 5
>>> +#define MASK 0xffffffffe0000003ULL
>>> +
>>> +typedef unsigned long long int64;
>>> +
>>> +int64
>>> +foo (int64 v)
>>> +{
>>> + unsigned int v1 = v;
>>> + unsigned int v2 = ((v1 << N) | (v1 >> (32 - N)));
>>> + return ((int64) v2 | ((int64) v2 << 32)) & MASK;
>>> +}
>>> +
>>> +/* { dg-final { scan-assembler-not {\mor\M} } } */
>>> +/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 1 } } */
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>>> new file mode 100644
>>> index 00000000000..fcbc8f8d742
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>>> @@ -0,0 +1,6 @@
>>> +/* { dg-options "-O2 -mno-prefixed" } */
>>> +/* { dg-do compile { target has_arch_ppc64 } } */
>>> +#include "rlwinm4di.h"
>>> +
>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 20 } } */
>>> +
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>>> new file mode 100644
>>> index 00000000000..59fe739ca85
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>>> @@ -0,0 +1,25 @@
>>> +/* Build each constant with 2 instructions (li/lis + rlwinm). */
>>> +void __attribute__ ((__noinline__, __noclone__))
>>> +foo (long long *arg)
>>> +{
>>> + *arg++ = 0x0000400100000001ULL;
>>> + *arg++ = 0x0000000200000002ULL;
>>> + *arg++ = 0xffff8000bfff8000ULL;
>>> + *arg++ = 0xffff8001ffff8001ULL;
>>> + *arg++ = 0x0000800100000001ULL;
>>> + *arg++ = 0x0000800100008001ULL;
>>> + *arg++ = 0x0000800200000002ULL;
>>> + *arg++ = 0x0000800000008000ULL;
>>> + *arg++ = 0x0000000080008000ULL;
>>> + *arg++ = 0xffff0001bfff0001ULL;
>>> + *arg++ = 0xffff0001ffff0001ULL;
>>> + *arg++ = 0x0001000200000002ULL;
>>> + *arg++ = 0x8001000080010000ULL;
>>> + *arg++ = 0x0004000100000001ULL;
>>> + *arg++ = 0x0004000100040001ULL;
>>> + *arg++ = 0x00000000bfffe001ULL;
>>> + *arg++ = 0x0003fffe0001fffeULL;
>>> + *arg++ = 0x0003fffe0003fffeULL;
>>> + *arg++ = 0x0002000100000001ULL;
>>> + *arg++ = 0x0002000100020001ULL;
>>> +}
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2024-06-21 3:06 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-22 2:35 [PATCH] add rlwinm pattern for DImode for constant building Jiufu Guo
2024-05-17 3:17 ` Jiufu Guo
2024-06-06 1:53 ` Jiufu Guo
2024-06-21 3:06 ` Ping^3 " Jiufu Guo
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).