public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] x86: Change CTZ_DEFINED_VALUE_AT_ZERO to return 0/2
@ 2020-07-13 13:42 H.J. Lu
  2020-08-23 11:58 ` PING " H.J. Lu
  2020-08-25 19:08 ` Jeff Law
  0 siblings, 2 replies; 3+ messages in thread
From: H.J. Lu @ 2020-07-13 13:42 UTC (permalink / raw)
  To: gcc-patches

Change CLZ_DEFINED_VALUE_AT_ZERO/CTZ_DEFINED_VALUE_AT_ZERO to return 0/2
to enable table-based clz/ctz optimization:

 -- Macro: CLZ_DEFINED_VALUE_AT_ZERO (MODE, VALUE)
 -- Macro: CTZ_DEFINED_VALUE_AT_ZERO (MODE, VALUE)
     A C expression that indicates whether the architecture defines a
     value for 'clz' or 'ctz' with a zero operand.  A result of '0'
     indicates the value is undefined.  If the value is defined for only
     the RTL expression, the macro should evaluate to '1'; if the value
     applies also to the corresponding optab entry (which is normally
     the case if it expands directly into the corresponding RTL), then
     the macro should evaluate to '2'.  In the cases where the value is
     defined, VALUE should be set to this value.

gcc/

	PR target/95863
	* config/i386/i386.h (CTZ_DEFINED_VALUE_AT_ZERO): Return 0/2.
	(CLZ_DEFINED_VALUE_AT_ZERO): Likewise.

gcc/testsuite/

	PR target/95863
	* gcc.target/i386/pr95863-1.c: New test.
	* gcc.target/i386/pr95863-2.c: Likewise.
---
 gcc/config/i386/i386.h                    |  4 +-
 gcc/testsuite/gcc.target/i386/pr95863-1.c | 47 +++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr95863-2.c | 27 +++++++++++++
 3 files changed, 76 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr95863-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr95863-2.c

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index f4a8f1391fa..1deb59f286f 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2946,9 +2946,9 @@ extern void debug_dispatch_window (int);
 /* The value at zero is only defined for the BMI instructions
    LZCNT and TZCNT, not the BSR/BSF insns in the original isa.  */
 #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
-	((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_BMI ? 1 : 0)
+	((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_BMI ? 2 : 0)
 #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
-	((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_LZCNT ? 1 : 0)
+	((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_LZCNT ? 2 : 0)
 
 
 /* Flags returned by ix86_get_callcvt ().  */
diff --git a/gcc/testsuite/gcc.target/i386/pr95863-1.c b/gcc/testsuite/gcc.target/i386/pr95863-1.c
new file mode 100644
index 00000000000..f3918a1a766
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95863-1.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-O -mbmi" } */
+
+int ctz1 (unsigned x)
+{
+  static const char table[32] =
+    {
+      0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+    };
+
+  return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
+}
+
+int ctz2 (unsigned x)
+{
+#define u 0
+  static short table[64] =
+    {
+      32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
+      10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
+      31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
+      30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u
+    };
+
+  x = (x & -x) * 0x0450FBAF;
+  return table[x >> 26];
+}
+
+int ctz3 (unsigned x)
+{
+  static int table[32] =
+    {
+      0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26,
+      31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27
+    };
+
+  if (x == 0) return 32;
+  x = (x & -x) * 0x04D7651F;
+  return table[x >> 27];
+}
+
+/* { dg-final { scan-assembler-times "tzcntl\t" 3 } } */
+/* { dg-final { scan-assembler-times "andl\t" 1 } } */
+/* { dg-final { scan-assembler-not "neg" } } */
+/* { dg-final { scan-assembler-not "imul" } } */
+/* { dg-final { scan-assembler-not "shr" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr95863-2.c b/gcc/testsuite/gcc.target/i386/pr95863-2.c
new file mode 100644
index 00000000000..cb56dfc6d94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95863-2.c
@@ -0,0 +1,27 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O -mbmi" } */
+
+static const unsigned long long magic = 0x03f08c5392f756cdULL;
+
+static const char table[64] = {
+     0,  1, 12,  2, 13, 22, 17,  3,
+    14, 33, 23, 36, 18, 58, 28,  4,
+    62, 15, 34, 26, 24, 48, 50, 37,
+    19, 55, 59, 52, 29, 44, 39,  5,
+    63, 11, 21, 16, 32, 35, 57, 27,
+    61, 25, 47, 49, 54, 51, 43, 38,
+    10, 20, 31, 56, 60, 46, 53, 42,
+     9, 30, 45, 41,  8, 40,  7,  6,
+};
+
+int ctz4 (unsigned long long x)
+{
+  unsigned long long lsb = x & -x;
+  return table[(lsb * magic) >> 58];
+}
+
+/* { dg-final { scan-assembler-times "tzcntq\t" 1 } } */
+/* { dg-final { scan-assembler-times "andl\t" 1 } } */
+/* { dg-final { scan-assembler-not "negq" } } */
+/* { dg-final { scan-assembler-not "imulq" } } */
+/* { dg-final { scan-assembler-not "shrq" } } */
-- 
2.26.2


^ permalink raw reply	[flat|nested] 3+ messages in thread

* PING [PATCH] x86: Change CTZ_DEFINED_VALUE_AT_ZERO to return 0/2
  2020-07-13 13:42 [PATCH] x86: Change CTZ_DEFINED_VALUE_AT_ZERO to return 0/2 H.J. Lu
@ 2020-08-23 11:58 ` H.J. Lu
  2020-08-25 19:08 ` Jeff Law
  1 sibling, 0 replies; 3+ messages in thread
From: H.J. Lu @ 2020-08-23 11:58 UTC (permalink / raw)
  To: GCC Patches, Jakub Jelinek, Jeffrey Law, Jan Hubicka, Richard Biener

On Mon, Jul 13, 2020 at 6:42 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Change CTZ_DEFINED_VALUE_AT_ZERO/CTZ_DEFINED_VALUE_AT_ZERO to return 0/2
> to enable table-based clz/ctz optimization:
>
>  -- Macro: CLZ_DEFINED_VALUE_AT_ZERO (MODE, VALUE)
>  -- Macro: CTZ_DEFINED_VALUE_AT_ZERO (MODE, VALUE)
>      A C expression that indicates whether the architecture defines a
>      value for 'clz' or 'ctz' with a zero operand.  A result of '0'
>      indicates the value is undefined.  If the value is defined for only
>      the RTL expression, the macro should evaluate to '1'; if the value
>      applies also to the corresponding optab entry (which is normally
>      the case if it expands directly into the corresponding RTL), then
>      the macro should evaluate to '2'.  In the cases where the value is
>      defined, VALUE should be set to this value.
>
> gcc/
>
>         PR target/95863
>         * config/i386/i386.h (CTZ_DEFINED_VALUE_AT_ZERO): Return 0/2.
>         (CLZ_DEFINED_VALUE_AT_ZERO): Likewise.
>
> gcc/testsuite/
>
>         PR target/95863
>         * gcc.target/i386/pr95863-1.c: New test.
>         * gcc.target/i386/pr95863-2.c: Likewise.
> ---
>  gcc/config/i386/i386.h                    |  4 +-
>  gcc/testsuite/gcc.target/i386/pr95863-1.c | 47 +++++++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr95863-2.c | 27 +++++++++++++
>  3 files changed, 76 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr95863-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr95863-2.c
>
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index f4a8f1391fa..1deb59f286f 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -2946,9 +2946,9 @@ extern void debug_dispatch_window (int);
>  /* The value at zero is only defined for the BMI instructions
>     LZCNT and TZCNT, not the BSR/BSF insns in the original isa.  */
>  #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
> -       ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_BMI ? 1 : 0)
> +       ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_BMI ? 2 : 0)
>  #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
> -       ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_LZCNT ? 1 : 0)
> +       ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_LZCNT ? 2 : 0)
>
>
>  /* Flags returned by ix86_get_callcvt ().  */
> diff --git a/gcc/testsuite/gcc.target/i386/pr95863-1.c b/gcc/testsuite/gcc.target/i386/pr95863-1.c
> new file mode 100644
> index 00000000000..f3918a1a766
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr95863-1.c
> @@ -0,0 +1,47 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -mbmi" } */
> +
> +int ctz1 (unsigned x)
> +{
> +  static const char table[32] =
> +    {
> +      0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
> +      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
> +    };
> +
> +  return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
> +}
> +
> +int ctz2 (unsigned x)
> +{
> +#define u 0
> +  static short table[64] =
> +    {
> +      32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
> +      10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
> +      31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
> +      30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u
> +    };
> +
> +  x = (x & -x) * 0x0450FBAF;
> +  return table[x >> 26];
> +}
> +
> +int ctz3 (unsigned x)
> +{
> +  static int table[32] =
> +    {
> +      0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26,
> +      31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27
> +    };
> +
> +  if (x == 0) return 32;
> +  x = (x & -x) * 0x04D7651F;
> +  return table[x >> 27];
> +}
> +
> +/* { dg-final { scan-assembler-times "tzcntl\t" 3 } } */
> +/* { dg-final { scan-assembler-times "andl\t" 1 } } */
> +/* { dg-final { scan-assembler-not "neg" } } */
> +/* { dg-final { scan-assembler-not "imul" } } */
> +/* { dg-final { scan-assembler-not "shr" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr95863-2.c b/gcc/testsuite/gcc.target/i386/pr95863-2.c
> new file mode 100644
> index 00000000000..cb56dfc6d94
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr95863-2.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O -mbmi" } */
> +
> +static const unsigned long long magic = 0x03f08c5392f756cdULL;
> +
> +static const char table[64] = {
> +     0,  1, 12,  2, 13, 22, 17,  3,
> +    14, 33, 23, 36, 18, 58, 28,  4,
> +    62, 15, 34, 26, 24, 48, 50, 37,
> +    19, 55, 59, 52, 29, 44, 39,  5,
> +    63, 11, 21, 16, 32, 35, 57, 27,
> +    61, 25, 47, 49, 54, 51, 43, 38,
> +    10, 20, 31, 56, 60, 46, 53, 42,
> +     9, 30, 45, 41,  8, 40,  7,  6,
> +};
> +
> +int ctz4 (unsigned long long x)
> +{
> +  unsigned long long lsb = x & -x;
> +  return table[(lsb * magic) >> 58];
> +}
> +
> +/* { dg-final { scan-assembler-times "tzcntq\t" 1 } } */
> +/* { dg-final { scan-assembler-times "andl\t" 1 } } */
> +/* { dg-final { scan-assembler-not "negq" } } */
> +/* { dg-final { scan-assembler-not "imulq" } } */
> +/* { dg-final { scan-assembler-not "shrq" } } */
> --
> 2.26.2
>

PING.

-- 
H.J.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] x86: Change CTZ_DEFINED_VALUE_AT_ZERO to return 0/2
  2020-07-13 13:42 [PATCH] x86: Change CTZ_DEFINED_VALUE_AT_ZERO to return 0/2 H.J. Lu
  2020-08-23 11:58 ` PING " H.J. Lu
@ 2020-08-25 19:08 ` Jeff Law
  1 sibling, 0 replies; 3+ messages in thread
From: Jeff Law @ 2020-08-25 19:08 UTC (permalink / raw)
  To: H.J. Lu, gcc-patches

On Mon, 2020-07-13 at 06:42 -0700, H.J. Lu via Gcc-patches wrote:
> Change CTZ_DEFINED_VALUE_AT_ZERO/CTZ_DEFINED_VALUE_AT_ZERO to return 0/2
> to enable table-based clz/ctz optimization:
> 
>  -- Macro: CLZ_DEFINED_VALUE_AT_ZERO (MODE, VALUE)
>  -- Macro: CTZ_DEFINED_VALUE_AT_ZERO (MODE, VALUE)
>      A C expression that indicates whether the architecture defines a
>      value for 'clz' or 'ctz' with a zero operand.  A result of '0'
>      indicates the value is undefined.  If the value is defined for only
>      the RTL expression, the macro should evaluate to '1'; if the value
>      applies also to the corresponding optab entry (which is normally
>      the case if it expands directly into the corresponding RTL), then
>      the macro should evaluate to '2'.  In the cases where the value is
>      defined, VALUE should be set to this value.
> 
> gcc/
> 
> 	PR target/95863
> 	* config/i386/i386.h (CTZ_DEFINED_VALUE_AT_ZERO): Return 0/2.
> 	(CLZ_DEFINED_VALUE_AT_ZERO): Likewise.
> 
> gcc/testsuite/
> 
> 	PR target/95863
> 	* gcc.target/i386/pr95863-1.c: New test.
> 	* gcc.target/i386/pr95863-2.c: Likewise.
OK
jeff
> 


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-08-25 19:08 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-13 13:42 [PATCH] x86: Change CTZ_DEFINED_VALUE_AT_ZERO to return 0/2 H.J. Lu
2020-08-23 11:58 ` PING " H.J. Lu
2020-08-25 19:08 ` Jeff Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).