public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix ctz issues (PR93231)
@ 2020-01-13 17:53 Wilco Dijkstra
  2020-01-13 18:16 ` Jakub Jelinek
  0 siblings, 1 reply; 4+ messages in thread
From: Wilco Dijkstra @ 2020-01-13 17:53 UTC (permalink / raw)
  To: GCC Patches; +Cc: jakub

Further improve the ctz recognition: Avoid ICEing on negative shift
counts or multiply constants.  Check the type is 8 bits for the string
constant case to avoid accidentally matching a wide STRING_CST.
Add a tree_expr_nonzero_p check to allow the optimization even if
CTZ_DEFINED_VALUE_AT_ZERO returns 0 or 1.  Add extra test cases.

(note the diff uses the old tree and includes Jakub's bootstrap fixes)

Bootstrap OK on AArch64 and x64.

ChangeLog:
2020-01-13  Wilco Dijkstra  <wdijkstr@arm.com>

	PR tree-optimization/93231
	* tree-ssa-forwprop.c
	(optimize_count_trailing_zeroes): Use tree_to_shwi for shift
	and TREE_INT_CST_LOW for multiply constants.  Check CST_STRING
	element size is 8 bits.
	(simplify_count_trailing_zeroes): Add test to handle known non-zero
	inputs more efficiently.

    testsuite/
	* gcc.dg/pr90838.c: New test.
	* gcc.dg/pr93231.c: New test.
---

diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c
new file mode 100644
index 0000000000000000000000000000000000000000..8070d439f6404dc6884a11e58f1db41c435e61fb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr90838.c
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop2-details" } */
+
+int ctz1 (unsigned x)
+{
+  static const char table[32] = "\x00\x01\x1c\x02\x1d\x0e\x18\x03\x1e\x16\x14"
+    "\x0f\x19\x11\x04\b\x1f\x1b\r\x17\x15\x13\x10\x07\x1a\f\x12\x06\v\x05\n\t";
+
+  return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
+}
+
+int ctz2 (unsigned x)
+{
+  const int u = 0;
+  static short table[64] =
+    {
+      32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
+      10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
+      31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
+      30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u
+    };
+
+  x = (x & -x) * 0x0450FBAF;
+  return table[x >> 26];
+}
+
+int ctz3 (unsigned x)
+{
+  static int table[32] =
+    {
+      0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26,
+      31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27
+    };
+
+  if (x == 0) return 32;
+  x = (x & -x) * 0x04D7651F;
+  return table[x >> 27];
+}
+
+static const unsigned long long magic = 0x03f08c5392f756cdULL;
+
+static const char table[64] = {
+     0,  1, 12,  2, 13, 22, 17,  3,
+    14, 33, 23, 36, 18, 58, 28,  4,
+    62, 15, 34, 26, 24, 48, 50, 37,
+    19, 55, 59, 52, 29, 44, 39,  5,
+    63, 11, 21, 16, 32, 35, 57, 27,
+    61, 25, 47, 49, 54, 51, 43, 38,
+    10, 20, 31, 56, 60, 46, 53, 42,
+     9, 30, 45, 41,  8, 40,  7,  6,
+};
+
+int ctz4 (unsigned long x)
+{
+  unsigned long lsb = x & -x;
+  return table[(lsb * magic) >> 58];
+}
+
+/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target aarch64*-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/pr93231.c b/gcc/testsuite/gcc.dg/pr93231.c
new file mode 100644
index 0000000000000000000000000000000000000000..80853bad23b28abbe51bb6e2b9f8beeb06618e2f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr93231.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop2-details -Wno-shift-count-negative" } */
+
+int ctz_ice1 (int x)
+{
+  static const char table[32] =
+    {
+      0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+    };
+
+  return table[((int)((x & -x) * -0x077CB531)) >> 27];
+}
+
+int ctz_ice2 (unsigned x)
+{
+  static const char table[32] =
+    {
+      0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+    };
+
+  return table[((unsigned)((x & -x) * 0x077CB531U)) >> -27];
+}
+
+// This should never match
+int ctz_fail (int x)
+{
+  static const unsigned short int table[32] =
+    u"\x0100\x021c\x0e1d\x0318\x161e\x0f14\x1119\x0804\x1b1f\x170d\x1315\x0710\x0c1a\x0612\x050b\x090a";
+
+  return table[((int)((x & -x) * 0x077CB531)) >> 27];
+}
+
+/* { dg-final { scan-tree-dump-not {= \.CTZ} "forwprop2" } } */
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index aced6eb2d9139cae277593022415bc5efdf45175..afc52e5dfbaea29cb07707fee6cbbd3e0c969eda 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -1864,8 +1864,8 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc,
   tree input_type = TREE_TYPE (x);
   unsigned input_bits = tree_to_shwi (TYPE_SIZE (input_type));
 
-  /* Check the array is not wider than integer type and the input is a 32-bit
-     or 64-bit type.  */
+  /* Check the array element type is not wider than 32 bits and the input is
+     a 32-bit or 64-bit type.  */
   if (TYPE_PRECISION (type) > 32)
     return false;
   if (input_bits != 32 && input_bits != 64)
@@ -1879,7 +1879,7 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc,
   if (!low || !integer_zerop (low))
     return false;
 
-  unsigned shiftval = tree_to_uhwi (tshift);
+  unsigned shiftval = tree_to_shwi (tshift);
 
   /* Check the shift extracts the top 5..7 bits.  */
   if (shiftval < input_bits - 7 || shiftval > input_bits - 5)
@@ -1889,12 +1889,13 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc,
   if (!ctor)
     return false;
 
-  unsigned HOST_WIDE_INT val = tree_to_uhwi (mulc);
+  /* Extract the binary representation of the multiply constant.  */
+  unsigned HOST_WIDE_INT val = TREE_INT_CST_LOW (mulc);
 
   if (TREE_CODE (ctor) == CONSTRUCTOR)
     return check_ctz_array (ctor, val, zero_val, shiftval, input_bits);
 
-  if (TREE_CODE (ctor) == STRING_CST)
+  if (TREE_CODE (ctor) == STRING_CST && TYPE_PRECISION (type) == 8)
     return check_ctz_string (ctor, val, zero_val, shiftval, input_bits);
 
   return false;
@@ -1920,15 +1921,24 @@ simplify_count_trailing_zeroes (gimple_stmt_iterator *gsi)
 				      res_ops[1], res_ops[2], zero_val))
     {
       tree type = TREE_TYPE (res_ops[0]);
-      HOST_WIDE_INT ctzval;
+      HOST_WIDE_INT ctz_val = 0;
       HOST_WIDE_INT type_size = tree_to_shwi (TYPE_SIZE (type));
-      bool zero_ok = CTZ_DEFINED_VALUE_AT_ZERO (TYPE_MODE (type), ctzval) == 2;
+      bool zero_ok =
+	CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type), ctz_val) == 2;
+
+      /* If the input value can't be zero, don't special case ctz (0).  */
+      if (tree_expr_nonzero_p (res_ops[0]))
+	{
+	  zero_ok = true;
+	  zero_val = 0;
+	  ctz_val = 0;
+	}
 
       /* Skip if there is no value defined at zero, or if we can't easily
 	 return the correct value for zero.  */
       if (!zero_ok)
 	return false;
-      if (zero_val != ctzval && !(zero_val == 0 && ctzval == type_size))
+      if (zero_val != ctz_val && !(zero_val == 0 && ctz_val == type_size))
 	return false;
 
       gimple_seq seq = NULL;
@@ -1941,7 +1951,7 @@ simplify_count_trailing_zeroes (gimple_stmt_iterator *gsi)
       tree prev_lhs = gimple_call_lhs (call);
 
       /* Emit ctz (x) & 31 if ctz (0) is 32 but we need to return 0.  */
-      if (zero_val == 0 && ctzval == type_size)
+      if (zero_val == 0 && ctz_val == type_size)
 	{
 	  g = gimple_build_assign (make_ssa_name (integer_type_node),
 				   BIT_AND_EXPR, prev_lhs,

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Fix ctz issues (PR93231)
  2020-01-13 17:53 [PATCH] Fix ctz issues (PR93231) Wilco Dijkstra
@ 2020-01-13 18:16 ` Jakub Jelinek
  2020-01-15 16:46   ` Wilco Dijkstra
  0 siblings, 1 reply; 4+ messages in thread
From: Jakub Jelinek @ 2020-01-13 18:16 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: GCC Patches

On Mon, Jan 13, 2020 at 05:30:23PM +0000, Wilco Dijkstra wrote:
> Further improve the ctz recognition: Avoid ICEing on negative shift
> counts or multiply constants.  Check the type is 8 bits for the string
> constant case to avoid accidentally matching a wide STRING_CST.
> Add a tree_expr_nonzero_p check to allow the optimization even if
> CTZ_DEFINED_VALUE_AT_ZERO returns 0 or 1.  Add extra test cases.
> 
> (note the diff uses the old tree and includes Jakub's bootstrap fixes)

You should rebase it because you'll be committing it against trunk
which already has those changes.

> @@ -1879,7 +1879,7 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc,
>    if (!low || !integer_zerop (low))
>      return false;
>  
> -  unsigned shiftval = tree_to_uhwi (tshift);
> +  unsigned shiftval = tree_to_shwi (tshift);

This relies on the FEs to narrow the type of say:
x >> (((__uint128_t) 0x12 << 64) | 0x1234567812345678ULL)

> @@ -1889,12 +1889,13 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc,
>    if (!ctor)
>      return false;
>  
> -  unsigned HOST_WIDE_INT val = tree_to_uhwi (mulc);
> +  /* Extract the binary representation of the multiply constant.  */
> +  unsigned HOST_WIDE_INT val = TREE_INT_CST_LOW (mulc);

Will it work properly with the signed types though?
The difference is whether the C code we are matching will use logical or
arithmetic right shift.  And if the power of two times mulc ever can have
sign bit set, it will then use negative indexes into the array.

> -  if (TREE_CODE (ctor) == STRING_CST)
> +  if (TREE_CODE (ctor) == STRING_CST && TYPE_PRECISION (type) == 8)

Isn't another precondition that BITS_PER_UNIT is 8 (because STRING_CSTs are
really bytes)?

>      return check_ctz_string (ctor, val, zero_val, shiftval, input_bits);
>  
>    return false;
> @@ -1920,15 +1921,24 @@ simplify_count_trailing_zeroes (gimple_stmt_iterator *gsi)
>  				      res_ops[1], res_ops[2], zero_val))
>      {
>        tree type = TREE_TYPE (res_ops[0]);
> -      HOST_WIDE_INT ctzval;
> +      HOST_WIDE_INT ctz_val = 0;
>        HOST_WIDE_INT type_size = tree_to_shwi (TYPE_SIZE (type));
> -      bool zero_ok = CTZ_DEFINED_VALUE_AT_ZERO (TYPE_MODE (type), ctzval) == 2;
> +      bool zero_ok =
> +	CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type), ctz_val) == 2;

= shouldn't be at the end of line.

	Jakub

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Fix ctz issues (PR93231)
  2020-01-13 18:16 ` Jakub Jelinek
@ 2020-01-15 16:46   ` Wilco Dijkstra
  2020-01-15 16:59     ` Jakub Jelinek
  0 siblings, 1 reply; 4+ messages in thread
From: Wilco Dijkstra @ 2020-01-15 16:46 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: GCC Patches

Hi Jakub,

>> (note the diff uses the old tree and includes Jakub's bootstrap fixes)
> You should rebase it because you'll be committing it against trunk
> which already has those changes.

Sure, it was just the small matter of updating the many GCC checkouts I have...

>> -  unsigned shiftval = tree_to_uhwi (tshift);
>> +  unsigned shiftval = tree_to_shwi (tshift);
>
> This relies on the FEs to narrow the type of say:
> x >> (((__uint128_t) 0x12 << 64) | 0x1234567812345678ULL)

Gimple canonicalizes all shifts to integer type, eg. 
x >> 0xabcdef00000001 is converted to x >> 1 with a warning.

>> -  unsigned HOST_WIDE_INT val = tree_to_uhwi (mulc);
>> +  /* Extract the binary representation of the multiply constant.  */
>> +  unsigned HOST_WIDE_INT val = TREE_INT_CST_LOW (mulc);
>
> Will it work properly with the signed types though?
> The difference is whether the C code we are matching will use logical or
> arithmetic right shift.  And if the power of two times mulc ever can have
> sign bit set, it will then use negative indexes into the array.

The multiply can be signed or unsigned, and the immediate can be positive or
negative, but the shift must be unsigned indeed. I thought the match.pd
pattern only allows unsigned shifts, but the shift operator allows signed shifts
too, so I've added an unsigned check on the input_type.

>> -  if (TREE_CODE (ctor) == STRING_CST)
>> +  if (TREE_CODE (ctor) == STRING_CST && TYPE_PRECISION (type) == 8)
>
> Isn't another precondition that BITS_PER_UNIT is 8 (because STRING_CSTs are
> really bytes)?

I've used CHAR_TYPE_SIZE instead of 8, but I don't think GCC supports anything other
than BITS_PER_UNIT == 8 and CHAR_TYPE_SIZE == 8. GCC uses memcmp/strlen
on STRING_CST (as well as direct accesses as 'char') which won't work if the host and
target chars are not the same size.

Here is the updated version:

Further improve the ctz recognition: Avoid ICEing on negative shift
counts or multiply constants.  Check the type is a char type for the
string constant case to avoid accidentally matching a wide STRING_CST.
Add a tree_expr_nonzero_p check to allow the optimization even if
CTZ_DEFINED_VALUE_AT_ZERO returns 0 or 1.  Add extra test cases.

Bootstrap OK on AArch64 and x64.

ChangeLog:

2020-01-15  Wilco Dijkstra  <wdijkstr@arm.com>

	PR tree-optimization/93231
	* tree-ssa-forwprop.c
	(optimize_count_trailing_zeroes): Check input_type is unsigned.
	Use tree_to_shwi for shift constant.  Check CST_STRING element
	size is CHAR_TYPE_SIZE bits.
	(simplify_count_trailing_zeroes): Add test to handle known non-zero
	inputs more efficiently.

    testsuite/
	* gcc.dg/pr90838.c: New test.
	* gcc.dg/pr93231.c: New test.
--
diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c
new file mode 100644
index 0000000000000000000000000000000000000000..8070d439f6404dc6884a11e58f1db41c435e61fb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr90838.c
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop2-details" } */
+
+int ctz1 (unsigned x)
+{
+  static const char table[32] = "\x00\x01\x1c\x02\x1d\x0e\x18\x03\x1e\x16\x14"
+    "\x0f\x19\x11\x04\b\x1f\x1b\r\x17\x15\x13\x10\x07\x1a\f\x12\x06\v\x05\n\t";
+
+  return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
+}
+
+int ctz2 (unsigned x)
+{
+  const int u = 0;
+  static short table[64] =
+    {
+      32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
+      10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
+      31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
+      30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u
+    };
+
+  x = (x & -x) * 0x0450FBAF;
+  return table[x >> 26];
+}
+
+int ctz3 (unsigned x)
+{
+  static int table[32] =
+    {
+      0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26,
+      31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27
+    };
+
+  if (x == 0) return 32;
+  x = (x & -x) * 0x04D7651F;
+  return table[x >> 27];
+}
+
+static const unsigned long long magic = 0x03f08c5392f756cdULL;
+
+static const char table[64] = {
+     0,  1, 12,  2, 13, 22, 17,  3,
+    14, 33, 23, 36, 18, 58, 28,  4,
+    62, 15, 34, 26, 24, 48, 50, 37,
+    19, 55, 59, 52, 29, 44, 39,  5,
+    63, 11, 21, 16, 32, 35, 57, 27,
+    61, 25, 47, 49, 54, 51, 43, 38,
+    10, 20, 31, 56, 60, 46, 53, 42,
+     9, 30, 45, 41,  8, 40,  7,  6,
+};
+
+int ctz4 (unsigned long x)
+{
+  unsigned long lsb = x & -x;
+  return table[(lsb * magic) >> 58];
+}
+
+/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target aarch64*-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/pr93231.c b/gcc/testsuite/gcc.dg/pr93231.c
new file mode 100644
index 0000000000000000000000000000000000000000..cd0b3f320f78ffdd3d82cf487a63e861d0bf8eab
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr93231.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop2-details -Wno-shift-count-negative" } */
+
+int ctz_ice1 (int x)
+{
+  static const char table[32] =
+    {
+      0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+    };
+
+  return table[((int)((x & -x) * -0x077CB531)) >> 27];
+}
+
+int ctz_ice2 (unsigned x)
+{
+  static const char table[32] =
+    {
+      0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+    };
+
+  return table[((unsigned)((x & -x) * 0x077CB531U)) >> -27];
+}
+
+// This should never match
+int ctz_fail (unsigned x)
+{
+  static const unsigned short int table[32] =
+    u"\x0100\x021c\x0e1d\x0318\x161e\x0f14\x1119\x0804\x1b1f\x170d\x1315\x0710\x0c1a\x0612\x050b\x090a";
+
+  return table[((x & -x) * 0x077CB531) >> 27];
+}
+
+/* { dg-final { scan-tree-dump-not {= \.CTZ} "forwprop2" } } */
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index 56c470f6ecfe4c0703350e797e50832f323c43d0..e238915dfe9cdcb23717a639dba1590952a8f9f0 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -1864,9 +1864,9 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc,
   tree input_type = TREE_TYPE (x);
   unsigned input_bits = tree_to_shwi (TYPE_SIZE (input_type));
 
-  /* Check the array is not wider than integer type and the input is a 32-bit
-     or 64-bit type.  */
-  if (TYPE_PRECISION (type) > 32)
+  /* Check the array element type is not wider than 32 bits and the input is
+     an unsigned 32-bit or 64-bit type.  */
+  if (TYPE_PRECISION (type) > 32 || !TYPE_UNSIGNED (input_type))
     return false;
   if (input_bits != 32 && input_bits != 64)
     return false;
@@ -1879,7 +1879,7 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc,
   if (!low || !integer_zerop (low))
     return false;
 
-  unsigned shiftval = tree_to_uhwi (tshift);
+  unsigned shiftval = tree_to_shwi (tshift);
 
   /* Check the shift extracts the top 5..7 bits.  */
   if (shiftval < input_bits - 7 || shiftval > input_bits - 5)
@@ -1894,7 +1894,7 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc,
   if (TREE_CODE (ctor) == CONSTRUCTOR)
     return check_ctz_array (ctor, val, zero_val, shiftval, input_bits);
 
-  if (TREE_CODE (ctor) == STRING_CST)
+  if (TREE_CODE (ctor) == STRING_CST && TYPE_PRECISION (type) == CHAR_TYPE_SIZE)
     return check_ctz_string (ctor, val, zero_val, shiftval, input_bits);
 
   return false;
@@ -1920,16 +1920,24 @@ simplify_count_trailing_zeroes (gimple_stmt_iterator *gsi)
 				      res_ops[1], res_ops[2], zero_val))
     {
       tree type = TREE_TYPE (res_ops[0]);
-      HOST_WIDE_INT ctzval = 0;
+      HOST_WIDE_INT ctz_val = 0;
       HOST_WIDE_INT type_size = tree_to_shwi (TYPE_SIZE (type));
       bool zero_ok
-	= CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type), ctzval) == 2;
+	= CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type), ctz_val) == 2;
+
+      /* If the input value can't be zero, don't special case ctz (0).  */
+      if (tree_expr_nonzero_p (res_ops[0]))
+	{
+	  zero_ok = true;
+	  zero_val = 0;
+	  ctz_val = 0;
+	}
 
       /* Skip if there is no value defined at zero, or if we can't easily
 	 return the correct value for zero.  */
       if (!zero_ok)
 	return false;
-      if (zero_val != ctzval && !(zero_val == 0 && ctzval == type_size))
+      if (zero_val != ctz_val && !(zero_val == 0 && ctz_val == type_size))
 	return false;
 
       gimple_seq seq = NULL;
@@ -1942,7 +1950,7 @@ simplify_count_trailing_zeroes (gimple_stmt_iterator *gsi)
       tree prev_lhs = gimple_call_lhs (call);
 
       /* Emit ctz (x) & 31 if ctz (0) is 32 but we need to return 0.  */
-      if (zero_val == 0 && ctzval == type_size)
+      if (zero_val == 0 && ctz_val == type_size)
 	{
 	  g = gimple_build_assign (make_ssa_name (integer_type_node),
 				   BIT_AND_EXPR, prev_lhs,


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Fix ctz issues (PR93231)
  2020-01-15 16:46   ` Wilco Dijkstra
@ 2020-01-15 16:59     ` Jakub Jelinek
  0 siblings, 0 replies; 4+ messages in thread
From: Jakub Jelinek @ 2020-01-15 16:59 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: GCC Patches

On Wed, Jan 15, 2020 at 03:50:01PM +0000, Wilco Dijkstra wrote:
> The multiply can be signed or unsigned, and the immediate can be positive or
> negative, but the shift must be unsigned indeed. I thought the match.pd
> pattern only allows unsigned shifts, but the shift operator allows signed shifts
> too, so I've added an unsigned check on the input_type.

I guess in theory if there is some multiplication constant that for all the
power of twos + 0 would give a positive result even arithetic right shift
would be ok, and we could also in the match.pd pattern use convert? or
nop_convert? to allow signed multiplication with a cast to unsigned after
it, but guess it isn't that important and we can just go with TYPE_UNSIGNED
requirement for now.

> >> -  if (TREE_CODE (ctor) == STRING_CST)
> >> +  if (TREE_CODE (ctor) == STRING_CST && TYPE_PRECISION (type) == 8)
> >
> > Isn't another precondition that BITS_PER_UNIT is 8 (because STRING_CSTs are
> > really bytes)?
> 
> I've used CHAR_TYPE_SIZE instead of 8, but I don't think GCC supports anything other
> than BITS_PER_UNIT == 8 and CHAR_TYPE_SIZE == 8. GCC uses memcmp/strlen
> on STRING_CST (as well as direct accesses as 'char') which won't work if the host and
> target chars are not the same size.

Yes, we even don't support anything else ATM, but are trying to document in
the source when we rely on it.
So e.g. VIEW_CONVERT_EXPR folding has
  /* Check that the host and target are sane.  */
  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return NULL_TREE;
etc.
> 2020-01-15  Wilco Dijkstra  <wdijkstr@arm.com>
> 
> 	PR tree-optimization/93231
> 	* tree-ssa-forwprop.c
> 	(optimize_count_trailing_zeroes): Check input_type is unsigned.

No need to wrap line before (optimize_count_trailing_zeroes), please use:
	* tree-ssa-forwprop.c (optimize_count_trailing_zeroes): Check
	input_type is unsigned.

> +int ctz2 (unsigned x)
> +{
> +  const int u = 0;
> +  static short table[64] =
> +    {
> +      32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
> +      10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
> +      31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
> +      30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u

This is just a GNU extension that we accept it in C, more portable would be
  enum U { u = 0; };
or #define u 0
But no big deal.

> @@ -1894,7 +1894,7 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc,
>    if (TREE_CODE (ctor) == CONSTRUCTOR)
>      return check_ctz_array (ctor, val, zero_val, shiftval, input_bits);
>  
> -  if (TREE_CODE (ctor) == STRING_CST)
> +  if (TREE_CODE (ctor) == STRING_CST && TYPE_PRECISION (type) == CHAR_TYPE_SIZE)

Too long line, please wrap.

Ok for trunk with those nits tweaked.

	Jakub

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-01-15 16:14 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-13 17:53 [PATCH] Fix ctz issues (PR93231) Wilco Dijkstra
2020-01-13 18:16 ` Jakub Jelinek
2020-01-15 16:46   ` Wilco Dijkstra
2020-01-15 16:59     ` Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).