public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH take 2] Fold (X<<C1)^(X<<C2) to a multiplication when possible.
@ 2021-07-28 12:44 Roger Sayle
  2021-08-03 11:58 ` Richard Biener
  2021-08-05 13:52 ` Christophe Lyon
  0 siblings, 2 replies; 4+ messages in thread
From: Roger Sayle @ 2021-07-28 12:44 UTC (permalink / raw)
  To: 'GCC Patches'; +Cc: 'Marc Glisse'

[-- Attachment #1: Type: text/plain, Size: 2220 bytes --]


Hi Marc,

Thanks for the feedback.  After some quality time in gdb, I now appreciate
that
match.pd behaves (subtly) differently between generic and gimple, and the
trees actually being passed to tree_nonzero_bits were not quite what I had
expected.  Sorry for my confusion, the revised patch below is now much
shorter
(and my follow-up patch that was originally to tree_nonzero_bits looks like
it
now needs to be to get_nonzero_bits!).

This revised patch has been retested on 864_64-pc-linux-gnu with a
"make bootstrap" and "make -k check" with no new failures.

Ok for mainline?

2021-07-28  Roger Sayle  <roger@nextmovesoftware.com>
	    Marc Glisse <marc.glisse@inria.fr>

gcc/ChangeLog
	* match.pd (bit_ior, bit_xor): Canonicalize (X*C1)|(X*C2) and
	(X*C1)^(X*C2) as X*(C1+C2), and related variants, using
	tree_nonzero_bits to ensure that operands are bit-wise disjoint.

gcc/testsuite/ChangeLog
	* gcc.dg/fold-ior-4.c: New test.

Roger
--

-----Original Message-----
From: Marc Glisse <marc.glisse@inria.fr> 
Sent: 26 July 2021 16:45
To: Roger Sayle <roger@nextmovesoftware.com>
Cc: 'GCC Patches' <gcc-patches@gcc.gnu.org>
Subject: Re: [PATCH] Fold (X<<C1)^(X<<C2) to a multiplication when possible.

On Mon, 26 Jul 2021, Roger Sayle wrote:

> The one aspect that's a little odd is that each transform is paired 
> with a convert@1 variant, using the efficient match machinery to 
> expose any zero extension to fold-const.c's tree_nonzero_bits
functionality.

Copying the first transform for context

+(for op (bit_ior bit_xor)
+ (simplify
+  (op (mult:s@0 @1 INTEGER_CST@2)
+      (mult:s@3 @1 INTEGER_CST@4))
+  (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)
+       && (tree_nonzero_bits (@0) & tree_nonzero_bits (@3)) == 0)
+   (mult @1
+	 { wide_int_to_tree (type, wi::to_wide (@2) + wi::to_wide (@4));
})))  
+(simplify
+  (op (mult:s@0 (convert@1 @2) INTEGER_CST@3)
+      (mult:s@4 (convert@1 @2) INTEGER_CST@5))
+  (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)
+       && (tree_nonzero_bits (@0) & tree_nonzero_bits (@4)) == 0)
+   (mult @1
+	 { wide_int_to_tree (type, wi::to_wide (@3) + wi::to_wide (@5));
})))

Could you explain how the convert helps exactly?

--
Marc Glisse

[-- Attachment #2: patchm2.txt --]
[-- Type: text/plain, Size: 2722 bytes --]

diff --git a/gcc/match.pd b/gcc/match.pd
index beb8d27..5bc6851 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2833,6 +2833,62 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
     (convert (mult (convert:t @0) { cst; })))))
 #endif
 
+/* Canonicalize (X*C1)|(X*C2) and (X*C1)^(X*C2) to (C1+C2)*X when
+   tree_nonzero_bits allows IOR and XOR to be treated like PLUS.
+   Likewise, handle (X<<C3) and X as legitimate variants of X*C.  */
+(for op (bit_ior bit_xor)
+ (simplify
+  (op (mult:s@0 @1 INTEGER_CST@2)
+      (mult:s@3 @1 INTEGER_CST@4))
+  (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)
+       && (tree_nonzero_bits (@0) & tree_nonzero_bits (@3)) == 0)
+   (mult @1
+	 { wide_int_to_tree (type, wi::to_wide (@2) + wi::to_wide (@4)); })))
+ (simplify
+  (op:c (mult:s@0 @1 INTEGER_CST@2)
+	(lshift:s@3 @1 INTEGER_CST@4))
+  (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)
+       && tree_int_cst_sgn (@4) > 0
+       && (tree_nonzero_bits (@0) & tree_nonzero_bits (@3)) == 0)
+   (with { wide_int wone = wi::one (TYPE_PRECISION (type));
+	   wide_int c = wi::add (wi::to_wide (@2),
+				 wi::lshift (wone, wi::to_wide (@4))); }
+    (mult @1 { wide_int_to_tree (type, c); }))))
+ (simplify
+  (op:c (mult:s@0 @1 INTEGER_CST@2)
+	@1)
+  (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)
+       && (tree_nonzero_bits (@0) & tree_nonzero_bits (@1)) == 0)
+   (mult @1
+	 { wide_int_to_tree (type,
+			     wi::add (wi::to_wide (@2), 1)); })))
+ (simplify
+  (op (lshift:s@0 @1 INTEGER_CST@2)
+      (lshift:s@3 @1 INTEGER_CST@4))
+  (if (INTEGRAL_TYPE_P (type)
+       && tree_int_cst_sgn (@2) > 0
+       && tree_int_cst_sgn (@4) > 0
+       && (tree_nonzero_bits (@0) & tree_nonzero_bits (@3)) == 0)
+   (with { tree t = type;
+	   if (!TYPE_OVERFLOW_WRAPS (t))
+	     t = unsigned_type_for (t);
+	   wide_int wone = wi::one (TYPE_PRECISION (t));
+	   wide_int c = wi::add (wi::lshift (wone, wi::to_wide (@2)),
+				 wi::lshift (wone, wi::to_wide (@4))); }
+    (convert (mult:t (convert:t @1) { wide_int_to_tree (t,c); })))))
+ (simplify
+  (op:c (lshift:s@0 @1 INTEGER_CST@2)
+	@1)
+  (if (INTEGRAL_TYPE_P (type)
+       && tree_int_cst_sgn (@2) > 0
+       && (tree_nonzero_bits (@0) & tree_nonzero_bits (@1)) == 0)
+   (with { tree t = type;
+	   if (!TYPE_OVERFLOW_WRAPS (t))
+	     t = unsigned_type_for (t);
+	   wide_int wone = wi::one (TYPE_PRECISION (t));
+	   wide_int c = wi::add (wi::lshift (wone, wi::to_wide (@2)), wone); }
+    (convert (mult:t (convert:t @1) { wide_int_to_tree (t, c); }))))))
+
 /* Simplifications of MIN_EXPR, MAX_EXPR, fmin() and fmax().  */
 
 (for minmax (min max FMIN_ALL FMAX_ALL)

[-- Attachment #3: fold-ior-4.c --]
[-- Type: text/plain, Size: 1160 bytes --]

/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-optimized" } */

unsigned int test_ior(unsigned char i)
{
  return i | (i<<8) | (i<<16) | (i<<24);
}

unsigned int test_xor(unsigned char i)
{
  return i ^ (i<<8) ^ (i<<16) ^ (i<<24);
}

unsigned int test_ior_1s(unsigned char i)
{
  return i | (i<<8);
}

unsigned int test_ior_1u(unsigned char i)
{
  unsigned int t = i;
  return t | (t<<8);
}

unsigned int test_xor_1s(unsigned char i)
{
  return i ^ (i<<8);
}

unsigned int test_xor_1u(unsigned char i)
{
  unsigned int t = i;
  return t ^ (t<<8);
}

unsigned int test_ior_2s(unsigned char i)
{
  return (i<<8) | (i<<16);
}

unsigned int test_ior_2u(unsigned char i)
{
  unsigned int t = i;
  return (t<<8) | (t<<16);
}

unsigned int test_xor_2s(unsigned char i)
{
  return (i<<8) ^ (i<<16);
}

unsigned int test_xor_2u(unsigned char i)
{
  unsigned int t = i;
  return (t<<8) ^ (t<<16);
}

/* { dg-final { scan-tree-dump-not " \\^ " "optimized" } } */
/* { dg-final { scan-tree-dump-not " \\| " "optimized" } } */
/* { dg-final { scan-tree-dump-times " \\* 16843009" 2 "optimized" } } */


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-08-05 14:54 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-28 12:44 [PATCH take 2] Fold (X<<C1)^(X<<C2) to a multiplication when possible Roger Sayle
2021-08-03 11:58 ` Richard Biener
2021-08-05 13:52 ` Christophe Lyon
2021-08-05 14:54   ` Roger Sayle

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).