public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [embedded-5-branch][PATCH 1/2] Backporting algorithmic optimization in match and simplify
@ 2015-11-17  9:34 Andre Vieira
  0 siblings, 0 replies; only message in thread
From: Andre Vieira @ 2015-11-17  9:34 UTC (permalink / raw)
  To: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 2204 bytes --]

New algorithmic optimisations:
((X inner_op C0) outer_op C1)
    With X being a tree where value_range has reasoned certain bits to
    always be zero throughout its computed value range — we will call
    this the zero_mask — and with inner_op = {|,^}, outer_op = {|,^},
    and inner_op != outer_op:
    if (inner_op == '^') C0 &= ~C1;
    if ((C0 & ~zero_mask) == 0) then emit (X outer_op (C0 outer_op C1))
    if ((C1 & ~zero_mask) == 0) then emit (X inner_op (C0 outer_op C1))

(X {&,^,|} C2) << C1 -> (X << C1) {&,^,|} (C2 << C1)
(X {&,^,|} C2) >> C1 -> (X >> C1) & (C2 >> C1)

The second and third transformations enable more applications of the
first. Also, some targets may benefit from delaying shift operations. I
am aware that such an optimization, in combination with one or more
optimizations that cause the reverse transformation, may lead to an
infinite loop; however, no such behavior has been detected during
regression testing and bootstrapping on aarch64.

This patch backports the optimization from trunk to the embedded-5-branch.

The original patches are at:
https://gcc.gnu.org/ml/gcc-patches/2015-08/msg01493.html (for the first 
optimization and changes to second and third)
https://gcc.gnu.org/ml/gcc-patches/2015-07/msg00517.html (the addition 
of the original second and third optimizations)

gcc/ChangeLog:

2015-11-27  Andre Vieira  <andre.simoesdiasvieira@arm.com>
Backport from mainline:

   2015-10-09  Andre Vieira  <andre.simoesdiasvieira@arm.com>

         * match.pd: ((X inner_op C0) outer_op C1) New pattern.
         ((X & C2) << C1): Expand to...
         (X {&,^,|} C2 << C1): ...This.
         ((X & C2) >> C1): Expand to...
         (X {&,^,|} C2 >> C1): ...This.
   2015-07-07  Richard Biener  <rguenther@suse.de>

	* fold-const.c (fold_binary_loc): Move
	(X & C2) << C1 -> (X << C1) & (C2 << C1) simplification ...
	* match.pd: ... here.


gcc/testsuite/ChangeLog:

2015-11-27  Andre Vieira  <andre.simoesdiasvieira@arm.com>
Backport from mainline:

   2015-10-09  Andre Vieira  <andre.simoesdiasvieira@arm.com>
             Hale Wang  <hale.wang@arm.com>

         * gcc.dg/tree-ssa/forwprop-33.c: New.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-backport-algorithmic-optimization.patch --]
[-- Type: text/x-patch;  name=0001-backport-algorithmic-optimization.patch, Size: 6218 bytes --]

From ac76659583ab0d9eb96552a540a22e66fdd430f7 Mon Sep 17 00:00:00 2001
From: Andre Simoes Dias Vieira <andsim01@arm.com>
Date: Fri, 23 Oct 2015 15:35:27 +0100
Subject: [PATCH 1/2] backport algorithmic optimization

---
 gcc/fold-const.c                            | 21 ---------
 gcc/match.pd                                | 54 ++++++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/forwprop-33.c | 71 +++++++++++++++++++++++++++++
 3 files changed, 125 insertions(+), 21 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/forwprop-33.c

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 6d085b185895f715d99a53a9e706295aa983d52a..5e0477f974173627317a25e018c9a6e098b099fb 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -12080,27 +12080,6 @@ fold_binary_loc (location_t loc,
 			     prec) == 0)
 	return TREE_OPERAND (arg0, 0);
 
-      /* Fold (X & C2) << C1 into (X << C1) & (C2 << C1)
-	      (X & C2) >> C1 into (X >> C1) & (C2 >> C1)
-	 if the latter can be further optimized.  */
-      if ((code == LSHIFT_EXPR || code == RSHIFT_EXPR)
-	  && TREE_CODE (arg0) == BIT_AND_EXPR
-	  && TREE_CODE (arg1) == INTEGER_CST
-	  && TREE_CODE (TREE_OPERAND (arg0, 1)) == INTEGER_CST)
-	{
-	  tree mask = fold_build2_loc (loc, code, type,
-				   fold_convert_loc (loc, type,
-						     TREE_OPERAND (arg0, 1)),
-				   arg1);
-	  tree shift = fold_build2_loc (loc, code, type,
-				    fold_convert_loc (loc, type,
-						      TREE_OPERAND (arg0, 0)),
-				    arg1);
-	  tem = fold_binary_loc (loc, BIT_AND_EXPR, type, shift, mask);
-	  if (tem)
-	    return tem;
-	}
-
       return NULL_TREE;
 
     case MIN_EXPR:
diff --git a/gcc/match.pd b/gcc/match.pd
index e40720e130fe0302466969e39ad5803960b942da..bea0237ae3b335c73bbc71f6d3997b1d84a58986 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -389,6 +389,49 @@ along with GCC; see the file COPYING3.  If not see
 	&& (TREE_CODE (@4) != SSA_NAME || has_single_use (@4)))
    (bit_xor (bit_and (bit_xor @0 @1) @2) @0)))
 
+/* ((X inner_op C0) outer_op C1)
+   With X being a tree where value_range has reasoned certain bits to always be
+   zero throughout its computed value range,
+   inner_op = {|,^}, outer_op = {|,^} and inner_op != outer_op
+   where zero_mask has 1's for all bits that are sure to be 0 in
+   and 0's otherwise.
+   if (inner_op == '^') C0 &= ~C1;
+   if ((C0 & ~zero_mask) == 0) then emit (X outer_op (C0 outer_op C1)
+   if ((C1 & ~zero_mask) == 0) then emit (X inner_op (C0 outer_op C1)
+*/
+(for inner_op (bit_ior bit_xor)
+     outer_op (bit_xor bit_ior)
+(simplify
+ (outer_op
+  (inner_op@3 @2 INTEGER_CST@0) INTEGER_CST@1)
+ (with
+  {
+    bool fail = false;
+    wide_int zero_mask_not;
+    wide_int C0;
+    wide_int cst_emit;
+
+    if (TREE_CODE (@2) == SSA_NAME &&
+	(TREE_CODE (@3) != SSA_NAME || has_single_use (@3)))
+      zero_mask_not = get_nonzero_bits (@2);
+    else
+      fail = true;
+
+    if (inner_op == BIT_XOR_EXPR)
+      {
+	C0 = wi::bit_and_not (@0, @1);
+	cst_emit = wi::bit_or (C0, @1);
+      }
+    else
+      {
+	C0 = @0;
+	cst_emit = wi::bit_xor (@0, @1);
+      }
+  }
+  (if (!fail && wi::bit_and (C0, zero_mask_not) == 0)
+   (outer_op @2 { wide_int_to_tree (type, cst_emit); }))
+  (if (!fail && wi::bit_and (@1, zero_mask_not) == 0)
+    (inner_op @2 { wide_int_to_tree (type, cst_emit); })))))
 
 /* Associate (p +p off1) +p off2 as (p +p (off1 + off2)).  */
 (simplify
@@ -614,6 +657,17 @@ along with GCC; see the file COPYING3.  If not see
   (cmp (bit_and (lshift integer_onep @0) integer_onep) integer_zerop)
   (icmp @0 { build_zero_cst (TREE_TYPE (@0)); })))
 
+/* Fold (X {&,^,|} C2) << C1 into (X << C1) {&,^,|} (C2 << C1)
+   (X {&,^,|} C2) >> C1 into (X >> C1) & (C2 >> C1).  */
+ (for shift (lshift rshift)
+ (for bit_op (bit_and bit_xor bit_ior)
+  (simplify
+   (shift (convert? (bit_op@4 @0 INTEGER_CST@2)) INTEGER_CST@1)
+   (with { bool fail = !(TREE_CODE (@4) != SSA_NAME || has_single_use (@4)); }
+    (if (!fail && tree_nop_conversion_p (type, TREE_TYPE (@0)))
+     (with { tree mask = int_const_binop (shift, fold_convert (type, @2), @1); }
+      (bit_op (shift (convert @0) @1) { mask; })))))))
+
 /* Simplifications of conversions.  */
 
 /* Basic strip-useless-type-conversions / strip_nops.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-33.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-33.c
new file mode 100644
index 0000000000000000000000000000000000000000..c7124deee11f3655d7c50bd9605bb7caddba6470
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-33.c
@@ -0,0 +1,71 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop3" } */
+
+unsigned short
+test1 (unsigned short a)
+{
+  a ^= 0x4002;
+  a >>= 1;
+  a |= 0x8000; /* Simplify to ((a >> 1) ^ 0xa001).  */
+  return a;
+}
+/* { dg-final { scan-tree-dump "\\^ 40961" "forwprop3" } } */
+
+unsigned short
+test2 (unsigned short a)
+{
+  a |= 0x4002;
+  a <<= 1;
+  a ^= 0x0001; /* Simplify to ((a << 1) | 0x8005).  */
+  return a;
+}
+/* { dg-final { scan-tree-dump "\\| 32773" "forwprop3" } } */
+
+unsigned short
+test3 (unsigned short a)
+{
+  a &= 0xd123;
+  a ^= 0x6040;
+  a |= 0xc031; /* Simplify to ((a & 0xd123) | 0xe071).  */
+  return a;
+}
+/* { dg-final { scan-tree-dump "\\| 57457" "forwprop3" } } */
+
+unsigned short
+test4 (unsigned short a)
+{
+  a ^= 0x8002;
+  a >>= 1;
+  a |= 0x8000;
+  return a;
+}
+/* { dg-final { scan-tree-dump "\\^ 49153" "forwprop3" } } */
+
+unsigned short
+test5 (unsigned short a)
+{
+  a ^= 0x8002;
+  a >>= 1;
+  a |= 0x8001; /* Only move shift inward: (((a >> 1) ^ 0x4001) | 0x8001).  */
+  return a;
+}
+/* { dg-final { scan-tree-dump "\\^ 16385" "forwprop3" } } */
+/* { dg-final { scan-tree-dump "\\| 32769" "forwprop3" } } */
+
+short
+test6 (short a)
+{
+  a &= 0x7fff;
+  a >>= 2;
+  return a;
+}
+/* { dg-final { scan-tree-dump "\\& 8191" "forwprop3" } } */
+
+short
+test7 (short a)
+{
+  a &= 0x8fff;
+  a >>= 2;
+  return a;
+}
+/* { dg-final { scan-tree-dump "\\& -7169" "forwprop3" } } */
-- 
1.9.1


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2015-11-17  9:34 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-17  9:34 [embedded-5-branch][PATCH 1/2] Backporting algorithmic optimization in match and simplify Andre Vieira

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).