public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* Add a pass to back-propagate use information
@ 2015-10-15 13:17 Richard Sandiford
  2015-10-19 11:40 ` Richard Biener
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Sandiford @ 2015-10-15 13:17 UTC (permalink / raw)
  To: gcc-patches

This patch adds a pass that collects information that is common to all
uses of an SSA name X and back-propagates that information up the statements
that generate X.  The general idea is to use the information to simplify
instructions (rather than a pure DCE) so I've simply called it
tree-ssa-backprop.c, to go with tree-ssa-forwprop.c.

At the moment the only use of the pass is to remove unnecessry sign
operations, so that it's effectively a global version of
fold_strip_sign_ops.  I'm hoping it could be extended in future to
record which bits of an integer are significant.  There are probably
other potential uses too.

A later patch gets rid of fold_strip_sign_ops.

Tested on x86_64-linux-gnu, aarch64-linux-gnu and arm-linux-gnueabi.
OK to install?

Thanks,
Richard


gcc/
	* doc/invoke.texi (-fdump-tree-backprop, -ftree-backprop): Document.
	* Makefile.in (OBJS): Add tree-ssa-backprop.o.
	* common.opt (ftree-backprop): New option.
	* fold-const.h (negate_mathfn_p): Declare.
	* fold-const.c (negate_mathfn_p): Make public.
	* timevar.def (TV_TREE_BACKPROP): New.
	* tree-passes.h (make_pass_backprop): Declare.
	* passes.def (pass_backprop): Add.
	* tree-ssa-backprop.c: New file.

gcc/testsuite/
	* gcc.dg/tree-ssa/backprop-1.c, gcc.dg/tree-ssa/backprop-2.c,
	gcc.dg/tree-ssa/backprop-3.c, gcc.dg/tree-ssa/backprop-4.c,
	gcc.dg/tree-ssa/backprop-5.c, gcc.dg/tree-ssa/backprop-6.c: New tests.

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 783e4c9..69e669d 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1445,6 +1445,7 @@ OBJS = \
 	tree-switch-conversion.o \
 	tree-ssa-address.o \
 	tree-ssa-alias.o \
+	tree-ssa-backprop.o \
 	tree-ssa-ccp.o \
 	tree-ssa-coalesce.o \
 	tree-ssa-copy.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index 5060208..5aef625 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2364,6 +2364,10 @@ ftree-pta
 Common Report Var(flag_tree_pta) Optimization
 Perform function-local points-to analysis on trees.
 
+ftree-backprop
+Common Report Var(flag_tree_backprop) Init(1) Optimization
+Enable backward propagation of use properties at the tree level.
+
 ftree-reassoc
 Common Report Var(flag_tree_reassoc) Init(1) Optimization
 Enable reassociation on tree level
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 54e9f12..fe15d08 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -343,6 +343,7 @@ Objective-C and Objective-C++ Dialects}.
 -fdump-tree-dse@r{[}-@var{n}@r{]} @gol
 -fdump-tree-phiprop@r{[}-@var{n}@r{]} @gol
 -fdump-tree-phiopt@r{[}-@var{n}@r{]} @gol
+-fdump-tree-backprop@r{[}-@var{n}@r{]} @gol
 -fdump-tree-forwprop@r{[}-@var{n}@r{]} @gol
 -fdump-tree-nrv -fdump-tree-vect @gol
 -fdump-tree-sink @gol
@@ -451,8 +452,8 @@ Objective-C and Objective-C++ Dialects}.
 -fstrict-overflow -fthread-jumps -ftracer -ftree-bit-ccp @gol
 -ftree-builtin-call-dce -ftree-ccp -ftree-ch @gol
 -ftree-coalesce-vars -ftree-copy-prop -ftree-dce -ftree-dominator-opts @gol
--ftree-dse -ftree-forwprop -ftree-fre -ftree-loop-if-convert @gol
--ftree-loop-if-convert-stores -ftree-loop-im @gol
+-ftree-dse -ftree-backprop -ftree-forwprop -ftree-fre @gol
+-ftree-loop-if-convert -ftree-loop-if-convert-stores -ftree-loop-im @gol
 -ftree-phiprop -ftree-loop-distribution -ftree-loop-distribute-patterns @gol
 -ftree-loop-ivcanon -ftree-loop-linear -ftree-loop-optimize @gol
 -ftree-loop-vectorize @gol
@@ -7236,6 +7237,12 @@ name is made by appending @file{.dse} to the source file name.
 Dump each function after optimizing PHI nodes into straightline code.  The file
 name is made by appending @file{.phiopt} to the source file name.
 
+@item backprop
+@opindex fdump-tree-backprop
+Dump each function after back-propagating use information up the definition
+chain.  The file name is made by appending @file{.backprop} to the
+source file name.
+
 @item forwprop
 @opindex fdump-tree-forwprop
 Dump each function after forward propagating single use variables.  The file
@@ -7716,6 +7723,7 @@ compilation time.
 -ftree-dce @gol
 -ftree-dominator-opts @gol
 -ftree-dse @gol
+-ftree-backprop @gol
 -ftree-forwprop @gol
 -ftree-fre @gol
 -ftree-phiprop @gol
@@ -8658,6 +8666,13 @@ enabled by default at @option{-O2} and @option{-O3}.
 Make partial redundancy elimination (PRE) more aggressive.  This flag is
 enabled by default at @option{-O3}.
 
+@item -ftree-backprop
+@opindex ftree-backprop
+Propagate information about uses of a value up the definition chain
+in order to simplify the definitions.  For example, this pass strips
+sign operations if the sign of a value never matters.  The flag is
+enabled by default at @option{-O} and higher.
+
 @item -ftree-forwprop
 @opindex ftree-forwprop
 Perform forward propagation on trees.  This flag is enabled by default
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index de45a2c..7f00e72 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -110,7 +110,6 @@ enum comparison_code {
   COMPCODE_TRUE = 15
 };
 
-static bool negate_mathfn_p (enum built_in_function);
 static bool negate_expr_p (tree);
 static tree negate_expr (tree);
 static tree split_tree (tree, enum tree_code, tree *, tree *, tree *, int);
@@ -319,7 +318,7 @@ fold_overflow_warning (const char* gmsgid, enum warn_strict_overflow_code wc)
 /* Return true if the built-in mathematical function specified by CODE
    is odd, i.e. -f(x) == f(-x).  */
 
-static bool
+bool
 negate_mathfn_p (enum built_in_function code)
 {
   switch (code)
diff --git a/gcc/fold-const.h b/gcc/fold-const.h
index ee74dc8..4d5b24b 100644
--- a/gcc/fold-const.h
+++ b/gcc/fold-const.h
@@ -173,6 +173,7 @@ extern tree sign_bit_p (tree, const_tree);
 extern tree exact_inverse (tree, tree);
 extern tree const_unop (enum tree_code, tree, tree);
 extern tree const_binop (enum tree_code, tree, tree, tree);
+extern bool negate_mathfn_p (enum built_in_function);
 
 /* Return OFF converted to a pointer offset type suitable as offset for
    POINTER_PLUS_EXPR.  Use location LOC for this conversion.  */
diff --git a/gcc/passes.def b/gcc/passes.def
index dc3f44c..36d2b3b 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -159,6 +159,7 @@ along with GCC; see the file COPYING3.  If not see
       /* After CCP we rewrite no longer addressed locals into SSA
 	 form if possible.  */
       NEXT_PASS (pass_complete_unrolli);
+      NEXT_PASS (pass_backprop);
       NEXT_PASS (pass_phiprop);
       NEXT_PASS (pass_forwprop);
       NEXT_PASS (pass_object_sizes);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-1.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-1.c
new file mode 100644
index 0000000..ff88ae8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-backprop-details" }  */
+
+/* Test a simple case of non-looping code in which both uses ignore
+   the sign and both definitions are sign ops.  */
+#define TEST_FUNCTION(TYPE, SUFFIX)				\
+  TYPE								\
+  test##SUFFIX (TYPE x, int sel1, int sel2)			\
+  {								\
+    TYPE input = sel1 ? -x : __builtin_fabs##SUFFIX (x);	\
+    if (sel2)							\
+      return __builtin_cos##SUFFIX (input);			\
+    else							\
+      return __builtin_cosh##SUFFIX (input);			\
+  }
+
+TEST_FUNCTION (float, f)
+TEST_FUNCTION (double, )
+TEST_FUNCTION (long double, l)
+
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -x} 3 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <x} 3 "backprop" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-2.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-2.c
new file mode 100644
index 0000000..aa1233a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-backprop-details" }  */
+
+/* Test a simple case of non-looping code in which both uses ignore
+   the sign but only one definition is a sign op.  */
+#define TEST_FUNCTION(TYPE, SUFFIX)				\
+  TYPE								\
+  test##SUFFIX (TYPE x, int sel1, int sel2)			\
+  {								\
+    TYPE input = sel1 ? -x : x + 1;				\
+    if (sel2)							\
+      return __builtin_cos##SUFFIX (input);			\
+    else							\
+      return __builtin_cosh##SUFFIX (input);			\
+  }
+
+TEST_FUNCTION (float, f)
+TEST_FUNCTION (double, )
+TEST_FUNCTION (long double, l)
+
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -x} 3 "backprop" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-3.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-3.c
new file mode 100644
index 0000000..6381fff
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-3.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-backprop-details" }  */
+
+/* Test a simple case of non-looping code in which one use ignores
+   the sign but another doesn't.  */
+#define TEST_FUNCTION(TYPE, SUFFIX)				\
+  TYPE								\
+  test##SUFFIX (TYPE x, int sel1, int sel2)			\
+  {								\
+    TYPE input = sel1 ? -x : __builtin_fabs##SUFFIX (x);	\
+    if (sel2)							\
+      return __builtin_cos##SUFFIX (input);			\
+    else							\
+      return __builtin_sin##SUFFIX (input);			\
+  }
+
+TEST_FUNCTION (float, f)
+TEST_FUNCTION (double, )
+TEST_FUNCTION (long double, l)
+
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -x} 0 "backprop" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-4.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-4.c
new file mode 100644
index 0000000..fc96f3e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-4.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-backprop-details" }  */
+
+/* Test a simple reduction loop in which all inputs are sign ops and
+   the consumer doesn't care about the sign.  */
+#define TEST_FUNCTION(TYPE, SUFFIX)			\
+  TYPE							\
+  test##SUFFIX (TYPE x, TYPE y, TYPE *array, int n)	\
+  {							\
+    x = __builtin_copysign##SUFFIX (x, y);		\
+    for (int i = 0; i < n; ++i)				\
+      x *= -array[i];					\
+    return __builtin_hypot##SUFFIX (x, y);		\
+  }
+
+TEST_FUNCTION (float, f)
+TEST_FUNCTION (double, )
+TEST_FUNCTION (long double, l)
+
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = __builtin_copysign} 3 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 3 "backprop" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-5.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-5.c
new file mode 100644
index 0000000..90e18cb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-5.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-backprop-details" }  */
+
+/* Test a loop that does both a multiplication and addition.  The addition
+   should prevent any sign ops from being removed.  */
+#define TEST_FUNCTION(TYPE, SUFFIX)			\
+  TYPE							\
+  test##SUFFIX (TYPE x, TYPE y, TYPE *array, int n)	\
+  {							\
+    x = __builtin_copysign##SUFFIX (x, y);		\
+    for (int i = 0; i < n; ++i)				\
+      x = (x + 1) * array[i];				\
+    return __builtin_hypot##SUFFIX (x, y);		\
+  }
+
+TEST_FUNCTION (float, f)
+TEST_FUNCTION (double, )
+TEST_FUNCTION (long double, l)
+
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = __builtin_copysign} 0 "backprop" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
new file mode 100644
index 0000000..31f0571
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-backprop-details" }  */
+
+void start (void *);
+void end (void *);
+
+/* Test that we optimize the contents of infinite loops.  */
+#define TEST_FUNCTION(TYPE, SUFFIX)			\
+  void							\
+  test##SUFFIX (TYPE *array, TYPE y, int n)		\
+  {							\
+    for (;;)						\
+      {							\
+	start (array);					\
+	TYPE x = -__builtin_fabs##SUFFIX (array[-1]);	\
+	for (int i = 0; i < n; ++i)			\
+	  x = -x / array[i];				\
+	array[-1] = x * x;				\
+	array[-2] = __builtin_fma##SUFFIX (x, x, y);	\
+	array[-3] = __builtin_pow##SUFFIX (x, 20);	\
+	end (array);					\
+      }							\
+  }
+
+TEST_FUNCTION (float, f)
+TEST_FUNCTION (double, )
+TEST_FUNCTION (long double, l)
+
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" } } */
diff --git a/gcc/timevar.def b/gcc/timevar.def
index ac41075..ef16226 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -163,6 +163,7 @@ DEFTIMEVAR (TV_TREE_PRE		     , "tree PRE")
 DEFTIMEVAR (TV_TREE_FRE		     , "tree FRE")
 DEFTIMEVAR (TV_TREE_SINK             , "tree code sinking")
 DEFTIMEVAR (TV_TREE_PHIOPT	     , "tree linearize phis")
+DEFTIMEVAR (TV_TREE_BACKPROP	     , "tree backward propagate")
 DEFTIMEVAR (TV_TREE_FORWPROP	     , "tree forward propagate")
 DEFTIMEVAR (TV_TREE_PHIPROP	     , "tree phiprop")
 DEFTIMEVAR (TV_TREE_DCE		     , "tree conservative DCE")
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index c37e4b2..333b5a7 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -497,6 +497,7 @@ extern gimple_opt_pass *make_pass_cleanup_cfg_post_optimizing (gcc::context
 							       *ctxt);
 extern gimple_opt_pass *make_pass_init_datastructures (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_fixup_cfg (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_backprop (gcc::context *ctxt);
 
 extern rtl_opt_pass *make_pass_expand (gcc::context *ctxt);
 extern rtl_opt_pass *make_pass_instantiate_virtual_regs (gcc::context *ctxt);
diff --git a/gcc/tree-ssa-backprop.c b/gcc/tree-ssa-backprop.c
new file mode 100644
index 0000000..bf2dcd9
--- /dev/null
+++ b/gcc/tree-ssa-backprop.c
@@ -0,0 +1,900 @@
+/* Back-propagation of usage information to definitions.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+/* This pass propagates information that is common to all uses of an SSA
+   name back up through the sequence of statements that generate it,
+   simplifying the statements where possible.  Sometimes this can expose
+   fully or partially dead code, but the main focus is simplifying
+   computations.
+
+   At the moment the pass only handles one piece of information: whether the
+   sign of a value matters, and therefore whether sign-changing operations
+   can be skipped.  The pass could be extended to more interesting
+   information in future, such as which bits of an integer are significant.
+
+   For example, take the function:
+
+     double
+     f (double *a, int n, double start)
+     {
+       double x = fabs (start);
+       for (int i = 0; i < n; ++i)
+	 x *= a[i];
+       return __builtin_cos (x);
+     }
+
+   cos(x) == cos(-x), so the sign of the final x doesn't matter.
+   That x is the result of a series of multiplications, and if
+   the sign of the result of a multiplication doesn't matter,
+   the signs of the inputs don't matter either.
+
+   The pass would replace the incoming value of x (i.e. fabs(start))
+   with start.  Since there are no other uses of the fabs result,
+   the call would get deleted as dead.
+
+   The algorithm is:
+
+   (1) Do a post-order traversal of the blocks in the function, walking
+       each block backwards.  For each potentially-simplifiable statement
+       that defines an SSA name X, examine all uses of X to see what
+       information is actually significant.  Record this as INFO_MAP[X].
+       Optimistically ignore for now any back-edge references to
+       unprocessed phis.
+
+   (2) Iteratively reduce the optimistic result of (1) until we reach
+       a maximal fixed point (which at the moment would mean revisiting
+       statements at most once).  First push all SSA names that used an
+       optimistic assumption about a backedge phi onto a worklist.
+       While the worklist is nonempty, pick off an SSA name X and recompute
+       INFO_MAP[X].  If the value changes, push all SSA names used in the
+       definition of X onto the worklist.
+
+   (3) Iterate over each SSA name X with info in INFO_MAP, in the
+       opposite order to (1), i.e. a forward reverse-post-order walk.
+       Try to optimize the definition of X using INFO_MAP[X] and fold
+       the result.  (This ensures that we fold definitions before uses.)
+
+   (4) Iterate over each SSA name X with info in INFO_MAP, in the same
+       order as (1), and delete any statements that are now dead.
+       (This ensures that if a sequence of statements is dead,
+       we delete the last statement first.)
+
+   Note that this pass does not deal with direct redundancies,
+   such as cos(-x)->cos(x).  match.pd handles those cases instead.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "ssa.h"
+#include "fold-const.h"
+#include "tree-pass.h"
+#include "cfganal.h"
+#include "gimple-pretty-print.h"
+#include "tree-cfg.h"
+#include "tree-ssa-propagate.h"
+#include "gimple-fold.h"
+#include "alloc-pool.h"
+#include "tree-hash-traits.h"
+
+namespace {
+
+/* Information about a group of uses of an SSA name.  */
+struct usage_info
+{
+  usage_info () : flag_word (0) {}
+  usage_info &operator &= (const usage_info &);
+  usage_info operator & (const usage_info &) const;
+  bool operator == (const usage_info &) const;
+  bool operator != (const usage_info &) const;
+  bool is_useful () const;
+
+  static usage_info intersection_identity ();
+
+  union
+  {
+    struct
+    {
+      /* True if the uses treat x and -x in the same way.  */
+      unsigned int ignore_sign : 1;
+    } flags;
+    /* All the flag bits as a single int.  */
+    unsigned int flag_word;
+  };
+};
+
+/* Return an X such that X & Y == Y for all Y.  This is the most
+   optimistic assumption possible.  */
+
+usage_info
+usage_info::intersection_identity ()
+{
+  usage_info ret;
+  ret.flag_word = -1;
+  return ret;
+}
+
+/* Intersect *THIS with OTHER, so that *THIS describes all uses covered
+   by the original *THIS and OTHER.  */
+
+usage_info &
+usage_info::operator &= (const usage_info &other)
+{
+  flag_word &= other.flag_word;
+  return *this;
+}
+
+/* Return the intersection of *THIS and OTHER, i.e. a structure that
+   describes all uses covered by *THIS and OTHER.  */
+
+usage_info
+usage_info::operator & (const usage_info &other) const
+{
+  usage_info info (*this);
+  info &= other;
+  return info;
+}
+
+bool
+usage_info::operator == (const usage_info &other) const
+{
+  return flag_word == other.flag_word;
+}
+
+bool
+usage_info::operator != (const usage_info &other) const
+{
+  return !operator == (other);
+}
+
+/* Return true if *THIS is not simply the default, safe assumption.  */
+
+bool
+usage_info::is_useful () const
+{
+  return flag_word != 0;
+}
+
+/* Start a dump line about SSA name VAR.  */
+
+static void
+dump_usage_prefix (FILE *file, tree var)
+{
+  fprintf (file, "  ");
+  print_generic_expr (file, var, 0);
+  fprintf (file, ": ");
+}
+
+/* Print INFO to FILE.  */
+
+static void
+dump_usage_info (FILE *file, tree var, usage_info *info)
+{
+  if (info->flags.ignore_sign)
+    {
+      dump_usage_prefix (file, var);
+      fprintf (file, "sign bit not important\n");
+    }
+}
+
+/* Represents one execution of the pass.  */
+class backprop
+{
+public:
+  backprop (function *);
+  ~backprop ();
+
+  void execute ();
+
+private:
+  const usage_info *lookup_operand (tree);
+
+  void push_to_worklist (tree);
+  tree pop_from_worklist ();
+
+  void process_builtin_call_use (gcall *, tree, usage_info *);
+  void process_assign_use (gassign *, tree, usage_info *);
+  void process_phi_use (gphi *, usage_info *);
+  void process_use (gimple *, tree, usage_info *);
+  bool intersect_uses (tree, usage_info *);
+  void reprocess_inputs (gimple *);
+  void process_var (tree);
+  void process_block (basic_block);
+
+  void complete_change (gimple *);
+  void optimize_builtin_call (gcall *, const usage_info *);
+  void optimize_assign (gassign *, const usage_info *);
+  void optimize_phi (gphi *, const usage_info *);
+
+  typedef hash_map <tree_ssa_name_hash, usage_info *> info_map_type;
+  typedef std::pair <tree, usage_info *> var_info_pair;
+
+  /* The function we're optimizing.  */
+  function *m_fn;
+
+  /* Pool for allocating usage_info structures.  */
+  object_allocator <usage_info> m_info_pool;
+
+  /* Maps an SSA name to a description of all uses of that SSA name.
+     All the usage_infos satisfy is_useful.  */
+  info_map_type m_info_map;
+
+  /* Post-ordered list of all potentially-interesting SSA names,
+     along with information that describes all uses.  */
+  auto_vec <var_info_pair, 128> m_vars;
+
+  /* A bitmap of blocks that we have finished processing in the initial
+     post-order walk.  */
+  sbitmap m_visited_blocks;
+
+  /* A worklist of SSA names whose definitions need to be reconsidered.  */
+  auto_vec <tree, 64> m_worklist;
+
+  /* The SSA names in M_WORKLIST, identified by their SSA_NAME_VERSION.  */
+  bitmap m_worklist_names;
+};
+
+backprop::backprop (function *fn)
+  : m_fn (fn),
+    m_info_pool ("usage_info"),
+    m_visited_blocks (sbitmap_alloc (last_basic_block_for_fn (m_fn))),
+    m_worklist_names (BITMAP_ALLOC (NULL))
+{
+  bitmap_clear (m_visited_blocks);
+}
+
+backprop::~backprop ()
+{
+  BITMAP_FREE (m_worklist_names);
+  sbitmap_free (m_visited_blocks);
+  m_info_pool.release ();
+}
+
+/* Return usage information for general operand OP, or null if none.  */
+
+const usage_info *
+backprop::lookup_operand (tree op)
+{
+  if (op && TREE_CODE (op) == SSA_NAME)
+    if (usage_info **slot = m_info_map.get (op))
+      return *slot;
+  return NULL;
+}
+
+/* Add SSA name VAR to the worklist, if it isn't on the worklist already.  */
+
+void
+backprop::push_to_worklist (tree var)
+{
+  if (!bitmap_set_bit (m_worklist_names, SSA_NAME_VERSION (var)))
+    return;
+  m_worklist.safe_push (var);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "[WORKLIST] Pushing ");
+      print_generic_expr (dump_file, var, 0);
+      fprintf (dump_file, "\n");
+    }
+}
+
+/* Remove and return the next SSA name from the worklist.  The worklist
+   is known to be nonempty.  */
+
+tree
+backprop::pop_from_worklist ()
+{
+  tree var = m_worklist.pop ();
+  bitmap_clear_bit (m_worklist_names, SSA_NAME_VERSION (var));
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "[WORKLIST] Popping ");
+      print_generic_expr (dump_file, var, 0);
+      fprintf (dump_file, "\n");
+    }
+  return var;
+}
+
+/* Make INFO describe all uses of RHS in CALL, which is a call to a
+   built-in function.  */
+
+void
+backprop::process_builtin_call_use (gcall *call, tree rhs, usage_info *info)
+{
+  enum built_in_function fn = DECL_FUNCTION_CODE (gimple_call_fndecl (call));
+  tree lhs = gimple_call_lhs (call);
+  switch (fn)
+    {
+    CASE_FLT_FN (BUILT_IN_COS):
+    CASE_FLT_FN (BUILT_IN_COSH):
+    CASE_FLT_FN (BUILT_IN_CCOS):
+    CASE_FLT_FN (BUILT_IN_CCOSH):
+    CASE_FLT_FN (BUILT_IN_HYPOT):
+      /* The signs of all inputs are ignored.  */
+      info->flags.ignore_sign = true;
+      break;
+
+    CASE_FLT_FN (BUILT_IN_COPYSIGN):
+      /* The sign of the first input is ignored.  */
+      if (rhs != gimple_call_arg (call, 1))
+	info->flags.ignore_sign = true;
+      break;
+
+    CASE_FLT_FN (BUILT_IN_POW):
+      {
+	/* The sign of the first input is ignored as long as the second
+	   input is an even real.  */
+	tree power = gimple_call_arg (call, 1);
+	HOST_WIDE_INT n;
+	if (TREE_CODE (power) == REAL_CST
+	    && real_isinteger (&TREE_REAL_CST (power), &n)
+	    && (n & 1) == 0)
+	  info->flags.ignore_sign = true;
+	break;
+      }
+
+    CASE_FLT_FN (BUILT_IN_FMA):
+      /* In X * X + Y, where Y is distinct from X, the sign of X doesn't
+	 matter.  */
+      if (gimple_call_arg (call, 0) == rhs
+	  && gimple_call_arg (call, 1) == rhs
+	  && gimple_call_arg (call, 2) != rhs)
+	info->flags.ignore_sign = true;
+      break;
+
+    default:
+      if (negate_mathfn_p (fn))
+	/* The sign of the (single) input doesn't matter provided
+	   that the sign of the output doesn't matter.  */
+	if (const usage_info *lhs_info = lookup_operand (lhs))
+	  info->flags.ignore_sign = lhs_info->flags.ignore_sign;
+      break;
+    }
+}
+
+/* Make INFO describe all uses of RHS in ASSIGN.  */
+
+void
+backprop::process_assign_use (gassign *assign, tree rhs, usage_info *info)
+{
+  tree lhs = gimple_assign_lhs (assign);
+  switch (gimple_assign_rhs_code (assign))
+    {
+    case ABS_EXPR:
+      /* The sign of the input doesn't matter.  */
+      info->flags.ignore_sign = true;
+      break;
+
+    case COND_EXPR:
+      /* For A = B ? C : D, propagate information about all uses of A
+	 to B and C.  */
+      if (rhs != gimple_assign_rhs1 (assign))
+	if (const usage_info *lhs_info = lookup_operand (lhs))
+	  *info = *lhs_info;
+      break;
+
+    case FMA_EXPR:
+      /* In X * X + Y, where Y is distinct from X, the sign of X doesn't
+	 matter.  */
+      if (gimple_assign_rhs1 (assign) == rhs
+	  && gimple_assign_rhs2 (assign) == rhs
+	  && gimple_assign_rhs3 (assign) != rhs)
+	info->flags.ignore_sign = true;
+      break;
+
+    case MULT_EXPR:
+      /* In X * X, the sign of X doesn't matter.  */
+      if (gimple_assign_rhs1 (assign) == rhs
+	  && gimple_assign_rhs2 (assign) == rhs)
+	info->flags.ignore_sign = true;
+      /* Fall through.  */
+
+    case NEGATE_EXPR:
+    case RDIV_EXPR:
+      /* If the sign of the result doesn't matter, the sign of the inputs
+	 doesn't matter either.  */
+      if (FLOAT_TYPE_P (TREE_TYPE (rhs)))
+	if (const usage_info *lhs_info = lookup_operand (lhs))
+	  info->flags.ignore_sign = lhs_info->flags.ignore_sign;
+      break;
+
+    default:
+      break;
+    }
+}
+
+/* Make INFO describe the uses of PHI's result.  */
+
+void
+backprop::process_phi_use (gphi *phi, usage_info *info)
+{
+  tree result = gimple_phi_result (phi);
+  if (const usage_info *result_info = lookup_operand (result))
+    *info = *result_info;
+}
+
+/* Make INFO describe all uses of RHS in STMT.  */
+
+void
+backprop::process_use (gimple *stmt, tree rhs, usage_info *info)
+{
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "[USE] ");
+      print_generic_expr (dump_file, rhs, 0);
+      fprintf (dump_file, " in ");
+      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+    }
+
+  if (gcall *call = dyn_cast <gcall *> (stmt))
+    {
+      if (gimple_call_builtin_p (call, BUILT_IN_NORMAL))
+	process_builtin_call_use (call, rhs, info);
+    }
+  else if (gassign *assign = dyn_cast <gassign *> (stmt))
+    process_assign_use (assign, rhs, info);
+  else if (gphi *phi = dyn_cast <gphi *> (stmt))
+    process_phi_use (phi, info);
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    dump_usage_info (dump_file, rhs, info);
+}
+
+/* Make INFO describe all uses of VAR, returning true if the result
+   is useful.  If the uses include phis that haven't been processed yet,
+   make the most optimistic assumption possible, so that we aim for
+   a maximum rather than a minimum fixed point.  */
+
+bool
+backprop::intersect_uses (tree var, usage_info *info)
+{
+  imm_use_iterator iter;
+  gimple *stmt;
+  *info = usage_info::intersection_identity ();
+  FOR_EACH_IMM_USE_STMT (stmt, iter, var)
+    {
+      if (is_a <gphi *> (stmt)
+	  && !bitmap_bit_p (m_visited_blocks, gimple_bb (stmt)->index))
+	{
+	  /* Skip unprocessed phis.  */
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "[BACKEDGE] ");
+	      print_generic_expr (dump_file, var, 0);
+	      fprintf (dump_file, " in ");
+	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+	    }
+	}
+      else
+	{
+	  usage_info subinfo;
+	  process_use (stmt, var, &subinfo);
+	  *info &= subinfo;
+	  if (!info->is_useful ())
+	    {
+	      BREAK_FROM_IMM_USE_STMT (iter);
+	      return false;
+	    }
+	}
+    }
+  return true;
+}
+
+/* Queue for reconsideration any input of STMT that has information
+   associated with it.  This is used if that information might be
+   too optimistic.  */
+
+void
+backprop::reprocess_inputs (gimple *stmt)
+{
+  use_operand_p use_p;
+  ssa_op_iter oi;
+  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
+    {
+      tree var = get_use_from_ptr (use_p);
+      if (lookup_operand (var))
+	push_to_worklist (var);
+    }
+}
+
+/* Say that we're recording INFO for SSA name VAR, or that we're deleting
+   existing information if INFO is null.  INTRO describes the change.  */
+
+static void
+dump_var_info (tree var, usage_info *info, const char *intro)
+{
+  fprintf (dump_file, "[DEF] %s for ", intro);
+  print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (var), 0, TDF_SLIM);
+  if (info)
+    dump_usage_info (dump_file, var, info);
+}
+
+/* Process all uses of VAR and record or update the result in
+   M_INFO_MAP and M_VARS.  */
+
+void
+backprop::process_var (tree var)
+{
+  if (has_zero_uses (var))
+    return;
+
+  usage_info info;
+  intersect_uses (var, &info);
+
+  gimple *stmt = SSA_NAME_DEF_STMT (var);
+  if (info.is_useful ())
+    {
+      bool existed;
+      usage_info *&map_info = m_info_map.get_or_insert (var, &existed);
+      if (!existed)
+	{
+	  /* Recording information about VAR for the first time.  */
+	  map_info = m_info_pool.allocate ();
+	  *map_info = info;
+	  m_vars.safe_push (var_info_pair (var, map_info));
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    dump_var_info (var, map_info, "Recording new information");
+
+	  /* If STMT is a phi, reprocess any backedge uses.  This is a
+	     no-op for other uses, which won't have any information
+	     associated with them.  */
+	  if (is_a <gphi *> (stmt))
+	    reprocess_inputs (stmt);
+	}
+      else if (info != *map_info)
+	{
+	  /* Recording information that is less optimistic than before.  */
+	  gcc_checking_assert ((info & *map_info) == info);
+	  *map_info = info;
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    dump_var_info (var, map_info, "Updating information");
+	  reprocess_inputs (stmt);
+	}
+    }
+  else
+    {
+      if (usage_info **slot = m_info_map.get (var))
+	{
+	  /* Removing previously-recorded information.  */
+	  **slot = info;
+	  m_info_map.remove (var);
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    dump_var_info (var, NULL, "Deleting information");
+	  reprocess_inputs (stmt);
+	}
+      else
+	{
+	  /* If STMT is a phi, remove any information recorded for
+	     its arguments.  */
+	  if (is_a <gphi *> (stmt))
+	    reprocess_inputs (stmt);
+	}
+    }
+}
+
+/* Process all statements and phis in BB, during the first post-order walk.  */
+
+void
+backprop::process_block (basic_block bb)
+{
+  for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
+       gsi_prev (&gsi))
+    {
+      tree lhs = gimple_get_lhs (gsi_stmt (gsi));
+      if (lhs && TREE_CODE (lhs) == SSA_NAME)
+	process_var (lhs);
+    }
+  for (gphi_iterator gpi = gsi_start_phis (bb); !gsi_end_p (gpi);
+       gsi_next (&gpi))
+    process_var (gimple_phi_result (gpi.phi ()));
+}
+
+/* Delete the definition of VAR, which has no uses.  */
+
+static void
+remove_unused_var (tree var)
+{
+  gimple *stmt = SSA_NAME_DEF_STMT (var);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "Deleting ");
+      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+    }
+  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+  gsi_remove (&gsi, true);
+  release_ssa_name (var);
+}
+
+/* Note that we're replacing OLD_RHS with NEW_RHS in STMT.  */
+
+static void
+note_replacement (gimple *stmt, tree old_rhs, tree new_rhs)
+{
+  fprintf (dump_file, "Replacing use of ");
+  print_generic_expr (dump_file, old_rhs, 0);
+  fprintf (dump_file, " with ");
+  print_generic_expr (dump_file, new_rhs, 0);
+  fprintf (dump_file, " in ");
+  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+}
+
+/* If RHS is an SSA name whose definition just changes the sign of a value,
+   return that other value, otherwise return null.  */
+
+static tree
+strip_sign_op_1 (tree rhs)
+{
+  if (TREE_CODE (rhs) != SSA_NAME)
+    return NULL_TREE;
+
+  gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
+  if (gassign *assign = dyn_cast <gassign *> (def_stmt))
+    switch (gimple_assign_rhs_code (assign))
+      {
+      case ABS_EXPR:
+      case NEGATE_EXPR:
+	return gimple_assign_rhs1 (assign);
+
+      default:
+	break;
+      }
+  else if (gcall *call = dyn_cast <gcall *> (def_stmt))
+    {
+      if (gimple_call_builtin_p (call, BUILT_IN_NORMAL))
+	switch (DECL_FUNCTION_CODE (gimple_call_fndecl (call)))
+	  {
+	  CASE_FLT_FN (BUILT_IN_COPYSIGN):
+	    return gimple_call_arg (call, 0);
+
+	  default:
+	    break;
+	  }
+    }
+
+  return NULL_TREE;
+}
+
+/* If RHS is an SSA name whose definition just changes the sign of a value,
+   strip all such operations and return the ultimate input to them.
+   Return null otherwise.
+
+   Although this could in principle lead to quadratic searching,
+   in practice a long sequence of sign manipulations should already
+   have been folded down.  E.g. --x -> x, abs(-x) -> abs(x).  We search
+   for more than one operation in order to catch cases like -abs(x).  */
+
+static tree
+strip_sign_op (tree rhs)
+{
+  tree new_rhs = strip_sign_op_1 (rhs);
+  if (!new_rhs)
+    return NULL_TREE;
+  while (tree next = strip_sign_op_1 (new_rhs))
+    new_rhs = next;
+  return new_rhs;
+}
+
+/* Strip all sign operations from the rvalue at *RHS_PTR in STMT.
+   Return true if something changed.  The caller is responsible
+   for the necessary bookkeeping.  */
+
+static bool
+strip_sign_op (gimple *stmt, tree *rhs_ptr)
+{
+  if (tree new_rhs = strip_sign_op (*rhs_ptr))
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	note_replacement (stmt, *rhs_ptr, new_rhs);
+      *rhs_ptr = new_rhs;
+      return true;
+    }
+  return false;
+}
+
+/* STMT has been changed.  Give the fold machinery a chance to simplify
+   and canonicalize it (e.g. by ensuring that commutative operands have
+   the right order), then record the updates.  */
+
+void
+backprop::complete_change (gimple *stmt)
+{
+  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+  if (fold_stmt (&gsi))
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  fprintf (dump_file, "  which folds to: ");
+	  print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_SLIM);
+	}
+    }
+  update_stmt (gsi_stmt (gsi));
+}
+
+/* Optimize CALL, a call to a built-in function, on the basis that INFO
+   describes all uses of the LHS.  */
+
+void
+backprop::optimize_builtin_call (gcall *call, const usage_info *info)
+{
+  bool changed_p = false;
+  enum built_in_function fn = DECL_FUNCTION_CODE (gimple_call_fndecl (call));
+  /* If we have an f such that -f(x) = f(-x), and if the sign of the result
+     doesn't matter, strip any sign operations from the input.  */
+  if (info->flags.ignore_sign
+      && negate_mathfn_p (fn)
+      && strip_sign_op (call, gimple_call_arg_ptr (call, 0)))
+    changed_p = true;
+  if (changed_p)
+    complete_change (call);
+}
+
+/* Optimize ASSIGN on the basis that INFO describes all uses of the lhs.  */
+
+void
+backprop::optimize_assign (gassign *assign, const usage_info *info)
+{
+  bool changed_p = false;
+  switch (gimple_assign_rhs_code (assign))
+    {
+    case MULT_EXPR:
+    case RDIV_EXPR:
+      /* If the sign of the result doesn't matter, strip sign operations
+	 from both inputs.  */
+      if (info->flags.ignore_sign
+	  && (strip_sign_op (assign, gimple_assign_rhs1_ptr (assign))
+	      || strip_sign_op (assign, gimple_assign_rhs2_ptr (assign))))
+	changed_p = true;
+      break;
+
+    case COND_EXPR:
+      /* If the sign of A ? B : C doesn't matter, strip sign operations
+	 from both B and C.  */
+      if (info->flags.ignore_sign
+	  && (strip_sign_op (assign, gimple_assign_rhs2_ptr (assign))
+	      || strip_sign_op (assign, gimple_assign_rhs3_ptr (assign))))
+	changed_p = true;
+      break;
+
+    default:
+      break;
+    }
+  if (changed_p)
+    complete_change (assign);
+}
+
+/* Optimize PHI on the basis that INFO describes all uses of the result.  */
+
+void
+backprop::optimize_phi (gphi *phi, const usage_info *info)
+{
+  /* If the sign of the result doesn't matter, strip sign operations
+     from all arguments.  */
+  if (info->flags.ignore_sign)
+    {
+      use_operand_p use;
+      ssa_op_iter oi;
+      FOR_EACH_PHI_ARG (use, phi, oi, SSA_OP_USE)
+	if (tree new_arg = strip_sign_op (USE_FROM_PTR (use)))
+	  {
+	    if (dump_file && (dump_flags & TDF_DETAILS))
+	      note_replacement (phi, USE_FROM_PTR (use), new_arg);
+	    replace_exp (use, new_arg);
+	  }
+    }
+}
+
+void
+backprop::execute ()
+{
+  /* Phase 1: Traverse the function, making optimistic assumptions
+     about any phi whose definition we haven't seen.  */
+  int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (m_fn));
+  unsigned int postorder_num = post_order_compute (postorder, false, false);
+  for (unsigned int i = 0; i < postorder_num; ++i)
+    {
+      process_block (BASIC_BLOCK_FOR_FN (m_fn, postorder[i]));
+      bitmap_set_bit (m_visited_blocks, postorder[i]);
+    }
+  XDELETEVEC (postorder);
+
+  /* Phase 2: Use the initial (perhaps overly optimistic) information
+     to create a maximal fixed point solution.  */
+  while (!m_worklist.is_empty ())
+    process_var (pop_from_worklist ());
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "\n");
+
+  /* Phase 3: Do a reverse post-order walk, using information about
+     the uses of SSA names to optimize their definitions.  */
+  for (unsigned int i = m_vars.length (); i-- > 0;)
+    {
+      usage_info *info = m_vars[i].second;
+      if (info->is_useful ())
+	{
+	  tree var = m_vars[i].first;
+	  gimple *stmt = SSA_NAME_DEF_STMT (var);
+	  if (gcall *call = dyn_cast <gcall *> (stmt))
+	    {
+	      if (gimple_call_builtin_p (call, BUILT_IN_NORMAL))
+		optimize_builtin_call (call, info);
+	    }
+	  else if (gassign *assign = dyn_cast <gassign *> (stmt))
+	    optimize_assign (assign, info);
+	  else if (gphi *phi = dyn_cast <gphi *> (stmt))
+	    optimize_phi (phi, info);
+	}
+    }
+
+  /* Phase 4: Do a post-order walk, deleting statements that are no
+     longer needed.  */
+  for (unsigned int i = 0; i < m_vars.length (); ++i)
+    {
+      tree var = m_vars[i].first;
+      if (has_zero_uses (var))
+	remove_unused_var (var);
+    }
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "\n");
+}
+
+const pass_data pass_data_backprop =
+{
+  GIMPLE_PASS, /* type */
+  "backprop", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_TREE_BACKPROP, /* tv_id */
+  ( PROP_cfg | PROP_ssa ), /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_backprop : public gimple_opt_pass
+{
+public:
+  pass_backprop (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_backprop, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  opt_pass * clone () { return new pass_backprop (m_ctxt); }
+  virtual bool gate (function *) { return flag_tree_backprop; }
+  virtual unsigned int execute (function *);
+
+}; // class pass_backprop
+
+unsigned int
+pass_backprop::execute (function *fn)
+{
+  backprop (fn).execute ();
+  return 0;
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_backprop (gcc::context *ctxt)
+{
+  return new pass_backprop (ctxt);
+}

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Add a pass to back-propagate use information
  2015-10-15 13:17 Add a pass to back-propagate use information Richard Sandiford
@ 2015-10-19 11:40 ` Richard Biener
  2015-10-19 12:38   ` Richard Sandiford
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Biener @ 2015-10-19 11:40 UTC (permalink / raw)
  To: GCC Patches, richard.sandiford

On Thu, Oct 15, 2015 at 3:17 PM, Richard Sandiford
<richard.sandiford@arm.com> wrote:
> This patch adds a pass that collects information that is common to all
> uses of an SSA name X and back-propagates that information up the statements
> that generate X.  The general idea is to use the information to simplify
> instructions (rather than a pure DCE) so I've simply called it
> tree-ssa-backprop.c, to go with tree-ssa-forwprop.c.
>
> At the moment the only use of the pass is to remove unnecessry sign
> operations, so that it's effectively a global version of
> fold_strip_sign_ops.  I'm hoping it could be extended in future to
> record which bits of an integer are significant.  There are probably
> other potential uses too.
>
> A later patch gets rid of fold_strip_sign_ops.
>
> Tested on x86_64-linux-gnu, aarch64-linux-gnu and arm-linux-gnueabi.
> OK to install?
>
> Thanks,
> Richard
>
>
> gcc/
>         * doc/invoke.texi (-fdump-tree-backprop, -ftree-backprop): Document.
>         * Makefile.in (OBJS): Add tree-ssa-backprop.o.
>         * common.opt (ftree-backprop): New option.
>         * fold-const.h (negate_mathfn_p): Declare.
>         * fold-const.c (negate_mathfn_p): Make public.
>         * timevar.def (TV_TREE_BACKPROP): New.
>         * tree-passes.h (make_pass_backprop): Declare.
>         * passes.def (pass_backprop): Add.
>         * tree-ssa-backprop.c: New file.
>
> gcc/testsuite/
>         * gcc.dg/tree-ssa/backprop-1.c, gcc.dg/tree-ssa/backprop-2.c,
>         gcc.dg/tree-ssa/backprop-3.c, gcc.dg/tree-ssa/backprop-4.c,
>         gcc.dg/tree-ssa/backprop-5.c, gcc.dg/tree-ssa/backprop-6.c: New tests.
>
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index 783e4c9..69e669d 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -1445,6 +1445,7 @@ OBJS = \
>         tree-switch-conversion.o \
>         tree-ssa-address.o \
>         tree-ssa-alias.o \
> +       tree-ssa-backprop.o \
>         tree-ssa-ccp.o \
>         tree-ssa-coalesce.o \
>         tree-ssa-copy.o \
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 5060208..5aef625 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -2364,6 +2364,10 @@ ftree-pta
>  Common Report Var(flag_tree_pta) Optimization
>  Perform function-local points-to analysis on trees.
>
> +ftree-backprop
> +Common Report Var(flag_tree_backprop) Init(1) Optimization
> +Enable backward propagation of use properties at the tree level.

Don't add new -ftree-* "tree" doesn't add any info for our users.  I'd
also refer to SSA level rather than "tree" level.  Not sure if -fbackprop
is good, but let's go for it.

> +
>  ftree-reassoc
>  Common Report Var(flag_tree_reassoc) Init(1) Optimization
>  Enable reassociation on tree level
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 54e9f12..fe15d08 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -343,6 +343,7 @@ Objective-C and Objective-C++ Dialects}.
>  -fdump-tree-dse@r{[}-@var{n}@r{]} @gol
>  -fdump-tree-phiprop@r{[}-@var{n}@r{]} @gol
>  -fdump-tree-phiopt@r{[}-@var{n}@r{]} @gol
> +-fdump-tree-backprop@r{[}-@var{n}@r{]} @gol
>  -fdump-tree-forwprop@r{[}-@var{n}@r{]} @gol
>  -fdump-tree-nrv -fdump-tree-vect @gol
>  -fdump-tree-sink @gol
> @@ -451,8 +452,8 @@ Objective-C and Objective-C++ Dialects}.
>  -fstrict-overflow -fthread-jumps -ftracer -ftree-bit-ccp @gol
>  -ftree-builtin-call-dce -ftree-ccp -ftree-ch @gol
>  -ftree-coalesce-vars -ftree-copy-prop -ftree-dce -ftree-dominator-opts @gol
> --ftree-dse -ftree-forwprop -ftree-fre -ftree-loop-if-convert @gol
> --ftree-loop-if-convert-stores -ftree-loop-im @gol
> +-ftree-dse -ftree-backprop -ftree-forwprop -ftree-fre @gol
> +-ftree-loop-if-convert -ftree-loop-if-convert-stores -ftree-loop-im @gol
>  -ftree-phiprop -ftree-loop-distribution -ftree-loop-distribute-patterns @gol
>  -ftree-loop-ivcanon -ftree-loop-linear -ftree-loop-optimize @gol
>  -ftree-loop-vectorize @gol
> @@ -7236,6 +7237,12 @@ name is made by appending @file{.dse} to the source file name.
>  Dump each function after optimizing PHI nodes into straightline code.  The file
>  name is made by appending @file{.phiopt} to the source file name.
>
> +@item backprop
> +@opindex fdump-tree-backprop
> +Dump each function after back-propagating use information up the definition
> +chain.  The file name is made by appending @file{.backprop} to the
> +source file name.
> +
>  @item forwprop
>  @opindex fdump-tree-forwprop
>  Dump each function after forward propagating single use variables.  The file
> @@ -7716,6 +7723,7 @@ compilation time.
>  -ftree-dce @gol
>  -ftree-dominator-opts @gol
>  -ftree-dse @gol
> +-ftree-backprop @gol
>  -ftree-forwprop @gol
>  -ftree-fre @gol
>  -ftree-phiprop @gol
> @@ -8658,6 +8666,13 @@ enabled by default at @option{-O2} and @option{-O3}.
>  Make partial redundancy elimination (PRE) more aggressive.  This flag is
>  enabled by default at @option{-O3}.
>
> +@item -ftree-backprop
> +@opindex ftree-backprop
> +Propagate information about uses of a value up the definition chain
> +in order to simplify the definitions.  For example, this pass strips
> +sign operations if the sign of a value never matters.  The flag is
> +enabled by default at @option{-O} and higher.
> +
>  @item -ftree-forwprop
>  @opindex ftree-forwprop
>  Perform forward propagation on trees.  This flag is enabled by default
> diff --git a/gcc/fold-const.c b/gcc/fold-const.c
> index de45a2c..7f00e72 100644
> --- a/gcc/fold-const.c
> +++ b/gcc/fold-const.c
> @@ -110,7 +110,6 @@ enum comparison_code {
>    COMPCODE_TRUE = 15
>  };
>
> -static bool negate_mathfn_p (enum built_in_function);
>  static bool negate_expr_p (tree);
>  static tree negate_expr (tree);
>  static tree split_tree (tree, enum tree_code, tree *, tree *, tree *, int);
> @@ -319,7 +318,7 @@ fold_overflow_warning (const char* gmsgid, enum warn_strict_overflow_code wc)
>  /* Return true if the built-in mathematical function specified by CODE
>     is odd, i.e. -f(x) == f(-x).  */
>
> -static bool
> +bool
>  negate_mathfn_p (enum built_in_function code)
>  {
>    switch (code)

Belongs more to builtins.[ch] if exported.

> diff --git a/gcc/fold-const.h b/gcc/fold-const.h
> index ee74dc8..4d5b24b 100644
> --- a/gcc/fold-const.h
> +++ b/gcc/fold-const.h
> @@ -173,6 +173,7 @@ extern tree sign_bit_p (tree, const_tree);
>  extern tree exact_inverse (tree, tree);
>  extern tree const_unop (enum tree_code, tree, tree);
>  extern tree const_binop (enum tree_code, tree, tree, tree);
> +extern bool negate_mathfn_p (enum built_in_function);
>
>  /* Return OFF converted to a pointer offset type suitable as offset for
>     POINTER_PLUS_EXPR.  Use location LOC for this conversion.  */
> diff --git a/gcc/passes.def b/gcc/passes.def
> index dc3f44c..36d2b3b 100644
> --- a/gcc/passes.def
> +++ b/gcc/passes.def
> @@ -159,6 +159,7 @@ along with GCC; see the file COPYING3.  If not see
>        /* After CCP we rewrite no longer addressed locals into SSA
>          form if possible.  */
>        NEXT_PASS (pass_complete_unrolli);
> +      NEXT_PASS (pass_backprop);
>        NEXT_PASS (pass_phiprop);
>        NEXT_PASS (pass_forwprop);

Any reason to not put this later?  I was thinking before reassoc.

>        NEXT_PASS (pass_object_sizes);
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-1.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-1.c
> new file mode 100644
> index 0000000..ff88ae8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-1.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-backprop-details" }  */
> +
> +/* Test a simple case of non-looping code in which both uses ignore
> +   the sign and both definitions are sign ops.  */
> +#define TEST_FUNCTION(TYPE, SUFFIX)                            \
> +  TYPE                                                         \
> +  test##SUFFIX (TYPE x, int sel1, int sel2)                    \
> +  {                                                            \
> +    TYPE input = sel1 ? -x : __builtin_fabs##SUFFIX (x);       \
> +    if (sel2)                                                  \
> +      return __builtin_cos##SUFFIX (input);                    \
> +    else                                                       \
> +      return __builtin_cosh##SUFFIX (input);                   \
> +  }
> +
> +TEST_FUNCTION (float, f)
> +TEST_FUNCTION (double, )
> +TEST_FUNCTION (long double, l)
> +
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -x} 3 "backprop" } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <x} 3 "backprop" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-2.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-2.c
> new file mode 100644
> index 0000000..aa1233a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-2.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-backprop-details" }  */
> +
> +/* Test a simple case of non-looping code in which both uses ignore
> +   the sign but only one definition is a sign op.  */
> +#define TEST_FUNCTION(TYPE, SUFFIX)                            \
> +  TYPE                                                         \
> +  test##SUFFIX (TYPE x, int sel1, int sel2)                    \
> +  {                                                            \
> +    TYPE input = sel1 ? -x : x + 1;                            \
> +    if (sel2)                                                  \
> +      return __builtin_cos##SUFFIX (input);                    \
> +    else                                                       \
> +      return __builtin_cosh##SUFFIX (input);                   \
> +  }
> +
> +TEST_FUNCTION (float, f)
> +TEST_FUNCTION (double, )
> +TEST_FUNCTION (long double, l)
> +
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -x} 3 "backprop" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-3.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-3.c
> new file mode 100644
> index 0000000..6381fff
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-3.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-backprop-details" }  */
> +
> +/* Test a simple case of non-looping code in which one use ignores
> +   the sign but another doesn't.  */
> +#define TEST_FUNCTION(TYPE, SUFFIX)                            \
> +  TYPE                                                         \
> +  test##SUFFIX (TYPE x, int sel1, int sel2)                    \
> +  {                                                            \
> +    TYPE input = sel1 ? -x : __builtin_fabs##SUFFIX (x);       \
> +    if (sel2)                                                  \
> +      return __builtin_cos##SUFFIX (input);                    \
> +    else                                                       \
> +      return __builtin_sin##SUFFIX (input);                    \
> +  }
> +
> +TEST_FUNCTION (float, f)
> +TEST_FUNCTION (double, )
> +TEST_FUNCTION (long double, l)
> +
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -x} 0 "backprop" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-4.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-4.c
> new file mode 100644
> index 0000000..fc96f3e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-4.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-backprop-details" }  */
> +
> +/* Test a simple reduction loop in which all inputs are sign ops and
> +   the consumer doesn't care about the sign.  */
> +#define TEST_FUNCTION(TYPE, SUFFIX)                    \
> +  TYPE                                                 \
> +  test##SUFFIX (TYPE x, TYPE y, TYPE *array, int n)    \
> +  {                                                    \
> +    x = __builtin_copysign##SUFFIX (x, y);             \
> +    for (int i = 0; i < n; ++i)                                \
> +      x *= -array[i];                                  \
> +    return __builtin_hypot##SUFFIX (x, y);             \
> +  }
> +
> +TEST_FUNCTION (float, f)
> +TEST_FUNCTION (double, )
> +TEST_FUNCTION (long double, l)
> +
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = __builtin_copysign} 3 "backprop" } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 3 "backprop" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-5.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-5.c
> new file mode 100644
> index 0000000..90e18cb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-5.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-backprop-details" }  */
> +
> +/* Test a loop that does both a multiplication and addition.  The addition
> +   should prevent any sign ops from being removed.  */
> +#define TEST_FUNCTION(TYPE, SUFFIX)                    \
> +  TYPE                                                 \
> +  test##SUFFIX (TYPE x, TYPE y, TYPE *array, int n)    \
> +  {                                                    \
> +    x = __builtin_copysign##SUFFIX (x, y);             \
> +    for (int i = 0; i < n; ++i)                                \
> +      x = (x + 1) * array[i];                          \
> +    return __builtin_hypot##SUFFIX (x, y);             \
> +  }
> +
> +TEST_FUNCTION (float, f)
> +TEST_FUNCTION (double, )
> +TEST_FUNCTION (long double, l)
> +
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = __builtin_copysign} 0 "backprop" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> new file mode 100644
> index 0000000..31f0571
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-backprop-details" }  */
> +
> +void start (void *);
> +void end (void *);
> +
> +/* Test that we optimize the contents of infinite loops.  */
> +#define TEST_FUNCTION(TYPE, SUFFIX)                    \
> +  void                                                 \
> +  test##SUFFIX (TYPE *array, TYPE y, int n)            \
> +  {                                                    \
> +    for (;;)                                           \
> +      {                                                        \
> +       start (array);                                  \
> +       TYPE x = -__builtin_fabs##SUFFIX (array[-1]);   \
> +       for (int i = 0; i < n; ++i)                     \
> +         x = -x / array[i];                            \
> +       array[-1] = x * x;                              \
> +       array[-2] = __builtin_fma##SUFFIX (x, x, y);    \
> +       array[-3] = __builtin_pow##SUFFIX (x, 20);      \
> +       end (array);                                    \
> +      }                                                        \
> +  }
> +
> +TEST_FUNCTION (float, f)
> +TEST_FUNCTION (double, )
> +TEST_FUNCTION (long double, l)
> +
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" } } */
> diff --git a/gcc/timevar.def b/gcc/timevar.def
> index ac41075..ef16226 100644
> --- a/gcc/timevar.def
> +++ b/gcc/timevar.def
> @@ -163,6 +163,7 @@ DEFTIMEVAR (TV_TREE_PRE                  , "tree PRE")
>  DEFTIMEVAR (TV_TREE_FRE                     , "tree FRE")
>  DEFTIMEVAR (TV_TREE_SINK             , "tree code sinking")
>  DEFTIMEVAR (TV_TREE_PHIOPT          , "tree linearize phis")
> +DEFTIMEVAR (TV_TREE_BACKPROP        , "tree backward propagate")
>  DEFTIMEVAR (TV_TREE_FORWPROP        , "tree forward propagate")
>  DEFTIMEVAR (TV_TREE_PHIPROP         , "tree phiprop")
>  DEFTIMEVAR (TV_TREE_DCE                     , "tree conservative DCE")
> diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
> index c37e4b2..333b5a7 100644
> --- a/gcc/tree-pass.h
> +++ b/gcc/tree-pass.h
> @@ -497,6 +497,7 @@ extern gimple_opt_pass *make_pass_cleanup_cfg_post_optimizing (gcc::context
>                                                                *ctxt);
>  extern gimple_opt_pass *make_pass_init_datastructures (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_fixup_cfg (gcc::context *ctxt);
> +extern gimple_opt_pass *make_pass_backprop (gcc::context *ctxt);
>
>  extern rtl_opt_pass *make_pass_expand (gcc::context *ctxt);
>  extern rtl_opt_pass *make_pass_instantiate_virtual_regs (gcc::context *ctxt);
> diff --git a/gcc/tree-ssa-backprop.c b/gcc/tree-ssa-backprop.c
> new file mode 100644
> index 0000000..bf2dcd9
> --- /dev/null
> +++ b/gcc/tree-ssa-backprop.c

gimple-ssa-backprop.c

> @@ -0,0 +1,900 @@
> +/* Back-propagation of usage information to definitions.
> +   Copyright (C) 2015 Free Software Foundation, Inc.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify
> +it under the terms of the GNU General Public License as published by
> +the Free Software Foundation; either version 3, or (at your option)
> +any later version.
> +
> +GCC is distributed in the hope that it will be useful,
> +but WITHOUT ANY WARRANTY; without even the implied warranty of
> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +GNU General Public License for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +<http://www.gnu.org/licenses/>.  */
> +
> +/* This pass propagates information that is common to all uses of an SSA
> +   name back up through the sequence of statements that generate it,
> +   simplifying the statements where possible.  Sometimes this can expose
> +   fully or partially dead code, but the main focus is simplifying
> +   computations.
> +
> +   At the moment the pass only handles one piece of information: whether the
> +   sign of a value matters, and therefore whether sign-changing operations
> +   can be skipped.  The pass could be extended to more interesting
> +   information in future, such as which bits of an integer are significant.
> +
> +   For example, take the function:
> +
> +     double
> +     f (double *a, int n, double start)
> +     {
> +       double x = fabs (start);
> +       for (int i = 0; i < n; ++i)
> +        x *= a[i];
> +       return __builtin_cos (x);
> +     }
> +
> +   cos(x) == cos(-x), so the sign of the final x doesn't matter.
> +   That x is the result of a series of multiplications, and if
> +   the sign of the result of a multiplication doesn't matter,
> +   the signs of the inputs don't matter either.
> +
> +   The pass would replace the incoming value of x (i.e. fabs(start))
> +   with start.  Since there are no other uses of the fabs result,
> +   the call would get deleted as dead.
> +
> +   The algorithm is:
> +
> +   (1) Do a post-order traversal of the blocks in the function, walking
> +       each block backwards.  For each potentially-simplifiable statement
> +       that defines an SSA name X, examine all uses of X to see what
> +       information is actually significant.  Record this as INFO_MAP[X].
> +       Optimistically ignore for now any back-edge references to
> +       unprocessed phis.
> +
> +   (2) Iteratively reduce the optimistic result of (1) until we reach
> +       a maximal fixed point (which at the moment would mean revisiting
> +       statements at most once).  First push all SSA names that used an
> +       optimistic assumption about a backedge phi onto a worklist.
> +       While the worklist is nonempty, pick off an SSA name X and recompute
> +       INFO_MAP[X].  If the value changes, push all SSA names used in the
> +       definition of X onto the worklist.
> +
> +   (3) Iterate over each SSA name X with info in INFO_MAP, in the
> +       opposite order to (1), i.e. a forward reverse-post-order walk.
> +       Try to optimize the definition of X using INFO_MAP[X] and fold
> +       the result.  (This ensures that we fold definitions before uses.)
> +
> +   (4) Iterate over each SSA name X with info in INFO_MAP, in the same
> +       order as (1), and delete any statements that are now dead.
> +       (This ensures that if a sequence of statements is dead,
> +       we delete the last statement first.)
> +
> +   Note that this pass does not deal with direct redundancies,
> +   such as cos(-x)->cos(x).  match.pd handles those cases instead.  */
> +
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "backend.h"
> +#include "tree.h"
> +#include "gimple.h"
> +#include "gimple-iterator.h"
> +#include "ssa.h"
> +#include "fold-const.h"
> +#include "tree-pass.h"
> +#include "cfganal.h"
> +#include "gimple-pretty-print.h"
> +#include "tree-cfg.h"
> +#include "tree-ssa-propagate.h"
> +#include "gimple-fold.h"
> +#include "alloc-pool.h"
> +#include "tree-hash-traits.h"
> +
> +namespace {
> +
> +/* Information about a group of uses of an SSA name.  */
> +struct usage_info
> +{
> +  usage_info () : flag_word (0) {}
> +  usage_info &operator &= (const usage_info &);
> +  usage_info operator & (const usage_info &) const;
> +  bool operator == (const usage_info &) const;
> +  bool operator != (const usage_info &) const;
> +  bool is_useful () const;
> +
> +  static usage_info intersection_identity ();
> +
> +  union
> +  {
> +    struct
> +    {
> +      /* True if the uses treat x and -x in the same way.  */
> +      unsigned int ignore_sign : 1;
> +    } flags;
> +    /* All the flag bits as a single int.  */
> +    unsigned int flag_word;
> +  };
> +};
> +
> +/* Return an X such that X & Y == Y for all Y.  This is the most
> +   optimistic assumption possible.  */
> +
> +usage_info
> +usage_info::intersection_identity ()
> +{
> +  usage_info ret;
> +  ret.flag_word = -1;
> +  return ret;
> +}
> +
> +/* Intersect *THIS with OTHER, so that *THIS describes all uses covered
> +   by the original *THIS and OTHER.  */
> +
> +usage_info &
> +usage_info::operator &= (const usage_info &other)
> +{
> +  flag_word &= other.flag_word;
> +  return *this;
> +}
> +
> +/* Return the intersection of *THIS and OTHER, i.e. a structure that
> +   describes all uses covered by *THIS and OTHER.  */
> +
> +usage_info
> +usage_info::operator & (const usage_info &other) const
> +{
> +  usage_info info (*this);
> +  info &= other;
> +  return info;
> +}
> +
> +bool
> +usage_info::operator == (const usage_info &other) const
> +{
> +  return flag_word == other.flag_word;
> +}
> +
> +bool
> +usage_info::operator != (const usage_info &other) const
> +{
> +  return !operator == (other);
> +}
> +
> +/* Return true if *THIS is not simply the default, safe assumption.  */
> +
> +bool
> +usage_info::is_useful () const
> +{
> +  return flag_word != 0;
> +}
> +
> +/* Start a dump line about SSA name VAR.  */
> +
> +static void
> +dump_usage_prefix (FILE *file, tree var)
> +{
> +  fprintf (file, "  ");
> +  print_generic_expr (file, var, 0);
> +  fprintf (file, ": ");
> +}
> +
> +/* Print INFO to FILE.  */
> +
> +static void
> +dump_usage_info (FILE *file, tree var, usage_info *info)
> +{
> +  if (info->flags.ignore_sign)
> +    {
> +      dump_usage_prefix (file, var);
> +      fprintf (file, "sign bit not important\n");
> +    }
> +}
> +
> +/* Represents one execution of the pass.  */
> +class backprop
> +{
> +public:
> +  backprop (function *);
> +  ~backprop ();
> +
> +  void execute ();
> +
> +private:
> +  const usage_info *lookup_operand (tree);
> +
> +  void push_to_worklist (tree);
> +  tree pop_from_worklist ();
> +
> +  void process_builtin_call_use (gcall *, tree, usage_info *);
> +  void process_assign_use (gassign *, tree, usage_info *);
> +  void process_phi_use (gphi *, usage_info *);
> +  void process_use (gimple *, tree, usage_info *);
> +  bool intersect_uses (tree, usage_info *);
> +  void reprocess_inputs (gimple *);
> +  void process_var (tree);
> +  void process_block (basic_block);
> +
> +  void complete_change (gimple *);
> +  void optimize_builtin_call (gcall *, const usage_info *);
> +  void optimize_assign (gassign *, const usage_info *);
> +  void optimize_phi (gphi *, const usage_info *);
> +
> +  typedef hash_map <tree_ssa_name_hash, usage_info *> info_map_type;
> +  typedef std::pair <tree, usage_info *> var_info_pair;
> +
> +  /* The function we're optimizing.  */
> +  function *m_fn;
> +
> +  /* Pool for allocating usage_info structures.  */
> +  object_allocator <usage_info> m_info_pool;
> +
> +  /* Maps an SSA name to a description of all uses of that SSA name.
> +     All the usage_infos satisfy is_useful.  */
> +  info_map_type m_info_map;
> +
> +  /* Post-ordered list of all potentially-interesting SSA names,
> +     along with information that describes all uses.  */
> +  auto_vec <var_info_pair, 128> m_vars;
> +
> +  /* A bitmap of blocks that we have finished processing in the initial
> +     post-order walk.  */
> +  sbitmap m_visited_blocks;
> +
> +  /* A worklist of SSA names whose definitions need to be reconsidered.  */
> +  auto_vec <tree, 64> m_worklist;
> +
> +  /* The SSA names in M_WORKLIST, identified by their SSA_NAME_VERSION.  */
> +  bitmap m_worklist_names;
> +};
> +
> +backprop::backprop (function *fn)
> +  : m_fn (fn),
> +    m_info_pool ("usage_info"),
> +    m_visited_blocks (sbitmap_alloc (last_basic_block_for_fn (m_fn))),
> +    m_worklist_names (BITMAP_ALLOC (NULL))
> +{
> +  bitmap_clear (m_visited_blocks);
> +}
> +
> +backprop::~backprop ()
> +{
> +  BITMAP_FREE (m_worklist_names);
> +  sbitmap_free (m_visited_blocks);
> +  m_info_pool.release ();
> +}
> +
> +/* Return usage information for general operand OP, or null if none.  */
> +
> +const usage_info *
> +backprop::lookup_operand (tree op)
> +{
> +  if (op && TREE_CODE (op) == SSA_NAME)
> +    if (usage_info **slot = m_info_map.get (op))
> +      return *slot;

Is this usually fully populated at the end?  If so a direct mapping using
SSA_NAME_VERSION would be cheaper.

> +  return NULL;
> +}
> +
> +/* Add SSA name VAR to the worklist, if it isn't on the worklist already.  */
> +
> +void
> +backprop::push_to_worklist (tree var)
> +{
> +  if (!bitmap_set_bit (m_worklist_names, SSA_NAME_VERSION (var)))
> +    return;
> +  m_worklist.safe_push (var);
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "[WORKLIST] Pushing ");
> +      print_generic_expr (dump_file, var, 0);
> +      fprintf (dump_file, "\n");
> +    }
> +}
> +
> +/* Remove and return the next SSA name from the worklist.  The worklist
> +   is known to be nonempty.  */
> +
> +tree
> +backprop::pop_from_worklist ()
> +{
> +  tree var = m_worklist.pop ();
> +  bitmap_clear_bit (m_worklist_names, SSA_NAME_VERSION (var));
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "[WORKLIST] Popping ");
> +      print_generic_expr (dump_file, var, 0);
> +      fprintf (dump_file, "\n");
> +    }
> +  return var;
> +}
> +
> +/* Make INFO describe all uses of RHS in CALL, which is a call to a
> +   built-in function.  */
> +
> +void
> +backprop::process_builtin_call_use (gcall *call, tree rhs, usage_info *info)
> +{
> +  enum built_in_function fn = DECL_FUNCTION_CODE (gimple_call_fndecl (call));
> +  tree lhs = gimple_call_lhs (call);
> +  switch (fn)
> +    {
> +    CASE_FLT_FN (BUILT_IN_COS):
> +    CASE_FLT_FN (BUILT_IN_COSH):
> +    CASE_FLT_FN (BUILT_IN_CCOS):
> +    CASE_FLT_FN (BUILT_IN_CCOSH):
> +    CASE_FLT_FN (BUILT_IN_HYPOT):
> +      /* The signs of all inputs are ignored.  */
> +      info->flags.ignore_sign = true;
> +      break;
> +
> +    CASE_FLT_FN (BUILT_IN_COPYSIGN):
> +      /* The sign of the first input is ignored.  */
> +      if (rhs != gimple_call_arg (call, 1))
> +       info->flags.ignore_sign = true;
> +      break;
> +
> +    CASE_FLT_FN (BUILT_IN_POW):
> +      {
> +       /* The sign of the first input is ignored as long as the second
> +          input is an even real.  */
> +       tree power = gimple_call_arg (call, 1);
> +       HOST_WIDE_INT n;
> +       if (TREE_CODE (power) == REAL_CST
> +           && real_isinteger (&TREE_REAL_CST (power), &n)
> +           && (n & 1) == 0)
> +         info->flags.ignore_sign = true;
> +       break;
> +      }
> +
> +    CASE_FLT_FN (BUILT_IN_FMA):
> +      /* In X * X + Y, where Y is distinct from X, the sign of X doesn't
> +        matter.  */
> +      if (gimple_call_arg (call, 0) == rhs
> +         && gimple_call_arg (call, 1) == rhs
> +         && gimple_call_arg (call, 2) != rhs)
> +       info->flags.ignore_sign = true;
> +      break;
> +
> +    default:
> +      if (negate_mathfn_p (fn))
> +       /* The sign of the (single) input doesn't matter provided
> +          that the sign of the output doesn't matter.  */
> +       if (const usage_info *lhs_info = lookup_operand (lhs))
> +         info->flags.ignore_sign = lhs_info->flags.ignore_sign;
> +      break;
> +    }
> +}
> +
> +/* Make INFO describe all uses of RHS in ASSIGN.  */
> +
> +void
> +backprop::process_assign_use (gassign *assign, tree rhs, usage_info *info)
> +{
> +  tree lhs = gimple_assign_lhs (assign);
> +  switch (gimple_assign_rhs_code (assign))
> +    {
> +    case ABS_EXPR:
> +      /* The sign of the input doesn't matter.  */
> +      info->flags.ignore_sign = true;
> +      break;
> +
> +    case COND_EXPR:
> +      /* For A = B ? C : D, propagate information about all uses of A
> +        to B and C.  */
> +      if (rhs != gimple_assign_rhs1 (assign))
> +       if (const usage_info *lhs_info = lookup_operand (lhs))

Use && instead of nested if

> +         *info = *lhs_info;
> +      break;
> +
> +    case FMA_EXPR:
> +      /* In X * X + Y, where Y is distinct from X, the sign of X doesn't
> +        matter.  */
> +      if (gimple_assign_rhs1 (assign) == rhs
> +         && gimple_assign_rhs2 (assign) == rhs
> +         && gimple_assign_rhs3 (assign) != rhs)
> +       info->flags.ignore_sign = true;
> +      break;
> +
> +    case MULT_EXPR:
> +      /* In X * X, the sign of X doesn't matter.  */
> +      if (gimple_assign_rhs1 (assign) == rhs
> +         && gimple_assign_rhs2 (assign) == rhs)
> +       info->flags.ignore_sign = true;
> +      /* Fall through.  */
> +
> +    case NEGATE_EXPR:
> +    case RDIV_EXPR:
> +      /* If the sign of the result doesn't matter, the sign of the inputs
> +        doesn't matter either.  */
> +      if (FLOAT_TYPE_P (TREE_TYPE (rhs)))
> +       if (const usage_info *lhs_info = lookup_operand (lhs))
> +         info->flags.ignore_sign = lhs_info->flags.ignore_sign;
> +      break;
> +
> +    default:
> +      break;
> +    }
> +}
> +
> +/* Make INFO describe the uses of PHI's result.  */
> +
> +void
> +backprop::process_phi_use (gphi *phi, usage_info *info)
> +{
> +  tree result = gimple_phi_result (phi);
> +  if (const usage_info *result_info = lookup_operand (result))
> +    *info = *result_info;
> +}
> +
> +/* Make INFO describe all uses of RHS in STMT.  */
> +
> +void
> +backprop::process_use (gimple *stmt, tree rhs, usage_info *info)
> +{
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "[USE] ");
> +      print_generic_expr (dump_file, rhs, 0);
> +      fprintf (dump_file, " in ");
> +      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
> +    }
> +
> +  if (gcall *call = dyn_cast <gcall *> (stmt))
> +    {
> +      if (gimple_call_builtin_p (call, BUILT_IN_NORMAL))
> +       process_builtin_call_use (call, rhs, info);
> +    }
> +  else if (gassign *assign = dyn_cast <gassign *> (stmt))
> +    process_assign_use (assign, rhs, info);
> +  else if (gphi *phi = dyn_cast <gphi *> (stmt))
> +    process_phi_use (phi, info);
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    dump_usage_info (dump_file, rhs, info);
> +}
> +
> +/* Make INFO describe all uses of VAR, returning true if the result
> +   is useful.  If the uses include phis that haven't been processed yet,
> +   make the most optimistic assumption possible, so that we aim for
> +   a maximum rather than a minimum fixed point.  */
> +
> +bool
> +backprop::intersect_uses (tree var, usage_info *info)
> +{
> +  imm_use_iterator iter;
> +  gimple *stmt;
> +  *info = usage_info::intersection_identity ();
> +  FOR_EACH_IMM_USE_STMT (stmt, iter, var)
> +    {
> +      if (is_a <gphi *> (stmt)
> +         && !bitmap_bit_p (m_visited_blocks, gimple_bb (stmt)->index))
> +       {
> +         /* Skip unprocessed phis.  */
> +         if (dump_file && (dump_flags & TDF_DETAILS))
> +           {
> +             fprintf (dump_file, "[BACKEDGE] ");
> +             print_generic_expr (dump_file, var, 0);
> +             fprintf (dump_file, " in ");
> +             print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
> +           }
> +       }
> +      else
> +       {
> +         usage_info subinfo;
> +         process_use (stmt, var, &subinfo);
> +         *info &= subinfo;
> +         if (!info->is_useful ())
> +           {
> +             BREAK_FROM_IMM_USE_STMT (iter);
> +             return false;
> +           }
> +       }
> +    }
> +  return true;
> +}
> +
> +/* Queue for reconsideration any input of STMT that has information
> +   associated with it.  This is used if that information might be
> +   too optimistic.  */
> +
> +void
> +backprop::reprocess_inputs (gimple *stmt)
> +{
> +  use_operand_p use_p;
> +  ssa_op_iter oi;
> +  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
> +    {
> +      tree var = get_use_from_ptr (use_p);
> +      if (lookup_operand (var))
> +       push_to_worklist (var);
> +    }
> +}
> +
> +/* Say that we're recording INFO for SSA name VAR, or that we're deleting
> +   existing information if INFO is null.  INTRO describes the change.  */
> +
> +static void
> +dump_var_info (tree var, usage_info *info, const char *intro)
> +{
> +  fprintf (dump_file, "[DEF] %s for ", intro);
> +  print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (var), 0, TDF_SLIM);
> +  if (info)
> +    dump_usage_info (dump_file, var, info);
> +}
> +
> +/* Process all uses of VAR and record or update the result in
> +   M_INFO_MAP and M_VARS.  */
> +
> +void
> +backprop::process_var (tree var)
> +{
> +  if (has_zero_uses (var))
> +    return;
> +
> +  usage_info info;
> +  intersect_uses (var, &info);

It feels somewhat backward that we do a backward walk over all stmts
(thus processing
all use-stmts before defs) but then "forward" process uses of defs.
Wouldn't it be
easier(?) to "intersect" incrementally by processing only uses for all
stmts we walk?
Or is this about avoiding to allocate a 'info' for DEFs that end up
with no interesting
info?

> +
> +  gimple *stmt = SSA_NAME_DEF_STMT (var);
> +  if (info.is_useful ())
> +    {
> +      bool existed;
> +      usage_info *&map_info = m_info_map.get_or_insert (var, &existed);
> +      if (!existed)
> +       {
> +         /* Recording information about VAR for the first time.  */
> +         map_info = m_info_pool.allocate ();
> +         *map_info = info;
> +         m_vars.safe_push (var_info_pair (var, map_info));
> +         if (dump_file && (dump_flags & TDF_DETAILS))
> +           dump_var_info (var, map_info, "Recording new information");
> +
> +         /* If STMT is a phi, reprocess any backedge uses.  This is a
> +            no-op for other uses, which won't have any information
> +            associated with them.  */
> +         if (is_a <gphi *> (stmt))
> +           reprocess_inputs (stmt);
> +       }
> +      else if (info != *map_info)
> +       {
> +         /* Recording information that is less optimistic than before.  */
> +         gcc_checking_assert ((info & *map_info) == info);
> +         *map_info = info;
> +         if (dump_file && (dump_flags & TDF_DETAILS))
> +           dump_var_info (var, map_info, "Updating information");
> +         reprocess_inputs (stmt);
> +       }
> +    }
> +  else
> +    {
> +      if (usage_info **slot = m_info_map.get (var))
> +       {
> +         /* Removing previously-recorded information.  */
> +         **slot = info;
> +         m_info_map.remove (var);
> +         if (dump_file && (dump_flags & TDF_DETAILS))
> +           dump_var_info (var, NULL, "Deleting information");
> +         reprocess_inputs (stmt);
> +       }
> +      else
> +       {
> +         /* If STMT is a phi, remove any information recorded for
> +            its arguments.  */
> +         if (is_a <gphi *> (stmt))
> +           reprocess_inputs (stmt);
> +       }
> +    }
> +}
> +
> +/* Process all statements and phis in BB, during the first post-order walk.  */
> +
> +void
> +backprop::process_block (basic_block bb)
> +{
> +  for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
> +       gsi_prev (&gsi))
> +    {
> +      tree lhs = gimple_get_lhs (gsi_stmt (gsi));
> +      if (lhs && TREE_CODE (lhs) == SSA_NAME)
> +       process_var (lhs);

I think it would be cleaner to walk over al SSA defs (that includes asms which
you seem to miss, no idea if that's important)

> +    }
> +  for (gphi_iterator gpi = gsi_start_phis (bb); !gsi_end_p (gpi);
> +       gsi_next (&gpi))
> +    process_var (gimple_phi_result (gpi.phi ()));
> +}
> +
> +/* Delete the definition of VAR, which has no uses.  */
> +
> +static void
> +remove_unused_var (tree var)
> +{
> +  gimple *stmt = SSA_NAME_DEF_STMT (var);
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "Deleting ");
> +      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
> +    }
> +  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
> +  gsi_remove (&gsi, true);
> +  release_ssa_name (var);

What if var wasn't the only def?  Please use release_defs ()
so in case this is a call with a virtual definition it is released.

> +}
> +
> +/* Note that we're replacing OLD_RHS with NEW_RHS in STMT.  */
> +
> +static void
> +note_replacement (gimple *stmt, tree old_rhs, tree new_rhs)
> +{
> +  fprintf (dump_file, "Replacing use of ");
> +  print_generic_expr (dump_file, old_rhs, 0);
> +  fprintf (dump_file, " with ");
> +  print_generic_expr (dump_file, new_rhs, 0);
> +  fprintf (dump_file, " in ");
> +  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
> +}
> +
> +/* If RHS is an SSA name whose definition just changes the sign of a value,
> +   return that other value, otherwise return null.  */
> +
> +static tree
> +strip_sign_op_1 (tree rhs)
> +{
> +  if (TREE_CODE (rhs) != SSA_NAME)
> +    return NULL_TREE;
> +
> +  gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
> +  if (gassign *assign = dyn_cast <gassign *> (def_stmt))
> +    switch (gimple_assign_rhs_code (assign))
> +      {
> +      case ABS_EXPR:
> +      case NEGATE_EXPR:
> +       return gimple_assign_rhs1 (assign);
> +
> +      default:
> +       break;
> +      }
> +  else if (gcall *call = dyn_cast <gcall *> (def_stmt))
> +    {
> +      if (gimple_call_builtin_p (call, BUILT_IN_NORMAL))
> +       switch (DECL_FUNCTION_CODE (gimple_call_fndecl (call)))
> +         {
> +         CASE_FLT_FN (BUILT_IN_COPYSIGN):
> +           return gimple_call_arg (call, 0);
> +
> +         default:
> +           break;
> +         }
> +    }
> +
> +  return NULL_TREE;
> +}
> +
> +/* If RHS is an SSA name whose definition just changes the sign of a value,
> +   strip all such operations and return the ultimate input to them.
> +   Return null otherwise.
> +
> +   Although this could in principle lead to quadratic searching,
> +   in practice a long sequence of sign manipulations should already
> +   have been folded down.  E.g. --x -> x, abs(-x) -> abs(x).  We search
> +   for more than one operation in order to catch cases like -abs(x).  */
> +
> +static tree
> +strip_sign_op (tree rhs)
> +{
> +  tree new_rhs = strip_sign_op_1 (rhs);
> +  if (!new_rhs)
> +    return NULL_TREE;
> +  while (tree next = strip_sign_op_1 (new_rhs))
> +    new_rhs = next;
> +  return new_rhs;
> +}
> +
> +/* Strip all sign operations from the rvalue at *RHS_PTR in STMT.
> +   Return true if something changed.  The caller is responsible
> +   for the necessary bookkeeping.  */
> +
> +static bool
> +strip_sign_op (gimple *stmt, tree *rhs_ptr)
> +{
> +  if (tree new_rhs = strip_sign_op (*rhs_ptr))
> +    {
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +       note_replacement (stmt, *rhs_ptr, new_rhs);
> +      *rhs_ptr = new_rhs;

So it looks you are only changing stmts when the stmt result produces
the same value.  Just double-checking, as otherwise you'd need to care
about debug stmts ...

> +      return true;
> +    }
> +  return false;
> +}
> +
> +/* STMT has been changed.  Give the fold machinery a chance to simplify
> +   and canonicalize it (e.g. by ensuring that commutative operands have
> +   the right order), then record the updates.  */
> +
> +void
> +backprop::complete_change (gimple *stmt)
> +{
> +  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
> +  if (fold_stmt (&gsi))
> +    {
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +       {
> +         fprintf (dump_file, "  which folds to: ");
> +         print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_SLIM);
> +       }

I suppose it can't (yet) happen that the result no longer throws.

> +    }
> +  update_stmt (gsi_stmt (gsi));
> +}
> +
> +/* Optimize CALL, a call to a built-in function, on the basis that INFO
> +   describes all uses of the LHS.  */
> +
> +void
> +backprop::optimize_builtin_call (gcall *call, const usage_info *info)
> +{
> +  bool changed_p = false;
> +  enum built_in_function fn = DECL_FUNCTION_CODE (gimple_call_fndecl (call));
> +  /* If we have an f such that -f(x) = f(-x), and if the sign of the result
> +     doesn't matter, strip any sign operations from the input.  */
> +  if (info->flags.ignore_sign
> +      && negate_mathfn_p (fn)
> +      && strip_sign_op (call, gimple_call_arg_ptr (call, 0)))
> +    changed_p = true;
> +  if (changed_p)
> +    complete_change (call);
> +}
> +
> +/* Optimize ASSIGN on the basis that INFO describes all uses of the lhs.  */
> +
> +void
> +backprop::optimize_assign (gassign *assign, const usage_info *info)
> +{
> +  bool changed_p = false;
> +  switch (gimple_assign_rhs_code (assign))
> +    {
> +    case MULT_EXPR:
> +    case RDIV_EXPR:
> +      /* If the sign of the result doesn't matter, strip sign operations
> +        from both inputs.  */
> +      if (info->flags.ignore_sign
> +         && (strip_sign_op (assign, gimple_assign_rhs1_ptr (assign))
> +             || strip_sign_op (assign, gimple_assign_rhs2_ptr (assign))))
> +       changed_p = true;
> +      break;
> +
> +    case COND_EXPR:
> +      /* If the sign of A ? B : C doesn't matter, strip sign operations
> +        from both B and C.  */
> +      if (info->flags.ignore_sign
> +         && (strip_sign_op (assign, gimple_assign_rhs2_ptr (assign))
> +             || strip_sign_op (assign, gimple_assign_rhs3_ptr (assign))))
> +       changed_p = true;
> +      break;
> +
> +    default:
> +      break;
> +    }
> +  if (changed_p)
> +    complete_change (assign);
> +}
> +
> +/* Optimize PHI on the basis that INFO describes all uses of the result.  */
> +
> +void
> +backprop::optimize_phi (gphi *phi, const usage_info *info)
> +{
> +  /* If the sign of the result doesn't matter, strip sign operations
> +     from all arguments.  */
> +  if (info->flags.ignore_sign)
> +    {
> +      use_operand_p use;
> +      ssa_op_iter oi;
> +      FOR_EACH_PHI_ARG (use, phi, oi, SSA_OP_USE)
> +       if (tree new_arg = strip_sign_op (USE_FROM_PTR (use)))
> +         {
> +           if (dump_file && (dump_flags & TDF_DETAILS))
> +             note_replacement (phi, USE_FROM_PTR (use), new_arg);
> +           replace_exp (use, new_arg);

So this seems to be a case where the value _does_ change?  Why bother
to substitute in PHIs btw.?

Otherwise looks ok to me.

Thanks,
Richard.

> +         }
> +    }
> +}
> +
> +void
> +backprop::execute ()
> +{
> +  /* Phase 1: Traverse the function, making optimistic assumptions
> +     about any phi whose definition we haven't seen.  */
> +  int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (m_fn));
> +  unsigned int postorder_num = post_order_compute (postorder, false, false);
> +  for (unsigned int i = 0; i < postorder_num; ++i)
> +    {
> +      process_block (BASIC_BLOCK_FOR_FN (m_fn, postorder[i]));
> +      bitmap_set_bit (m_visited_blocks, postorder[i]);
> +    }
> +  XDELETEVEC (postorder);
> +
> +  /* Phase 2: Use the initial (perhaps overly optimistic) information
> +     to create a maximal fixed point solution.  */
> +  while (!m_worklist.is_empty ())
> +    process_var (pop_from_worklist ());
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    fprintf (dump_file, "\n");
> +
> +  /* Phase 3: Do a reverse post-order walk, using information about
> +     the uses of SSA names to optimize their definitions.  */
> +  for (unsigned int i = m_vars.length (); i-- > 0;)
> +    {
> +      usage_info *info = m_vars[i].second;
> +      if (info->is_useful ())
> +       {
> +         tree var = m_vars[i].first;
> +         gimple *stmt = SSA_NAME_DEF_STMT (var);
> +         if (gcall *call = dyn_cast <gcall *> (stmt))
> +           {
> +             if (gimple_call_builtin_p (call, BUILT_IN_NORMAL))
> +               optimize_builtin_call (call, info);
> +           }
> +         else if (gassign *assign = dyn_cast <gassign *> (stmt))
> +           optimize_assign (assign, info);
> +         else if (gphi *phi = dyn_cast <gphi *> (stmt))
> +           optimize_phi (phi, info);
> +       }
> +    }
> +
> +  /* Phase 4: Do a post-order walk, deleting statements that are no
> +     longer needed.  */
> +  for (unsigned int i = 0; i < m_vars.length (); ++i)
> +    {
> +      tree var = m_vars[i].first;
> +      if (has_zero_uses (var))
> +       remove_unused_var (var);
> +    }
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    fprintf (dump_file, "\n");
> +}
> +
> +const pass_data pass_data_backprop =
> +{
> +  GIMPLE_PASS, /* type */
> +  "backprop", /* name */
> +  OPTGROUP_NONE, /* optinfo_flags */
> +  TV_TREE_BACKPROP, /* tv_id */
> +  ( PROP_cfg | PROP_ssa ), /* properties_required */
> +  0, /* properties_provided */
> +  0, /* properties_destroyed */
> +  0, /* todo_flags_start */
> +  0, /* todo_flags_finish */
> +};
> +
> +class pass_backprop : public gimple_opt_pass
> +{
> +public:
> +  pass_backprop (gcc::context *ctxt)
> +    : gimple_opt_pass (pass_data_backprop, ctxt)
> +  {}
> +
> +  /* opt_pass methods: */
> +  opt_pass * clone () { return new pass_backprop (m_ctxt); }
> +  virtual bool gate (function *) { return flag_tree_backprop; }
> +  virtual unsigned int execute (function *);
> +
> +}; // class pass_backprop
> +
> +unsigned int
> +pass_backprop::execute (function *fn)
> +{
> +  backprop (fn).execute ();
> +  return 0;
> +}
> +
> +} // anon namespace
> +
> +gimple_opt_pass *
> +make_pass_backprop (gcc::context *ctxt)
> +{
> +  return new pass_backprop (ctxt);
> +}
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Add a pass to back-propagate use information
  2015-10-19 11:40 ` Richard Biener
@ 2015-10-19 12:38   ` Richard Sandiford
  2015-10-19 14:35     ` Richard Biener
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Sandiford @ 2015-10-19 12:38 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches

Richard Biener <richard.guenther@gmail.com> writes:
> On Thu, Oct 15, 2015 at 3:17 PM, Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>> This patch adds a pass that collects information that is common to all
>> uses of an SSA name X and back-propagates that information up the statements
>> that generate X.  The general idea is to use the information to simplify
>> instructions (rather than a pure DCE) so I've simply called it
>> tree-ssa-backprop.c, to go with tree-ssa-forwprop.c.
>>
>> At the moment the only use of the pass is to remove unnecessry sign
>> operations, so that it's effectively a global version of
>> fold_strip_sign_ops.  I'm hoping it could be extended in future to
>> record which bits of an integer are significant.  There are probably
>> other potential uses too.
>>
>> A later patch gets rid of fold_strip_sign_ops.
>>
>> Tested on x86_64-linux-gnu, aarch64-linux-gnu and arm-linux-gnueabi.
>> OK to install?
>>
>> Thanks,
>> Richard
>>
>>
>> gcc/
>>         * doc/invoke.texi (-fdump-tree-backprop, -ftree-backprop): Document.
>>         * Makefile.in (OBJS): Add tree-ssa-backprop.o.
>>         * common.opt (ftree-backprop): New option.
>>         * fold-const.h (negate_mathfn_p): Declare.
>>         * fold-const.c (negate_mathfn_p): Make public.
>>         * timevar.def (TV_TREE_BACKPROP): New.
>>         * tree-passes.h (make_pass_backprop): Declare.
>>         * passes.def (pass_backprop): Add.
>>         * tree-ssa-backprop.c: New file.
>>
>> gcc/testsuite/
>>         * gcc.dg/tree-ssa/backprop-1.c, gcc.dg/tree-ssa/backprop-2.c,
>>         gcc.dg/tree-ssa/backprop-3.c, gcc.dg/tree-ssa/backprop-4.c,
>>         gcc.dg/tree-ssa/backprop-5.c, gcc.dg/tree-ssa/backprop-6.c: New tests.
>>
>> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
>> index 783e4c9..69e669d 100644
>> --- a/gcc/Makefile.in
>> +++ b/gcc/Makefile.in
>> @@ -1445,6 +1445,7 @@ OBJS = \
>>         tree-switch-conversion.o \
>>         tree-ssa-address.o \
>>         tree-ssa-alias.o \
>> +       tree-ssa-backprop.o \
>>         tree-ssa-ccp.o \
>>         tree-ssa-coalesce.o \
>>         tree-ssa-copy.o \
>> diff --git a/gcc/common.opt b/gcc/common.opt
>> index 5060208..5aef625 100644
>> --- a/gcc/common.opt
>> +++ b/gcc/common.opt
>> @@ -2364,6 +2364,10 @@ ftree-pta
>>  Common Report Var(flag_tree_pta) Optimization
>>  Perform function-local points-to analysis on trees.
>>
>> +ftree-backprop
>> +Common Report Var(flag_tree_backprop) Init(1) Optimization
>> +Enable backward propagation of use properties at the tree level.
>
> Don't add new -ftree-* "tree" doesn't add any info for our users.  I'd
> also refer to SSA level rather than "tree" level.  Not sure if -fbackprop
> is good, but let's go for it.

OK.

>> diff --git a/gcc/fold-const.c b/gcc/fold-const.c
>> index de45a2c..7f00e72 100644
>> --- a/gcc/fold-const.c
>> +++ b/gcc/fold-const.c
>> @@ -319,7 +318,7 @@ fold_overflow_warning (const char* gmsgid, enum
> warn_strict_overflow_code wc)
>>  /* Return true if the built-in mathematical function specified by CODE
>>     is odd, i.e. -f(x) == f(-x).  */
>>
>> -static bool
>> +bool
>>  negate_mathfn_p (enum built_in_function code)
>>  {
>>    switch (code)
>
> Belongs more to builtins.[ch] if exported.

The long-term plan is to abstract away whether it's a built-in function
or an internal function, in which case I hope to have a single predicate
that handles both.  I'm not sure where the code should go after that change.
Maybe a new file?

>> diff --git a/gcc/fold-const.h b/gcc/fold-const.h
>> index ee74dc8..4d5b24b 100644
>> --- a/gcc/fold-const.h
>> +++ b/gcc/fold-const.h
>> @@ -173,6 +173,7 @@ extern tree sign_bit_p (tree, const_tree);
>>  extern tree exact_inverse (tree, tree);
>>  extern tree const_unop (enum tree_code, tree, tree);
>>  extern tree const_binop (enum tree_code, tree, tree, tree);
>> +extern bool negate_mathfn_p (enum built_in_function);
>>
>>  /* Return OFF converted to a pointer offset type suitable as offset for
>>     POINTER_PLUS_EXPR.  Use location LOC for this conversion.  */
>> diff --git a/gcc/passes.def b/gcc/passes.def
>> index dc3f44c..36d2b3b 100644
>> --- a/gcc/passes.def
>> +++ b/gcc/passes.def
>> @@ -159,6 +159,7 @@ along with GCC; see the file COPYING3.  If not see
>>        /* After CCP we rewrite no longer addressed locals into SSA
>>          form if possible.  */
>>        NEXT_PASS (pass_complete_unrolli);
>> +      NEXT_PASS (pass_backprop);
>>        NEXT_PASS (pass_phiprop);
>>        NEXT_PASS (pass_forwprop);
>
> Any reason to not put this later?  I was thinking before reassoc.

I think we're relying on FRE to notice the redundancy in the
builtins-*.c tests, once this pass has converted the version
with redundant sign ops to make it look like the version without.
reassoc is likely to be too late.

I also thought it should go before rather than after some instance
of forwprop because the pass might expose more forward folding
opportunities.  E.g. if the sign of A = -B * B doesn't matter,
we'll end up with A = B * B, which might be foldable with uses of A.
It seems less likely that forwprop would expose more backprop
opportunities.

>> diff --git a/gcc/tree-ssa-backprop.c b/gcc/tree-ssa-backprop.c
>> new file mode 100644
>> index 0000000..bf2dcd9
>> --- /dev/null
>> +++ b/gcc/tree-ssa-backprop.c
>
> gimple-ssa-backprop.c

OK.

>> +/* Return usage information for general operand OP, or null if none.  */
>> +
>> +const usage_info *
>> +backprop::lookup_operand (tree op)
>> +{
>> +  if (op && TREE_CODE (op) == SSA_NAME)
>> +    if (usage_info **slot = m_info_map.get (op))
>> +      return *slot;
>
> Is this usually fully populated at the end?  If so a direct mapping using
> SSA_NAME_VERSION would be cheaper.

No, in practice it should be very sparse.

>> +/* Make INFO describe all uses of RHS in ASSIGN.  */
>> +
>> +void
>> +backprop::process_assign_use (gassign *assign, tree rhs, usage_info *info)
>> +{
>> +  tree lhs = gimple_assign_lhs (assign);
>> +  switch (gimple_assign_rhs_code (assign))
>> +    {
>> +    case ABS_EXPR:
>> +      /* The sign of the input doesn't matter.  */
>> +      info->flags.ignore_sign = true;
>> +      break;
>> +
>> +    case COND_EXPR:
>> +      /* For A = B ? C : D, propagate information about all uses of A
>> +        to B and C.  */
>> +      if (rhs != gimple_assign_rhs1 (assign))
>> +       if (const usage_info *lhs_info = lookup_operand (lhs))
>
> Use && instead of nested if

That means introducing an extra level of braces just for something
that that isn't needed by the first statement, i.e.:

    {
      const usage_info *lhs_info;
      if (rhs != gimple_assign_rhs1 (assign)
          && (lhs_info = lookup_operand (lhs)))
        *info = *lhs_info;
      break;
    }

There also used to be a strong preference for not embedding assignments
in && and || conditions.

If there had been some other set-up for the lookup_operand call, we
would have had:

      if (rhs != gimple_assign_rhs1 (assign))
        {
          ...
          if (const usage_info *lhs_info = lookup_operand (lhs))
            ..
        }

and presumably that would have been OK.  So if the original really isn't,
acceptable, I'd rather write it as:

      if (rhs != gimple_assign_rhs1 (assign))
        {
          const usage_info *lhs_info = lookup_operand (lhs);
          if (lhs_info)
            ..
        }

I thought we'd embraced the idea of declarations in if conditions though.

>> +/* Process all uses of VAR and record or update the result in
>> +   M_INFO_MAP and M_VARS.  */
>> +
>> +void
>> +backprop::process_var (tree var)
>> +{
>> +  if (has_zero_uses (var))
>> +    return;
>> +
>> +  usage_info info;
>> +  intersect_uses (var, &info);
>
> It feels somewhat backward that we do a backward walk over all stmts
> (thus processing all use-stmts before defs) but then "forward" process
> uses of defs.  Wouldn't it be easier(?) to "intersect" incrementally
> by processing only uses for all stmts we walk?  Or is this about
> avoiding to allocate a 'info' for DEFs that end up with no interesting
> info?

Yeah, that was the idea.  In practice very few SSA names end up
with any useful information.

>> +/* Process all statements and phis in BB, during the first post-order
> walk.  */
>> +
>> +void
>> +backprop::process_block (basic_block bb)
>> +{
>> +  for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
>> +       gsi_prev (&gsi))
>> +    {
>> +      tree lhs = gimple_get_lhs (gsi_stmt (gsi));
>> +      if (lhs && TREE_CODE (lhs) == SSA_NAME)
>> +       process_var (lhs);
>
> I think it would be cleaner to walk over al SSA defs (that includes asms which
> you seem to miss, no idea if that's important)

The idea was the same here.  The point of the pass is to back-propagate
information from uses to definitions, so there's no point recording
information about SSA names whose definitions could never benefit.

>> +    }
>> +  for (gphi_iterator gpi = gsi_start_phis (bb); !gsi_end_p (gpi);
>> +       gsi_next (&gpi))
>> +    process_var (gimple_phi_result (gpi.phi ()));
>> +}
>> +
>> +/* Delete the definition of VAR, which has no uses.  */
>> +
>> +static void
>> +remove_unused_var (tree var)
>> +{
>> +  gimple *stmt = SSA_NAME_DEF_STMT (var);
>> +  if (dump_file && (dump_flags & TDF_DETAILS))
>> +    {
>> +      fprintf (dump_file, "Deleting ");
>> +      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
>> +    }
>> +  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
>> +  gsi_remove (&gsi, true);
>> +  release_ssa_name (var);
>
> What if var wasn't the only def?  Please use release_defs ()
> so in case this is a call with a virtual definition it is released.

I didn't think the calls involved here could have those, but maybe
they could because of errno?  Ugh.  Are there any other valid reasons
for having VDEFs on these calls?

>> +/* Strip all sign operations from the rvalue at *RHS_PTR in STMT.
>> +   Return true if something changed.  The caller is responsible
>> +   for the necessary bookkeeping.  */
>> +
>> +static bool
>> +strip_sign_op (gimple *stmt, tree *rhs_ptr)
>> +{
>> +  if (tree new_rhs = strip_sign_op (*rhs_ptr))
>> +    {
>> +      if (dump_file && (dump_flags & TDF_DETAILS))
>> +       note_replacement (stmt, *rhs_ptr, new_rhs);
>> +      *rhs_ptr = new_rhs;
>
> So it looks you are only changing stmts when the stmt result produces
> the same value.  Just double-checking, as otherwise you'd need to care
> about debug stmts ...

No, it can change values, like the case you saw later for phis.
This applies to all the cases where the optimisation depends on the
propagated info for the lhs, rather than being inherent to the operation.
So e.g. we can change the value of A in A = B * C, if all uses of A
don't care about the sign.

At the moment the only change we can make is that the result could be
the negative of its original value.

>> +/* STMT has been changed.  Give the fold machinery a chance to simplify
>> +   and canonicalize it (e.g. by ensuring that commutative operands have
>> +   the right order), then record the updates.  */
>> +
>> +void
>> +backprop::complete_change (gimple *stmt)
>> +{
>> +  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
>> +  if (fold_stmt (&gsi))
>> +    {
>> +      if (dump_file && (dump_flags & TDF_DETAILS))
>> +       {
>> +         fprintf (dump_file, "  which folds to: ");
>> +         print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_SLIM);
>> +       }
>
> I suppose it can't (yet) happen that the result no longer throws.

Yeah, think so.

>> +/* Optimize PHI on the basis that INFO describes all uses of the result.  */
>> +
>> +void
>> +backprop::optimize_phi (gphi *phi, const usage_info *info)
>> +{
>> +  /* If the sign of the result doesn't matter, strip sign operations
>> +     from all arguments.  */
>> +  if (info->flags.ignore_sign)
>> +    {
>> +      use_operand_p use;
>> +      ssa_op_iter oi;
>> +      FOR_EACH_PHI_ARG (use, phi, oi, SSA_OP_USE)
>> +       if (tree new_arg = strip_sign_op (USE_FROM_PTR (use)))
>> +         {
>> +           if (dump_file && (dump_flags & TDF_DETAILS))
>> +             note_replacement (phi, USE_FROM_PTR (use), new_arg);
>> +           replace_exp (use, new_arg);
>
> So this seems to be a case where the value _does_ change?  Why bother
> to substitute in PHIs btw.?

For:

     double
     f (double *a, int n, double start)
     {
       double x = fabs (start);
       for (int i = 0; i < n; ++i)
	 x *= a[i];
       return __builtin_cos (x);
     }

it's the loop phi for x that has the redundant sign op as argument.
We want to rewrite it to "start" even if we need to keep "fabs (start)"
around for some other uses (assuming an extended example).

Thanks,
Richard

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Add a pass to back-propagate use information
  2015-10-19 12:38   ` Richard Sandiford
@ 2015-10-19 14:35     ` Richard Biener
  2015-10-19 15:01       ` Richard Sandiford
  2015-10-21  8:49       ` Richard Sandiford
  0 siblings, 2 replies; 7+ messages in thread
From: Richard Biener @ 2015-10-19 14:35 UTC (permalink / raw)
  To: Richard Biener, GCC Patches, richard.sandiford

On Mon, Oct 19, 2015 at 2:38 PM, Richard Sandiford
<richard.sandiford@arm.com> wrote:
> Richard Biener <richard.guenther@gmail.com> writes:
>> On Thu, Oct 15, 2015 at 3:17 PM, Richard Sandiford
>> <richard.sandiford@arm.com> wrote:
>>> This patch adds a pass that collects information that is common to all
>>> uses of an SSA name X and back-propagates that information up the statements
>>> that generate X.  The general idea is to use the information to simplify
>>> instructions (rather than a pure DCE) so I've simply called it
>>> tree-ssa-backprop.c, to go with tree-ssa-forwprop.c.
>>>
>>> At the moment the only use of the pass is to remove unnecessry sign
>>> operations, so that it's effectively a global version of
>>> fold_strip_sign_ops.  I'm hoping it could be extended in future to
>>> record which bits of an integer are significant.  There are probably
>>> other potential uses too.
>>>
>>> A later patch gets rid of fold_strip_sign_ops.
>>>
>>> Tested on x86_64-linux-gnu, aarch64-linux-gnu and arm-linux-gnueabi.
>>> OK to install?
>>>
>>> Thanks,
>>> Richard
>>>
>>>
>>> gcc/
>>>         * doc/invoke.texi (-fdump-tree-backprop, -ftree-backprop): Document.
>>>         * Makefile.in (OBJS): Add tree-ssa-backprop.o.
>>>         * common.opt (ftree-backprop): New option.
>>>         * fold-const.h (negate_mathfn_p): Declare.
>>>         * fold-const.c (negate_mathfn_p): Make public.
>>>         * timevar.def (TV_TREE_BACKPROP): New.
>>>         * tree-passes.h (make_pass_backprop): Declare.
>>>         * passes.def (pass_backprop): Add.
>>>         * tree-ssa-backprop.c: New file.
>>>
>>> gcc/testsuite/
>>>         * gcc.dg/tree-ssa/backprop-1.c, gcc.dg/tree-ssa/backprop-2.c,
>>>         gcc.dg/tree-ssa/backprop-3.c, gcc.dg/tree-ssa/backprop-4.c,
>>>         gcc.dg/tree-ssa/backprop-5.c, gcc.dg/tree-ssa/backprop-6.c: New tests.
>>>
>>> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
>>> index 783e4c9..69e669d 100644
>>> --- a/gcc/Makefile.in
>>> +++ b/gcc/Makefile.in
>>> @@ -1445,6 +1445,7 @@ OBJS = \
>>>         tree-switch-conversion.o \
>>>         tree-ssa-address.o \
>>>         tree-ssa-alias.o \
>>> +       tree-ssa-backprop.o \
>>>         tree-ssa-ccp.o \
>>>         tree-ssa-coalesce.o \
>>>         tree-ssa-copy.o \
>>> diff --git a/gcc/common.opt b/gcc/common.opt
>>> index 5060208..5aef625 100644
>>> --- a/gcc/common.opt
>>> +++ b/gcc/common.opt
>>> @@ -2364,6 +2364,10 @@ ftree-pta
>>>  Common Report Var(flag_tree_pta) Optimization
>>>  Perform function-local points-to analysis on trees.
>>>
>>> +ftree-backprop
>>> +Common Report Var(flag_tree_backprop) Init(1) Optimization
>>> +Enable backward propagation of use properties at the tree level.
>>
>> Don't add new -ftree-* "tree" doesn't add any info for our users.  I'd
>> also refer to SSA level rather than "tree" level.  Not sure if -fbackprop
>> is good, but let's go for it.
>
> OK.
>
>>> diff --git a/gcc/fold-const.c b/gcc/fold-const.c
>>> index de45a2c..7f00e72 100644
>>> --- a/gcc/fold-const.c
>>> +++ b/gcc/fold-const.c
>>> @@ -319,7 +318,7 @@ fold_overflow_warning (const char* gmsgid, enum
>> warn_strict_overflow_code wc)
>>>  /* Return true if the built-in mathematical function specified by CODE
>>>     is odd, i.e. -f(x) == f(-x).  */
>>>
>>> -static bool
>>> +bool
>>>  negate_mathfn_p (enum built_in_function code)
>>>  {
>>>    switch (code)
>>
>> Belongs more to builtins.[ch] if exported.
>
> The long-term plan is to abstract away whether it's a built-in function
> or an internal function, in which case I hope to have a single predicate
> that handles both.  I'm not sure where the code should go after that change.
> Maybe a new file?

Hmm, we'll see.  So just leave it in fold-const.c for now.

>>> diff --git a/gcc/fold-const.h b/gcc/fold-const.h
>>> index ee74dc8..4d5b24b 100644
>>> --- a/gcc/fold-const.h
>>> +++ b/gcc/fold-const.h
>>> @@ -173,6 +173,7 @@ extern tree sign_bit_p (tree, const_tree);
>>>  extern tree exact_inverse (tree, tree);
>>>  extern tree const_unop (enum tree_code, tree, tree);
>>>  extern tree const_binop (enum tree_code, tree, tree, tree);
>>> +extern bool negate_mathfn_p (enum built_in_function);
>>>
>>>  /* Return OFF converted to a pointer offset type suitable as offset for
>>>     POINTER_PLUS_EXPR.  Use location LOC for this conversion.  */
>>> diff --git a/gcc/passes.def b/gcc/passes.def
>>> index dc3f44c..36d2b3b 100644
>>> --- a/gcc/passes.def
>>> +++ b/gcc/passes.def
>>> @@ -159,6 +159,7 @@ along with GCC; see the file COPYING3.  If not see
>>>        /* After CCP we rewrite no longer addressed locals into SSA
>>>          form if possible.  */
>>>        NEXT_PASS (pass_complete_unrolli);
>>> +      NEXT_PASS (pass_backprop);
>>>        NEXT_PASS (pass_phiprop);
>>>        NEXT_PASS (pass_forwprop);
>>
>> Any reason to not put this later?  I was thinking before reassoc.
>
> I think we're relying on FRE to notice the redundancy in the
> builtins-*.c tests, once this pass has converted the version
> with redundant sign ops to make it look like the version without.
> reassoc is likely to be too late.

There is PRE after reassoc (run as FRE at -O1).

> I also thought it should go before rather than after some instance
> of forwprop because the pass might expose more forward folding
> opportunities.  E.g. if the sign of A = -B * B doesn't matter,
> we'll end up with A = B * B, which might be foldable with uses of A.
> It seems less likely that forwprop would expose more backprop
> opportunities.

Indeed.  I was asking because backprop runs after inlining but
with nearly no effective scalar cleanup after it to cleanup after
inlining.

In principle it only depends on some kind of DCE (DSE?) to avoid
false uses, right?

It's probably ok where you put it, I just wanted to get an idea of your
reasoning.

>
>>> diff --git a/gcc/tree-ssa-backprop.c b/gcc/tree-ssa-backprop.c
>>> new file mode 100644
>>> index 0000000..bf2dcd9
>>> --- /dev/null
>>> +++ b/gcc/tree-ssa-backprop.c
>>
>> gimple-ssa-backprop.c
>
> OK.
>
>>> +/* Return usage information for general operand OP, or null if none.  */
>>> +
>>> +const usage_info *
>>> +backprop::lookup_operand (tree op)
>>> +{
>>> +  if (op && TREE_CODE (op) == SSA_NAME)
>>> +    if (usage_info **slot = m_info_map.get (op))
>>> +      return *slot;
>>
>> Is this usually fully populated at the end?  If so a direct mapping using
>> SSA_NAME_VERSION would be cheaper.
>
> No, in practice it should be very sparse.
>
>>> +/* Make INFO describe all uses of RHS in ASSIGN.  */
>>> +
>>> +void
>>> +backprop::process_assign_use (gassign *assign, tree rhs, usage_info *info)
>>> +{
>>> +  tree lhs = gimple_assign_lhs (assign);
>>> +  switch (gimple_assign_rhs_code (assign))
>>> +    {
>>> +    case ABS_EXPR:
>>> +      /* The sign of the input doesn't matter.  */
>>> +      info->flags.ignore_sign = true;
>>> +      break;
>>> +
>>> +    case COND_EXPR:
>>> +      /* For A = B ? C : D, propagate information about all uses of A
>>> +        to B and C.  */
>>> +      if (rhs != gimple_assign_rhs1 (assign))
>>> +       if (const usage_info *lhs_info = lookup_operand (lhs))
>>
>> Use && instead of nested if
>
> That means introducing an extra level of braces just for something
> that that isn't needed by the first statement, i.e.:
>
>     {
>       const usage_info *lhs_info;
>       if (rhs != gimple_assign_rhs1 (assign)
>           && (lhs_info = lookup_operand (lhs)))
>         *info = *lhs_info;
>       break;
>     }
>
> There also used to be a strong preference for not embedding assignments
> in && and || conditions.
>
> If there had been some other set-up for the lookup_operand call, we
> would have had:
>
>       if (rhs != gimple_assign_rhs1 (assign))
>         {
>           ...
>           if (const usage_info *lhs_info = lookup_operand (lhs))
>             ..
>         }
>
> and presumably that would have been OK.  So if the original really isn't,
> acceptable, I'd rather write it as:
>
>       if (rhs != gimple_assign_rhs1 (assign))
>         {
>           const usage_info *lhs_info = lookup_operand (lhs);
>           if (lhs_info)
>             ..
>         }
>
> I thought we'd embraced the idea of declarations in if conditions though.

I missed the init in if ... yeah, there doesn't seem to be a good way
though I prefer {} around the inner if at least which means the last
form would be my reference.

>>> +/* Process all uses of VAR and record or update the result in
>>> +   M_INFO_MAP and M_VARS.  */
>>> +
>>> +void
>>> +backprop::process_var (tree var)
>>> +{
>>> +  if (has_zero_uses (var))
>>> +    return;
>>> +
>>> +  usage_info info;
>>> +  intersect_uses (var, &info);
>>
>> It feels somewhat backward that we do a backward walk over all stmts
>> (thus processing all use-stmts before defs) but then "forward" process
>> uses of defs.  Wouldn't it be easier(?) to "intersect" incrementally
>> by processing only uses for all stmts we walk?  Or is this about
>> avoiding to allocate a 'info' for DEFs that end up with no interesting
>> info?
>
> Yeah, that was the idea.  In practice very few SSA names end up
> with any useful information.

Ok, let's add a comment somewhere noting that and the choice of
"backward forward propagation".

>>> +/* Process all statements and phis in BB, during the first post-order
>> walk.  */
>>> +
>>> +void
>>> +backprop::process_block (basic_block bb)
>>> +{
>>> +  for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
>>> +       gsi_prev (&gsi))
>>> +    {
>>> +      tree lhs = gimple_get_lhs (gsi_stmt (gsi));
>>> +      if (lhs && TREE_CODE (lhs) == SSA_NAME)
>>> +       process_var (lhs);
>>
>> I think it would be cleaner to walk over al SSA defs (that includes asms which
>> you seem to miss, no idea if that's important)
>
> The idea was the same here.  The point of the pass is to back-propagate
> information from uses to definitions, so there's no point recording
> information about SSA names whose definitions could never benefit.
>
>>> +    }
>>> +  for (gphi_iterator gpi = gsi_start_phis (bb); !gsi_end_p (gpi);
>>> +       gsi_next (&gpi))
>>> +    process_var (gimple_phi_result (gpi.phi ()));
>>> +}
>>> +
>>> +/* Delete the definition of VAR, which has no uses.  */
>>> +
>>> +static void
>>> +remove_unused_var (tree var)
>>> +{
>>> +  gimple *stmt = SSA_NAME_DEF_STMT (var);
>>> +  if (dump_file && (dump_flags & TDF_DETAILS))
>>> +    {
>>> +      fprintf (dump_file, "Deleting ");
>>> +      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
>>> +    }
>>> +  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
>>> +  gsi_remove (&gsi, true);
>>> +  release_ssa_name (var);
>>
>> What if var wasn't the only def?  Please use release_defs ()
>> so in case this is a call with a virtual definition it is released.
>
> I didn't think the calls involved here could have those, but maybe
> they could because of errno?  Ugh.  Are there any other valid reasons
> for having VDEFs on these calls?

No, only errno.

>>> +/* Strip all sign operations from the rvalue at *RHS_PTR in STMT.
>>> +   Return true if something changed.  The caller is responsible
>>> +   for the necessary bookkeeping.  */
>>> +
>>> +static bool
>>> +strip_sign_op (gimple *stmt, tree *rhs_ptr)
>>> +{
>>> +  if (tree new_rhs = strip_sign_op (*rhs_ptr))
>>> +    {
>>> +      if (dump_file && (dump_flags & TDF_DETAILS))
>>> +       note_replacement (stmt, *rhs_ptr, new_rhs);
>>> +      *rhs_ptr = new_rhs;
>>
>> So it looks you are only changing stmts when the stmt result produces
>> the same value.  Just double-checking, as otherwise you'd need to care
>> about debug stmts ...
>
> No, it can change values, like the case you saw later for phis.
> This applies to all the cases where the optimisation depends on the
> propagated info for the lhs, rather than being inherent to the operation.
> So e.g. we can change the value of A in A = B * C, if all uses of A
> don't care about the sign.
>
> At the moment the only change we can make is that the result could be
> the negative of its original value.

Ok, so then there is the debug issue.  Consider

  x = ...;

which you change the sign for.  The user in gdb when printing 'x' needs to
see the original value or "optimized out", not the negated value.  This means
you have to replace the LHS of the stmt with a new SSA name which is
best done (with proper debug effects) by removing the original stmt and
replacing all uses of the LHS with the new stmt lhs.  Note that the
same is true for all derived values, so even if you only change

 _1 = ...;

(no user visible value) then a derived value


 x_2 = _1 + 2;

might change sign.

Well, you have to think about it at least ;)

>>> +/* STMT has been changed.  Give the fold machinery a chance to simplify
>>> +   and canonicalize it (e.g. by ensuring that commutative operands have
>>> +   the right order), then record the updates.  */
>>> +
>>> +void
>>> +backprop::complete_change (gimple *stmt)
>>> +{
>>> +  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
>>> +  if (fold_stmt (&gsi))
>>> +    {
>>> +      if (dump_file && (dump_flags & TDF_DETAILS))
>>> +       {
>>> +         fprintf (dump_file, "  which folds to: ");
>>> +         print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_SLIM);
>>> +       }
>>
>> I suppose it can't (yet) happen that the result no longer throws.
>
> Yeah, think so.
>
>>> +/* Optimize PHI on the basis that INFO describes all uses of the result.  */
>>> +
>>> +void
>>> +backprop::optimize_phi (gphi *phi, const usage_info *info)
>>> +{
>>> +  /* If the sign of the result doesn't matter, strip sign operations
>>> +     from all arguments.  */
>>> +  if (info->flags.ignore_sign)
>>> +    {
>>> +      use_operand_p use;
>>> +      ssa_op_iter oi;
>>> +      FOR_EACH_PHI_ARG (use, phi, oi, SSA_OP_USE)
>>> +       if (tree new_arg = strip_sign_op (USE_FROM_PTR (use)))
>>> +         {
>>> +           if (dump_file && (dump_flags & TDF_DETAILS))
>>> +             note_replacement (phi, USE_FROM_PTR (use), new_arg);
>>> +           replace_exp (use, new_arg);
>>
>> So this seems to be a case where the value _does_ change?  Why bother
>> to substitute in PHIs btw.?
>
> For:
>
>      double
>      f (double *a, int n, double start)
>      {
>        double x = fabs (start);
>        for (int i = 0; i < n; ++i)
>          x *= a[i];
>        return __builtin_cos (x);
>      }
>
> it's the loop phi for x that has the redundant sign op as argument.
> We want to rewrite it to "start" even if we need to keep "fabs (start)"
> around for some other uses (assuming an extended example).

Ah, I see.

So apart from the debug issue the patch looks fine then.

Thanks,
Richard.

> Thanks,
> Richard
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Add a pass to back-propagate use information
  2015-10-19 14:35     ` Richard Biener
@ 2015-10-19 15:01       ` Richard Sandiford
  2015-10-21  8:49       ` Richard Sandiford
  1 sibling, 0 replies; 7+ messages in thread
From: Richard Sandiford @ 2015-10-19 15:01 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches

Richard Biener <richard.guenther@gmail.com> writes:
>>>> diff --git a/gcc/fold-const.c b/gcc/fold-const.c
>>>> index de45a2c..7f00e72 100644
>>>> --- a/gcc/fold-const.c
>>>> +++ b/gcc/fold-const.c
>>>> @@ -319,7 +318,7 @@ fold_overflow_warning (const char* gmsgid, enum
>>> warn_strict_overflow_code wc)
>>>>  /* Return true if the built-in mathematical function specified by CODE
>>>>     is odd, i.e. -f(x) == f(-x).  */
>>>>
>>>> -static bool
>>>> +bool
>>>>  negate_mathfn_p (enum built_in_function code)
>>>>  {
>>>>    switch (code)
>>>
>>> Belongs more to builtins.[ch] if exported.
>>
>> The long-term plan is to abstract away whether it's a built-in function
>> or an internal function, in which case I hope to have a single predicate
>> that handles both.  I'm not sure where the code should go after that change.
>> Maybe a new file?
>
> Hmm, we'll see.  So just leave it in fold-const.c for now.

OK, thanks.

>>>> diff --git a/gcc/fold-const.h b/gcc/fold-const.h
>>>> index ee74dc8..4d5b24b 100644
>>>> --- a/gcc/fold-const.h
>>>> +++ b/gcc/fold-const.h
>>>> @@ -173,6 +173,7 @@ extern tree sign_bit_p (tree, const_tree);
>>>>  extern tree exact_inverse (tree, tree);
>>>>  extern tree const_unop (enum tree_code, tree, tree);
>>>>  extern tree const_binop (enum tree_code, tree, tree, tree);
>>>> +extern bool negate_mathfn_p (enum built_in_function);
>>>>
>>>>  /* Return OFF converted to a pointer offset type suitable as offset for
>>>>     POINTER_PLUS_EXPR.  Use location LOC for this conversion.  */
>>>> diff --git a/gcc/passes.def b/gcc/passes.def
>>>> index dc3f44c..36d2b3b 100644
>>>> --- a/gcc/passes.def
>>>> +++ b/gcc/passes.def
>>>> @@ -159,6 +159,7 @@ along with GCC; see the file COPYING3.  If not see
>>>>        /* After CCP we rewrite no longer addressed locals into SSA
>>>>          form if possible.  */
>>>>        NEXT_PASS (pass_complete_unrolli);
>>>> +      NEXT_PASS (pass_backprop);
>>>>        NEXT_PASS (pass_phiprop);
>>>>        NEXT_PASS (pass_forwprop);
>>>
>>> Any reason to not put this later?  I was thinking before reassoc.
>>
>> I think we're relying on FRE to notice the redundancy in the
>> builtins-*.c tests, once this pass has converted the version
>> with redundant sign ops to make it look like the version without.
>> reassoc is likely to be too late.
>
> There is PRE after reassoc (run as FRE at -O1).

Ah, OK.  It looks like that also runs after sincos though, whereas
I think we want an FRE between this pass and sincos.

>> I also thought it should go before rather than after some instance
>> of forwprop because the pass might expose more forward folding
>> opportunities.  E.g. if the sign of A = -B * B doesn't matter,
>> we'll end up with A = B * B, which might be foldable with uses of A.
>> It seems less likely that forwprop would expose more backprop
>> opportunities.
>
> Indeed.  I was asking because backprop runs after inlining but
> with nearly no effective scalar cleanup after it to cleanup after
> inlining.
>
> In principle it only depends on some kind of DCE (DSE?) to avoid
> false uses, right?

Yeah, that sounds right.  Complex control-flow leading up to uses
shouldn't be a problem, but phantom uses would be a blocker.
>
> It's probably ok where you put it, I just wanted to get an idea of your
> reasoning.
>
>>
>>>> +/* Make INFO describe all uses of RHS in ASSIGN.  */
>>>> +
>>>> +void
>>>> +backprop::process_assign_use (gassign *assign, tree rhs, usage_info *info)
>>>> +{
>>>> +  tree lhs = gimple_assign_lhs (assign);
>>>> +  switch (gimple_assign_rhs_code (assign))
>>>> +    {
>>>> +    case ABS_EXPR:
>>>> +      /* The sign of the input doesn't matter.  */
>>>> +      info->flags.ignore_sign = true;
>>>> +      break;
>>>> +
>>>> +    case COND_EXPR:
>>>> +      /* For A = B ? C : D, propagate information about all uses of A
>>>> +        to B and C.  */
>>>> +      if (rhs != gimple_assign_rhs1 (assign))
>>>> +       if (const usage_info *lhs_info = lookup_operand (lhs))
>>>
>>> Use && instead of nested if
>>
>> That means introducing an extra level of braces just for something
>> that that isn't needed by the first statement, i.e.:
>>
>>     {
>>       const usage_info *lhs_info;
>>       if (rhs != gimple_assign_rhs1 (assign)
>>           && (lhs_info = lookup_operand (lhs)))
>>         *info = *lhs_info;
>>       break;
>>     }
>>
>> There also used to be a strong preference for not embedding assignments
>> in && and || conditions.
>>
>> If there had been some other set-up for the lookup_operand call, we
>> would have had:
>>
>>       if (rhs != gimple_assign_rhs1 (assign))
>>         {
>>           ...
>>           if (const usage_info *lhs_info = lookup_operand (lhs))
>>             ..
>>         }
>>
>> and presumably that would have been OK.  So if the original really isn't,
>> acceptable, I'd rather write it as:
>>
>>       if (rhs != gimple_assign_rhs1 (assign))
>>         {
>>           const usage_info *lhs_info = lookup_operand (lhs);
>>           if (lhs_info)
>>             ..
>>         }
>>
>> I thought we'd embraced the idea of declarations in if conditions though.
>
> I missed the init in if ... yeah, there doesn't seem to be a good way
> though I prefer {} around the inner if at least which means the last
> form would be my reference.

OK.

>>>> +/* Process all uses of VAR and record or update the result in
>>>> +   M_INFO_MAP and M_VARS.  */
>>>> +
>>>> +void
>>>> +backprop::process_var (tree var)
>>>> +{
>>>> +  if (has_zero_uses (var))
>>>> +    return;
>>>> +
>>>> +  usage_info info;
>>>> +  intersect_uses (var, &info);
>>>
>>> It feels somewhat backward that we do a backward walk over all stmts
>>> (thus processing all use-stmts before defs) but then "forward" process
>>> uses of defs.  Wouldn't it be easier(?) to "intersect" incrementally
>>> by processing only uses for all stmts we walk?  Or is this about
>>> avoiding to allocate a 'info' for DEFs that end up with no interesting
>>> info?
>>
>> Yeah, that was the idea.  In practice very few SSA names end up
>> with any useful information.
>
> Ok, let's add a comment somewhere noting that and the choice of
> "backward forward propagation".

Will do.  I think I'll also rework it to be lazy, which should be
more efficient as well as more obvious.  Not sure why I didn't do
that in the first place.

>>>> +/* Strip all sign operations from the rvalue at *RHS_PTR in STMT.
>>>> +   Return true if something changed.  The caller is responsible
>>>> +   for the necessary bookkeeping.  */
>>>> +
>>>> +static bool
>>>> +strip_sign_op (gimple *stmt, tree *rhs_ptr)
>>>> +{
>>>> +  if (tree new_rhs = strip_sign_op (*rhs_ptr))
>>>> +    {
>>>> +      if (dump_file && (dump_flags & TDF_DETAILS))
>>>> +       note_replacement (stmt, *rhs_ptr, new_rhs);
>>>> +      *rhs_ptr = new_rhs;
>>>
>>> So it looks you are only changing stmts when the stmt result produces
>>> the same value.  Just double-checking, as otherwise you'd need to care
>>> about debug stmts ...
>>
>> No, it can change values, like the case you saw later for phis.
>> This applies to all the cases where the optimisation depends on the
>> propagated info for the lhs, rather than being inherent to the operation.
>> So e.g. we can change the value of A in A = B * C, if all uses of A
>> don't care about the sign.
>>
>> At the moment the only change we can make is that the result could be
>> the negative of its original value.
>
> Ok, so then there is the debug issue.  Consider
>
>   x = ...;
>
> which you change the sign for.  The user in gdb when printing 'x' needs to
> see the original value or "optimized out", not the negated value.  This means
> you have to replace the LHS of the stmt with a new SSA name which is
> best done (with proper debug effects) by removing the original stmt and
> replacing all uses of the LHS with the new stmt lhs.  Note that the
> same is true for all derived values, so even if you only change
>
>  _1 = ...;
>
> (no user visible value) then a derived value
>
>
>  x_2 = _1 + 2;
>
> might change sign.
>
> Well, you have to think about it at least ;)

OK, will fix.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Add a pass to back-propagate use information
  2015-10-19 14:35     ` Richard Biener
  2015-10-19 15:01       ` Richard Sandiford
@ 2015-10-21  8:49       ` Richard Sandiford
  2015-10-21  9:58         ` Richard Biener
  1 sibling, 1 reply; 7+ messages in thread
From: Richard Sandiford @ 2015-10-21  8:49 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches

Richard Biener <richard.guenther@gmail.com> writes:
> On Mon, Oct 19, 2015 at 2:38 PM, Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>> Richard Biener <richard.guenther@gmail.com> writes:
>>> On Thu, Oct 15, 2015 at 3:17 PM, Richard Sandiford
>>> <richard.sandiford@arm.com> wrote:
>>>> +/* Strip all sign operations from the rvalue at *RHS_PTR in STMT.
>>>> +   Return true if something changed.  The caller is responsible
>>>> +   for the necessary bookkeeping.  */
>>>> +
>>>> +static bool
>>>> +strip_sign_op (gimple *stmt, tree *rhs_ptr)
>>>> +{
>>>> +  if (tree new_rhs = strip_sign_op (*rhs_ptr))
>>>> +    {
>>>> +      if (dump_file && (dump_flags & TDF_DETAILS))
>>>> +       note_replacement (stmt, *rhs_ptr, new_rhs);
>>>> +      *rhs_ptr = new_rhs;
>>>
>>> So it looks you are only changing stmts when the stmt result produces
>>> the same value.  Just double-checking, as otherwise you'd need to care
>>> about debug stmts ...
>>
>> No, it can change values, like the case you saw later for phis.
>> This applies to all the cases where the optimisation depends on the
>> propagated info for the lhs, rather than being inherent to the operation.
>> So e.g. we can change the value of A in A = B * C, if all uses of A
>> don't care about the sign.
>>
>> At the moment the only change we can make is that the result could be
>> the negative of its original value.
>
> Ok, so then there is the debug issue.  Consider
>
>   x = ...;
>
> which you change the sign for.  The user in gdb when printing 'x' needs to
> see the original value or "optimized out", not the negated value.  This means
> you have to replace the LHS of the stmt with a new SSA name which is
> best done (with proper debug effects) by removing the original stmt and
> replacing all uses of the LHS with the new stmt lhs.  Note that the
> same is true for all derived values, so even if you only change
>
>  _1 = ...;
>
> (no user visible value) then a derived value
>
>
>  x_2 = _1 + 2;
>
> might change sign.
>
> Well, you have to think about it at least ;)

When addressing this, I tried to look for a way of doing it without
creating new SSA names and statements, since that should be much
simpler to code and hopefully also more efficient.  I noticed
gimple_replace_ssa_lhs, which seems to do what I want for assignments
and calls.  It copes with cases where the "new" lhs is different from
the old lhs and also cases where it's the same.

In backprop there shouldn't be any need for a new lhs once debug uses
have been updated.  All non-debug uses can cope with the new value so
they might as well continue to use the same SSA name.  The pass therefore
only needs the old lhs == new lhs behaviour.  It also needs to do the
same thing for phis, which aren't handled by gimple_replace_ssa_lhs.
In the end I simply took the active part of gimple_replace_ssa_lhs for
old lhs == new lhs:

  if (MAY_HAVE_DEBUG_STMTS)
    insert_debug_temp_for_var_def (NULL, lhs);

and used it directly.  Hope that's OK.

I also added -g to the dg-options lines of the tests.

> So apart from the debug issue the patch looks fine then.

Here's the updated patch.  Tested on x86_64-linux-gnu, aarch64-linux-gnu
and arm-linux-gnueabi.  OK to install?

Thanks,
Richard


gcc/
	* doc/invoke.texi (-fdump-tree-backprop, -fssa-backprop): Document.
	* Makefile.in (OBJS): Add gimple-ssa-backprop.o.
	* common.opt (fssa-backprop): New option.
	* fold-const.h (negate_mathfn_p): Declare.
	* fold-const.c (negate_mathfn_p): Make public.
	* timevar.def (TV_TREE_BACKPROP): New.
	* tree-pass.h (make_pass_backprop): Declare.
	* passes.def (pass_backprop): Add.
	* gimple-ssa-backprop.c: New file.

gcc/testsuite/
	* gcc.dg/tree-ssa/backprop-1.c, gcc.dg/tree-ssa/backprop-2.c,
	gcc.dg/tree-ssa/backprop-3.c, gcc.dg/tree-ssa/backprop-4.c,
	gcc.dg/tree-ssa/backprop-5.c, gcc.dg/tree-ssa/backprop-6.c: New tests.

Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi	2015-10-21 08:43:48.375797607 +0100
+++ gcc/doc/invoke.texi	2015-10-21 09:24:16.659564928 +0100
@@ -343,6 +343,7 @@ Objective-C and Objective-C++ Dialects}.
 -fdump-tree-dse@r{[}-@var{n}@r{]} @gol
 -fdump-tree-phiprop@r{[}-@var{n}@r{]} @gol
 -fdump-tree-phiopt@r{[}-@var{n}@r{]} @gol
+-fdump-tree-backprop@r{[}-@var{n}@r{]} @gol
 -fdump-tree-forwprop@r{[}-@var{n}@r{]} @gol
 -fdump-tree-nrv -fdump-tree-vect @gol
 -fdump-tree-sink @gol
@@ -443,9 +444,9 @@ Objective-C and Objective-C++ Dialects}.
 -fschedule-insns -fschedule-insns2 -fsection-anchors @gol
 -fselective-scheduling -fselective-scheduling2 @gol
 -fsel-sched-pipelining -fsel-sched-pipelining-outer-loops @gol
--fsemantic-interposition @gol
--fshrink-wrap -fsignaling-nans -fsingle-precision-constant @gol
--fsplit-ivs-in-unroller -fsplit-wide-types -fssa-phiopt @gol
+-fsemantic-interposition -fshrink-wrap -fsignaling-nans @gol
+-fsingle-precision-constant -fsplit-ivs-in-unroller @gol
+-fsplit-wide-types -fssa-backprop -fssa-phiopt @gol
 -fstack-protector -fstack-protector-all -fstack-protector-strong @gol
 -fstack-protector-explicit -fstdarg-opt -fstrict-aliasing @gol
 -fstrict-overflow -fthread-jumps -ftracer -ftree-bit-ccp @gol
@@ -7236,6 +7237,12 @@ name is made by appending @file{.dse} to
 Dump each function after optimizing PHI nodes into straightline code.  The file
 name is made by appending @file{.phiopt} to the source file name.
 
+@item backprop
+@opindex fdump-tree-backprop
+Dump each function after back-propagating use information up the definition
+chain.  The file name is made by appending @file{.backprop} to the
+source file name.
+
 @item forwprop
 @opindex fdump-tree-forwprop
 Dump each function after forward propagating single use variables.  The file
@@ -7707,9 +7714,10 @@ compilation time.
 -freorder-blocks @gol
 -fshrink-wrap @gol
 -fsplit-wide-types @gol
+-fssa-backprop @gol
+-fssa-phiopt @gol
 -ftree-bit-ccp @gol
 -ftree-ccp @gol
--fssa-phiopt @gol
 -ftree-ch @gol
 -ftree-coalesce-vars @gol
 -ftree-copy-prop @gol
@@ -8795,6 +8803,13 @@ Perform sparse conditional constant prop
 pass only operates on local scalar variables and is enabled by default
 at @option{-O} and higher.
 
+@item -fssa-backprop
+@opindex fssa-backprop
+Propagate information about uses of a value up the definition chain
+in order to simplify the definitions.  For example, this pass strips
+sign operations if the sign of a value never matters.  The flag is
+enabled by default at @option{-O} and higher.
+
 @item -fssa-phiopt
 @opindex fssa-phiopt
 Perform pattern matching on SSA PHI nodes to optimize conditional
Index: gcc/Makefile.in
===================================================================
--- gcc/Makefile.in	2015-10-21 08:43:49.879780139 +0100
+++ gcc/Makefile.in	2015-10-21 09:24:16.655564975 +0100
@@ -1266,6 +1266,7 @@ OBJS = \
 	gimple-laddress.o \
 	gimple-low.o \
 	gimple-pretty-print.o \
+	gimple-ssa-backprop.o \
 	gimple-ssa-isolate-paths.o \
 	gimple-ssa-strength-reduction.o \
 	gimple-streamer-in.o \
Index: gcc/common.opt
===================================================================
--- gcc/common.opt	2015-10-21 08:43:49.879780139 +0100
+++ gcc/common.opt	2015-10-21 09:24:16.655564975 +0100
@@ -2114,6 +2114,10 @@ fsplit-wide-types
 Common Report Var(flag_split_wide_types) Optimization
 Split wide types into independent registers
 
+fssa-backprop
+Common Report Var(flag_ssa_backprop) Init(1) Optimization
+Enable backward propagation of use properties at the SSA level.
+
 fssa-phiopt
 Common Report Var(flag_ssa_phiopt) Optimization
 Optimize conditional patterns using SSA PHI nodes
Index: gcc/fold-const.h
===================================================================
--- gcc/fold-const.h	2015-10-08 17:49:05.054371324 +0100
+++ gcc/fold-const.h	2015-10-21 09:24:16.663564880 +0100
@@ -173,6 +173,7 @@ extern tree sign_bit_p (tree, const_tree
 extern tree exact_inverse (tree, tree);
 extern tree const_unop (enum tree_code, tree, tree);
 extern tree const_binop (enum tree_code, tree, tree, tree);
+extern bool negate_mathfn_p (enum built_in_function);
 
 /* Return OFF converted to a pointer offset type suitable as offset for
    POINTER_PLUS_EXPR.  Use location LOC for this conversion.  */
Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	2015-10-21 09:24:01.443746048 +0100
+++ gcc/fold-const.c	2015-10-21 09:24:16.663564880 +0100
@@ -112,7 +112,6 @@ enum comparison_code {
   COMPCODE_TRUE = 15
 };
 
-static bool negate_mathfn_p (enum built_in_function);
 static bool negate_expr_p (tree);
 static tree negate_expr (tree);
 static tree split_tree (tree, enum tree_code, tree *, tree *, tree *, int);
@@ -321,7 +320,7 @@ fold_overflow_warning (const char* gmsgi
 /* Return true if the built-in mathematical function specified by CODE
    is odd, i.e. -f(x) == f(-x).  */
 
-static bool
+bool
 negate_mathfn_p (enum built_in_function code)
 {
   switch (code)
Index: gcc/timevar.def
===================================================================
--- gcc/timevar.def	2015-10-21 08:43:48.675794123 +0100
+++ gcc/timevar.def	2015-10-21 09:24:16.663564880 +0100
@@ -164,6 +164,7 @@ DEFTIMEVAR (TV_TREE_PRE		     , "tree PR
 DEFTIMEVAR (TV_TREE_FRE		     , "tree FRE")
 DEFTIMEVAR (TV_TREE_SINK             , "tree code sinking")
 DEFTIMEVAR (TV_TREE_PHIOPT	     , "tree linearize phis")
+DEFTIMEVAR (TV_TREE_BACKPROP	     , "tree backward propagate")
 DEFTIMEVAR (TV_TREE_FORWPROP	     , "tree forward propagate")
 DEFTIMEVAR (TV_TREE_PHIPROP	     , "tree phiprop")
 DEFTIMEVAR (TV_TREE_DCE		     , "tree conservative DCE")
Index: gcc/tree-pass.h
===================================================================
--- gcc/tree-pass.h	2015-10-15 10:49:00.889300501 +0100
+++ gcc/tree-pass.h	2015-10-21 09:24:16.663564880 +0100
@@ -497,6 +497,7 @@ extern gimple_opt_pass *make_pass_cleanu
 							       *ctxt);
 extern gimple_opt_pass *make_pass_init_datastructures (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_fixup_cfg (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_backprop (gcc::context *ctxt);
 
 extern rtl_opt_pass *make_pass_expand (gcc::context *ctxt);
 extern rtl_opt_pass *make_pass_instantiate_virtual_regs (gcc::context *ctxt);
Index: gcc/passes.def
===================================================================
--- gcc/passes.def	2015-10-15 10:48:46.713465500 +0100
+++ gcc/passes.def	2015-10-21 09:24:16.663564880 +0100
@@ -159,6 +159,7 @@ along with GCC; see the file COPYING3.
       /* After CCP we rewrite no longer addressed locals into SSA
 	 form if possible.  */
       NEXT_PASS (pass_complete_unrolli);
+      NEXT_PASS (pass_backprop);
       NEXT_PASS (pass_phiprop);
       NEXT_PASS (pass_forwprop);
       NEXT_PASS (pass_object_sizes);
Index: gcc/gimple-ssa-backprop.c
===================================================================
--- /dev/null	2015-10-14 14:19:37.164773004 +0100
+++ gcc/gimple-ssa-backprop.c	2015-10-21 09:24:16.663564880 +0100
@@ -0,0 +1,956 @@
+/* Back-propagation of usage information to definitions.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+/* This pass propagates information that is common to all uses of an SSA
+   name back up through the sequence of statements that generate it,
+   simplifying the statements where possible.  Sometimes this can expose
+   fully or partially dead code, but the main focus is simplifying
+   computations.
+
+   At the moment the pass only handles one piece of information: whether the
+   sign of a value matters, and therefore whether sign-changing operations
+   can be skipped.  The pass could be extended to more interesting
+   information in future, such as which bits of an integer are significant.
+
+   For example, take the function:
+
+     double
+     f (double *a, int n, double start)
+     {
+       double x = fabs (start);
+       for (int i = 0; i < n; ++i)
+	 x *= a[i];
+       return __builtin_cos (x);
+     }
+
+   cos(x) == cos(-x), so the sign of the final x doesn't matter.
+   That x is the result of a series of multiplications, and if
+   the sign of the result of a multiplication doesn't matter,
+   the signs of the inputs don't matter either.
+
+   The pass would replace the incoming value of x (i.e. fabs(start))
+   with start.  Since there are no other uses of the fabs result,
+   the call would get deleted as dead.
+
+   The algorithm is:
+
+   (1) Do a post-order traversal of the blocks in the function, walking
+       each block backwards.  For each potentially-simplifiable statement
+       that defines an SSA name X, examine all uses of X to see what
+       information is actually significant.  Record this as INFO_MAP[X].
+       Optimistically ignore for now any back-edge references to
+       unprocessed phis.
+
+       (An alternative would be to record each use when we visit its
+       statement and take the intersection as we go along.  However,
+       this would lead to more SSA names being entered into INFO_MAP
+       unnecessarily, only to be taken out again later.  At the moment
+       very few SSA names end up with useful information.)
+
+   (2) Iteratively reduce the optimistic result of (1) until we reach
+       a maximal fixed point (which at the moment would mean revisiting
+       statements at most once).  First push all SSA names that used an
+       optimistic assumption about a backedge phi onto a worklist.
+       While the worklist is nonempty, pick off an SSA name X and recompute
+       INFO_MAP[X].  If the value changes, push all SSA names used in the
+       definition of X onto the worklist.
+
+   (3) Iterate over each SSA name X with info in INFO_MAP, in the
+       opposite order to (1), i.e. a forward reverse-post-order walk.
+       Try to optimize the definition of X using INFO_MAP[X] and fold
+       the result.  (This ensures that we fold definitions before uses.)
+
+   (4) Iterate over each SSA name X with info in INFO_MAP, in the same
+       order as (1), and delete any statements that are now dead.
+       (This ensures that if a sequence of statements is dead,
+       we delete the last statement first.)
+
+   Note that this pass does not deal with direct redundancies,
+   such as cos(-x)->cos(x).  match.pd handles those cases instead.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "ssa.h"
+#include "fold-const.h"
+#include "tree-pass.h"
+#include "cfganal.h"
+#include "gimple-pretty-print.h"
+#include "tree-cfg.h"
+#include "tree-ssa.h"
+#include "tree-ssa-propagate.h"
+#include "gimple-fold.h"
+#include "alloc-pool.h"
+#include "tree-hash-traits.h"
+
+namespace {
+
+/* Information about a group of uses of an SSA name.  */
+struct usage_info
+{
+  usage_info () : flag_word (0) {}
+  usage_info &operator &= (const usage_info &);
+  usage_info operator & (const usage_info &) const;
+  bool operator == (const usage_info &) const;
+  bool operator != (const usage_info &) const;
+  bool is_useful () const;
+
+  static usage_info intersection_identity ();
+
+  union
+  {
+    struct
+    {
+      /* True if the uses treat x and -x in the same way.  */
+      unsigned int ignore_sign : 1;
+    } flags;
+    /* All the flag bits as a single int.  */
+    unsigned int flag_word;
+  };
+};
+
+/* Return an X such that X & Y == Y for all Y.  This is the most
+   optimistic assumption possible.  */
+
+usage_info
+usage_info::intersection_identity ()
+{
+  usage_info ret;
+  ret.flag_word = -1;
+  return ret;
+}
+
+/* Intersect *THIS with OTHER, so that *THIS describes all uses covered
+   by the original *THIS and OTHER.  */
+
+usage_info &
+usage_info::operator &= (const usage_info &other)
+{
+  flag_word &= other.flag_word;
+  return *this;
+}
+
+/* Return the intersection of *THIS and OTHER, i.e. a structure that
+   describes all uses covered by *THIS and OTHER.  */
+
+usage_info
+usage_info::operator & (const usage_info &other) const
+{
+  usage_info info (*this);
+  info &= other;
+  return info;
+}
+
+bool
+usage_info::operator == (const usage_info &other) const
+{
+  return flag_word == other.flag_word;
+}
+
+bool
+usage_info::operator != (const usage_info &other) const
+{
+  return !operator == (other);
+}
+
+/* Return true if *THIS is not simply the default, safe assumption.  */
+
+bool
+usage_info::is_useful () const
+{
+  return flag_word != 0;
+}
+
+/* Start a dump line about SSA name VAR.  */
+
+static void
+dump_usage_prefix (FILE *file, tree var)
+{
+  fprintf (file, "  ");
+  print_generic_expr (file, var, 0);
+  fprintf (file, ": ");
+}
+
+/* Print INFO to FILE.  */
+
+static void
+dump_usage_info (FILE *file, tree var, usage_info *info)
+{
+  if (info->flags.ignore_sign)
+    {
+      dump_usage_prefix (file, var);
+      fprintf (file, "sign bit not important\n");
+    }
+}
+
+/* Represents one execution of the pass.  */
+class backprop
+{
+public:
+  backprop (function *);
+  ~backprop ();
+
+  void execute ();
+
+private:
+  const usage_info *lookup_operand (tree);
+
+  void push_to_worklist (tree);
+  tree pop_from_worklist ();
+
+  void process_builtin_call_use (gcall *, tree, usage_info *);
+  void process_assign_use (gassign *, tree, usage_info *);
+  void process_phi_use (gphi *, usage_info *);
+  void process_use (gimple *, tree, usage_info *);
+  bool intersect_uses (tree, usage_info *);
+  void reprocess_inputs (gimple *);
+  void process_var (tree);
+  void process_block (basic_block);
+
+  void prepare_change (tree);
+  void complete_change (gimple *);
+  void optimize_builtin_call (gcall *, tree, const usage_info *);
+  void replace_assign_rhs (gassign *, tree, tree, tree, tree);
+  void optimize_assign (gassign *, tree, const usage_info *);
+  void optimize_phi (gphi *, tree, const usage_info *);
+
+  typedef hash_map <tree_ssa_name_hash, usage_info *> info_map_type;
+  typedef std::pair <tree, usage_info *> var_info_pair;
+
+  /* The function we're optimizing.  */
+  function *m_fn;
+
+  /* Pool for allocating usage_info structures.  */
+  object_allocator <usage_info> m_info_pool;
+
+  /* Maps an SSA name to a description of all uses of that SSA name.
+     All the usage_infos satisfy is_useful.
+
+     We use a hash_map because the map is expected to be sparse
+     (i.e. most SSA names won't have useful information attached to them).
+     We could move to a directly-indexed array if that situation changes.  */
+  info_map_type m_info_map;
+
+  /* Post-ordered list of all potentially-interesting SSA names,
+     along with information that describes all uses.  */
+  auto_vec <var_info_pair, 128> m_vars;
+
+  /* A bitmap of blocks that we have finished processing in the initial
+     post-order walk.  */
+  sbitmap m_visited_blocks;
+
+  /* A worklist of SSA names whose definitions need to be reconsidered.  */
+  auto_vec <tree, 64> m_worklist;
+
+  /* The SSA names in M_WORKLIST, identified by their SSA_NAME_VERSION.
+     We use a bitmap rather than an sbitmap because most SSA names are
+     never added to the worklist.  */
+  bitmap m_worklist_names;
+};
+
+backprop::backprop (function *fn)
+  : m_fn (fn),
+    m_info_pool ("usage_info"),
+    m_visited_blocks (sbitmap_alloc (last_basic_block_for_fn (m_fn))),
+    m_worklist_names (BITMAP_ALLOC (NULL))
+{
+  bitmap_clear (m_visited_blocks);
+}
+
+backprop::~backprop ()
+{
+  BITMAP_FREE (m_worklist_names);
+  sbitmap_free (m_visited_blocks);
+  m_info_pool.release ();
+}
+
+/* Return usage information for general operand OP, or null if none.  */
+
+const usage_info *
+backprop::lookup_operand (tree op)
+{
+  if (op && TREE_CODE (op) == SSA_NAME)
+    {
+      usage_info **slot = m_info_map.get (op);
+      if (slot)
+	return *slot;
+    }
+  return NULL;
+}
+
+/* Add SSA name VAR to the worklist, if it isn't on the worklist already.  */
+
+void
+backprop::push_to_worklist (tree var)
+{
+  if (!bitmap_set_bit (m_worklist_names, SSA_NAME_VERSION (var)))
+    return;
+  m_worklist.safe_push (var);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "[WORKLIST] Pushing ");
+      print_generic_expr (dump_file, var, 0);
+      fprintf (dump_file, "\n");
+    }
+}
+
+/* Remove and return the next SSA name from the worklist.  The worklist
+   is known to be nonempty.  */
+
+tree
+backprop::pop_from_worklist ()
+{
+  tree var = m_worklist.pop ();
+  bitmap_clear_bit (m_worklist_names, SSA_NAME_VERSION (var));
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "[WORKLIST] Popping ");
+      print_generic_expr (dump_file, var, 0);
+      fprintf (dump_file, "\n");
+    }
+  return var;
+}
+
+/* Make INFO describe all uses of RHS in CALL, which is a call to a
+   built-in function.  */
+
+void
+backprop::process_builtin_call_use (gcall *call, tree rhs, usage_info *info)
+{
+  enum built_in_function fn = DECL_FUNCTION_CODE (gimple_call_fndecl (call));
+  tree lhs = gimple_call_lhs (call);
+  switch (fn)
+    {
+    CASE_FLT_FN (BUILT_IN_COS):
+    CASE_FLT_FN (BUILT_IN_COSH):
+    CASE_FLT_FN (BUILT_IN_CCOS):
+    CASE_FLT_FN (BUILT_IN_CCOSH):
+    CASE_FLT_FN (BUILT_IN_HYPOT):
+      /* The signs of all inputs are ignored.  */
+      info->flags.ignore_sign = true;
+      break;
+
+    CASE_FLT_FN (BUILT_IN_COPYSIGN):
+      /* The sign of the first input is ignored.  */
+      if (rhs != gimple_call_arg (call, 1))
+	info->flags.ignore_sign = true;
+      break;
+
+    CASE_FLT_FN (BUILT_IN_POW):
+      {
+	/* The sign of the first input is ignored as long as the second
+	   input is an even real.  */
+	tree power = gimple_call_arg (call, 1);
+	HOST_WIDE_INT n;
+	if (TREE_CODE (power) == REAL_CST
+	    && real_isinteger (&TREE_REAL_CST (power), &n)
+	    && (n & 1) == 0)
+	  info->flags.ignore_sign = true;
+	break;
+      }
+
+    CASE_FLT_FN (BUILT_IN_FMA):
+      /* In X * X + Y, where Y is distinct from X, the sign of X doesn't
+	 matter.  */
+      if (gimple_call_arg (call, 0) == rhs
+	  && gimple_call_arg (call, 1) == rhs
+	  && gimple_call_arg (call, 2) != rhs)
+	info->flags.ignore_sign = true;
+      break;
+
+    default:
+      if (negate_mathfn_p (fn))
+	{
+	  /* The sign of the (single) input doesn't matter provided
+	     that the sign of the output doesn't matter.  */
+	  const usage_info *lhs_info = lookup_operand (lhs);
+	  if (lhs_info)
+	    info->flags.ignore_sign = lhs_info->flags.ignore_sign;
+	}
+      break;
+    }
+}
+
+/* Make INFO describe all uses of RHS in ASSIGN.  */
+
+void
+backprop::process_assign_use (gassign *assign, tree rhs, usage_info *info)
+{
+  tree lhs = gimple_assign_lhs (assign);
+  switch (gimple_assign_rhs_code (assign))
+    {
+    case ABS_EXPR:
+      /* The sign of the input doesn't matter.  */
+      info->flags.ignore_sign = true;
+      break;
+
+    case COND_EXPR:
+      /* For A = B ? C : D, propagate information about all uses of A
+	 to C and D.  */
+      if (rhs != gimple_assign_rhs1 (assign))
+	{
+	  const usage_info *lhs_info = lookup_operand (lhs);
+	  if (lhs_info)
+	    *info = *lhs_info;
+	}
+      break;
+
+    case FMA_EXPR:
+      /* In X * X + Y, where Y is distinct from X, the sign of X doesn't
+	 matter.  */
+      if (gimple_assign_rhs1 (assign) == rhs
+	  && gimple_assign_rhs2 (assign) == rhs
+	  && gimple_assign_rhs3 (assign) != rhs)
+	info->flags.ignore_sign = true;
+      break;
+
+    case MULT_EXPR:
+      /* In X * X, the sign of X doesn't matter.  */
+      if (gimple_assign_rhs1 (assign) == rhs
+	  && gimple_assign_rhs2 (assign) == rhs)
+	info->flags.ignore_sign = true;
+      /* Fall through.  */
+
+    case NEGATE_EXPR:
+    case RDIV_EXPR:
+      /* If the sign of the result doesn't matter, the sign of the inputs
+	 doesn't matter either.  */
+      if (FLOAT_TYPE_P (TREE_TYPE (rhs)))
+	{
+	  const usage_info *lhs_info = lookup_operand (lhs);
+	  if (lhs_info)
+	    info->flags.ignore_sign = lhs_info->flags.ignore_sign;
+	}
+      break;
+
+    default:
+      break;
+    }
+}
+
+/* Make INFO describe the uses of PHI's result.  */
+
+void
+backprop::process_phi_use (gphi *phi, usage_info *info)
+{
+  tree result = gimple_phi_result (phi);
+  if (const usage_info *result_info = lookup_operand (result))
+    *info = *result_info;
+}
+
+/* Make INFO describe all uses of RHS in STMT.  */
+
+void
+backprop::process_use (gimple *stmt, tree rhs, usage_info *info)
+{
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "[USE] ");
+      print_generic_expr (dump_file, rhs, 0);
+      fprintf (dump_file, " in ");
+      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+    }
+
+  if (gcall *call = dyn_cast <gcall *> (stmt))
+    {
+      if (gimple_call_builtin_p (call, BUILT_IN_NORMAL))
+	process_builtin_call_use (call, rhs, info);
+    }
+  else if (gassign *assign = dyn_cast <gassign *> (stmt))
+    process_assign_use (assign, rhs, info);
+  else if (gphi *phi = dyn_cast <gphi *> (stmt))
+    process_phi_use (phi, info);
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    dump_usage_info (dump_file, rhs, info);
+}
+
+/* Make INFO describe all uses of VAR, returning true if the result
+   is useful.  If the uses include phis that haven't been processed yet,
+   make the most optimistic assumption possible, so that we aim for
+   a maximum rather than a minimum fixed point.  */
+
+bool
+backprop::intersect_uses (tree var, usage_info *info)
+{
+  imm_use_iterator iter;
+  gimple *stmt;
+  *info = usage_info::intersection_identity ();
+  FOR_EACH_IMM_USE_STMT (stmt, iter, var)
+    {
+      if (is_gimple_debug (stmt))
+	continue;
+      if (is_a <gphi *> (stmt)
+	  && !bitmap_bit_p (m_visited_blocks, gimple_bb (stmt)->index))
+	{
+	  /* Skip unprocessed phis.  */
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "[BACKEDGE] ");
+	      print_generic_expr (dump_file, var, 0);
+	      fprintf (dump_file, " in ");
+	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+	    }
+	}
+      else
+	{
+	  usage_info subinfo;
+	  process_use (stmt, var, &subinfo);
+	  *info &= subinfo;
+	  if (!info->is_useful ())
+	    {
+	      BREAK_FROM_IMM_USE_STMT (iter);
+	      return false;
+	    }
+	}
+    }
+  return true;
+}
+
+/* Queue for reconsideration any input of STMT that has information
+   associated with it.  This is used if that information might be
+   too optimistic.  */
+
+void
+backprop::reprocess_inputs (gimple *stmt)
+{
+  use_operand_p use_p;
+  ssa_op_iter oi;
+  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
+    {
+      tree var = get_use_from_ptr (use_p);
+      if (lookup_operand (var))
+	push_to_worklist (var);
+    }
+}
+
+/* Say that we're recording INFO for SSA name VAR, or that we're deleting
+   existing information if INFO is null.  INTRO describes the change.  */
+
+static void
+dump_var_info (tree var, usage_info *info, const char *intro)
+{
+  fprintf (dump_file, "[DEF] %s for ", intro);
+  print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (var), 0, TDF_SLIM);
+  if (info)
+    dump_usage_info (dump_file, var, info);
+}
+
+/* Process all uses of VAR and record or update the result in
+   M_INFO_MAP and M_VARS.  */
+
+void
+backprop::process_var (tree var)
+{
+  if (has_zero_uses (var))
+    return;
+
+  usage_info info;
+  intersect_uses (var, &info);
+
+  gimple *stmt = SSA_NAME_DEF_STMT (var);
+  if (info.is_useful ())
+    {
+      bool existed;
+      usage_info *&map_info = m_info_map.get_or_insert (var, &existed);
+      if (!existed)
+	{
+	  /* Recording information about VAR for the first time.  */
+	  map_info = m_info_pool.allocate ();
+	  *map_info = info;
+	  m_vars.safe_push (var_info_pair (var, map_info));
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    dump_var_info (var, map_info, "Recording new information");
+
+	  /* If STMT is a phi, reprocess any backedge uses.  This is a
+	     no-op for other uses, which won't have any information
+	     associated with them.  */
+	  if (is_a <gphi *> (stmt))
+	    reprocess_inputs (stmt);
+	}
+      else if (info != *map_info)
+	{
+	  /* Recording information that is less optimistic than before.  */
+	  gcc_checking_assert ((info & *map_info) == info);
+	  *map_info = info;
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    dump_var_info (var, map_info, "Updating information");
+	  reprocess_inputs (stmt);
+	}
+    }
+  else
+    {
+      if (usage_info **slot = m_info_map.get (var))
+	{
+	  /* Removing previously-recorded information.  */
+	  **slot = info;
+	  m_info_map.remove (var);
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    dump_var_info (var, NULL, "Deleting information");
+	  reprocess_inputs (stmt);
+	}
+      else
+	{
+	  /* If STMT is a phi, remove any information recorded for
+	     its arguments.  */
+	  if (is_a <gphi *> (stmt))
+	    reprocess_inputs (stmt);
+	}
+    }
+}
+
+/* Process all statements and phis in BB, during the first post-order walk.  */
+
+void
+backprop::process_block (basic_block bb)
+{
+  for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
+       gsi_prev (&gsi))
+    {
+      tree lhs = gimple_get_lhs (gsi_stmt (gsi));
+      if (lhs && TREE_CODE (lhs) == SSA_NAME)
+	process_var (lhs);
+    }
+  for (gphi_iterator gpi = gsi_start_phis (bb); !gsi_end_p (gpi);
+       gsi_next (&gpi))
+    process_var (gimple_phi_result (gpi.phi ()));
+}
+
+/* Delete the definition of VAR, which has no uses.  */
+
+static void
+remove_unused_var (tree var)
+{
+  gimple *stmt = SSA_NAME_DEF_STMT (var);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "Deleting ");
+      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+    }
+  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+  gsi_remove (&gsi, true);
+  release_defs (stmt);
+}
+
+/* Note that we're replacing OLD_RHS with NEW_RHS in STMT.  */
+
+static void
+note_replacement (gimple *stmt, tree old_rhs, tree new_rhs)
+{
+  fprintf (dump_file, "Replacing use of ");
+  print_generic_expr (dump_file, old_rhs, 0);
+  fprintf (dump_file, " with ");
+  print_generic_expr (dump_file, new_rhs, 0);
+  fprintf (dump_file, " in ");
+  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+}
+
+/* If RHS is an SSA name whose definition just changes the sign of a value,
+   return that other value, otherwise return null.  */
+
+static tree
+strip_sign_op_1 (tree rhs)
+{
+  if (TREE_CODE (rhs) != SSA_NAME)
+    return NULL_TREE;
+
+  gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
+  if (gassign *assign = dyn_cast <gassign *> (def_stmt))
+    switch (gimple_assign_rhs_code (assign))
+      {
+      case ABS_EXPR:
+      case NEGATE_EXPR:
+	return gimple_assign_rhs1 (assign);
+
+      default:
+	break;
+      }
+  else if (gcall *call = dyn_cast <gcall *> (def_stmt))
+    {
+      if (gimple_call_builtin_p (call, BUILT_IN_NORMAL))
+	switch (DECL_FUNCTION_CODE (gimple_call_fndecl (call)))
+	  {
+	  CASE_FLT_FN (BUILT_IN_COPYSIGN):
+	    return gimple_call_arg (call, 0);
+
+	  default:
+	    break;
+	  }
+    }
+
+  return NULL_TREE;
+}
+
+/* If RHS is an SSA name whose definition just changes the sign of a value,
+   strip all such operations and return the ultimate input to them.
+   Return null otherwise.
+
+   Although this could in principle lead to quadratic searching,
+   in practice a long sequence of sign manipulations should already
+   have been folded down.  E.g. --x -> x, abs(-x) -> abs(x).  We search
+   for more than one operation in order to catch cases like -abs(x).  */
+
+static tree
+strip_sign_op (tree rhs)
+{
+  tree new_rhs = strip_sign_op_1 (rhs);
+  if (!new_rhs)
+    return NULL_TREE;
+  while (tree next = strip_sign_op_1 (new_rhs))
+    new_rhs = next;
+  return new_rhs;
+}
+
+/* Start a change in the value of VAR that is suitable for all non-debug
+   uses of VAR.  We need to make sure that debug statements continue to
+   use the original definition of VAR where possible, or are nullified
+   otherwise.  */
+
+void
+backprop::prepare_change (tree var)
+{
+  if (MAY_HAVE_DEBUG_STMTS)
+    insert_debug_temp_for_var_def (NULL, var);
+}
+
+/* STMT has been changed.  Give the fold machinery a chance to simplify
+   and canonicalize it (e.g. by ensuring that commutative operands have
+   the right order), then record the updates.  */
+
+void
+backprop::complete_change (gimple *stmt)
+{
+  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+  if (fold_stmt (&gsi))
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  fprintf (dump_file, "  which folds to: ");
+	  print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_SLIM);
+	}
+    }
+  update_stmt (gsi_stmt (gsi));
+}
+
+/* Optimize CALL, a call to a built-in function with lhs LHS, on the
+   basis that INFO describes all uses of LHS.  */
+
+void
+backprop::optimize_builtin_call (gcall *call, tree lhs, const usage_info *info)
+{
+  tree fndecl = gimple_call_fndecl (call);
+  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
+  /* If we have an f such that -f(x) = f(-x), and if the sign of the result
+     doesn't matter, strip any sign operations from the input.  */
+  if (info->flags.ignore_sign && negate_mathfn_p (fn))
+    {
+      tree new_arg = strip_sign_op (gimple_call_arg (call, 0));
+      if (new_arg)
+	{
+	  prepare_change (lhs);
+	  gimple_call_set_arg (call, 0, new_arg);
+	  complete_change (call);
+	}
+    }
+}
+
+/* Optimize ASSIGN, an assignment to LHS, by replacing rhs operand N
+   with RHS<N>, if RHS<N> is nonnull.  This may change the value of LHS.  */
+
+void
+backprop::replace_assign_rhs (gassign *assign, tree lhs, tree rhs1,
+			      tree rhs2, tree rhs3)
+{
+  if (!rhs1 && !rhs2 && !rhs3)
+    return;
+
+  prepare_change (lhs);
+  if (rhs1)
+    gimple_assign_set_rhs1 (assign, rhs1);
+  if (rhs2)
+    gimple_assign_set_rhs2 (assign, rhs2);
+  if (rhs3)
+    gimple_assign_set_rhs3 (assign, rhs3);
+  complete_change (assign);
+}
+
+/* Optimize ASSIGN, an assignment to LHS, on the basis that INFO
+   describes all uses of LHS.  */
+
+void
+backprop::optimize_assign (gassign *assign, tree lhs, const usage_info *info)
+{
+  switch (gimple_assign_rhs_code (assign))
+    {
+    case MULT_EXPR:
+    case RDIV_EXPR:
+      /* If the sign of the result doesn't matter, strip sign operations
+	 from both inputs.  */
+      if (info->flags.ignore_sign)
+	replace_assign_rhs (assign, lhs,
+			    strip_sign_op (gimple_assign_rhs1 (assign)),
+			    strip_sign_op (gimple_assign_rhs2 (assign)),
+			    NULL_TREE);
+      break;
+
+    case COND_EXPR:
+      /* If the sign of A ? B : C doesn't matter, strip sign operations
+	 from both B and C.  */
+      if (info->flags.ignore_sign)
+	replace_assign_rhs (assign, lhs,
+			    NULL_TREE,
+			    strip_sign_op (gimple_assign_rhs2 (assign)),
+			    strip_sign_op (gimple_assign_rhs3 (assign)));
+      break;
+
+    default:
+      break;
+    }
+}
+
+/* Optimize PHI, which defines VAR, on the basis that INFO describes all
+   uses of the result.  */
+
+void
+backprop::optimize_phi (gphi *phi, tree var, const usage_info *info)
+{
+  /* If the sign of the result doesn't matter, strip sign operations
+     from all arguments.  */
+  if (info->flags.ignore_sign)
+    {
+      use_operand_p use;
+      ssa_op_iter oi;
+      bool replaced = false;
+      FOR_EACH_PHI_ARG (use, phi, oi, SSA_OP_USE)
+	{
+	  tree new_arg = strip_sign_op (USE_FROM_PTR (use));
+	  if (new_arg)
+	    {
+	      if (!replaced)
+		prepare_change (var);
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		note_replacement (phi, USE_FROM_PTR (use), new_arg);
+	      replace_exp (use, new_arg);
+	      replaced = true;
+	    }
+	}
+    }
+}
+
+void
+backprop::execute ()
+{
+  /* Phase 1: Traverse the function, making optimistic assumptions
+     about any phi whose definition we haven't seen.  */
+  int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (m_fn));
+  unsigned int postorder_num = post_order_compute (postorder, false, false);
+  for (unsigned int i = 0; i < postorder_num; ++i)
+    {
+      process_block (BASIC_BLOCK_FOR_FN (m_fn, postorder[i]));
+      bitmap_set_bit (m_visited_blocks, postorder[i]);
+    }
+  XDELETEVEC (postorder);
+
+  /* Phase 2: Use the initial (perhaps overly optimistic) information
+     to create a maximal fixed point solution.  */
+  while (!m_worklist.is_empty ())
+    process_var (pop_from_worklist ());
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "\n");
+
+  /* Phase 3: Do a reverse post-order walk, using information about
+     the uses of SSA names to optimize their definitions.  */
+  for (unsigned int i = m_vars.length (); i-- > 0;)
+    {
+      usage_info *info = m_vars[i].second;
+      if (info->is_useful ())
+	{
+	  tree var = m_vars[i].first;
+	  gimple *stmt = SSA_NAME_DEF_STMT (var);
+	  if (gcall *call = dyn_cast <gcall *> (stmt))
+	    {
+	      if (gimple_call_builtin_p (call, BUILT_IN_NORMAL))
+		optimize_builtin_call (call, var, info);
+	    }
+	  else if (gassign *assign = dyn_cast <gassign *> (stmt))
+	    optimize_assign (assign, var, info);
+	  else if (gphi *phi = dyn_cast <gphi *> (stmt))
+	    optimize_phi (phi, var, info);
+	}
+    }
+
+  /* Phase 4: Do a post-order walk, deleting statements that are no
+     longer needed.  */
+  for (unsigned int i = 0; i < m_vars.length (); ++i)
+    {
+      tree var = m_vars[i].first;
+      if (has_zero_uses (var))
+	remove_unused_var (var);
+    }
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "\n");
+}
+
+const pass_data pass_data_backprop =
+{
+  GIMPLE_PASS, /* type */
+  "backprop", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_TREE_BACKPROP, /* tv_id */
+  ( PROP_cfg | PROP_ssa ), /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_backprop : public gimple_opt_pass
+{
+public:
+  pass_backprop (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_backprop, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  opt_pass * clone () { return new pass_backprop (m_ctxt); }
+  virtual bool gate (function *) { return flag_ssa_backprop; }
+  virtual unsigned int execute (function *);
+
+}; // class pass_backprop
+
+unsigned int
+pass_backprop::execute (function *fn)
+{
+  backprop (fn).execute ();
+  return 0;
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_backprop (gcc::context *ctxt)
+{
+  return new pass_backprop (ctxt);
+}
Index: gcc/testsuite/gcc.dg/tree-ssa/backprop-1.c
===================================================================
--- /dev/null	2015-10-14 14:19:37.164773004 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/backprop-1.c	2015-10-21 09:24:16.663564880 +0100
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O -g -fdump-tree-backprop-details" }  */
+
+/* Test a simple case of non-looping code in which both uses ignore
+   the sign and both definitions are sign ops.  */
+#define TEST_FUNCTION(TYPE, SUFFIX)				\
+  TYPE								\
+  test##SUFFIX (TYPE x, int sel1, int sel2)			\
+  {								\
+    TYPE input = sel1 ? -x : __builtin_fabs##SUFFIX (x);	\
+    if (sel2)							\
+      return __builtin_cos##SUFFIX (input);			\
+    else							\
+      return __builtin_cosh##SUFFIX (input);			\
+  }
+
+TEST_FUNCTION (float, f)
+TEST_FUNCTION (double, )
+TEST_FUNCTION (long double, l)
+
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -x} 3 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <x} 3 "backprop" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/backprop-2.c
===================================================================
--- /dev/null	2015-10-14 14:19:37.164773004 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/backprop-2.c	2015-10-21 09:24:16.663564880 +0100
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O -g -fdump-tree-backprop-details" }  */
+
+/* Test a simple case of non-looping code in which both uses ignore
+   the sign but only one definition is a sign op.  */
+#define TEST_FUNCTION(TYPE, SUFFIX)				\
+  TYPE								\
+  test##SUFFIX (TYPE x, int sel1, int sel2)			\
+  {								\
+    TYPE input = sel1 ? -x : x + 1;				\
+    if (sel2)							\
+      return __builtin_cos##SUFFIX (input);			\
+    else							\
+      return __builtin_cosh##SUFFIX (input);			\
+  }
+
+TEST_FUNCTION (float, f)
+TEST_FUNCTION (double, )
+TEST_FUNCTION (long double, l)
+
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -x} 3 "backprop" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/backprop-3.c
===================================================================
--- /dev/null	2015-10-14 14:19:37.164773004 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/backprop-3.c	2015-10-21 09:24:16.663564880 +0100
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O -g -fdump-tree-backprop-details" }  */
+
+/* Test a simple case of non-looping code in which one use ignores
+   the sign but another doesn't.  */
+#define TEST_FUNCTION(TYPE, SUFFIX)				\
+  TYPE								\
+  test##SUFFIX (TYPE x, int sel1, int sel2)			\
+  {								\
+    TYPE input = sel1 ? -x : __builtin_fabs##SUFFIX (x);	\
+    if (sel2)							\
+      return __builtin_cos##SUFFIX (input);			\
+    else							\
+      return __builtin_sin##SUFFIX (input);			\
+  }
+
+TEST_FUNCTION (float, f)
+TEST_FUNCTION (double, )
+TEST_FUNCTION (long double, l)
+
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -x} 0 "backprop" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/backprop-4.c
===================================================================
--- /dev/null	2015-10-14 14:19:37.164773004 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/backprop-4.c	2015-10-21 09:24:16.663564880 +0100
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O -g -fdump-tree-backprop-details" }  */
+
+/* Test a simple reduction loop in which all inputs are sign ops and
+   the consumer doesn't care about the sign.  */
+#define TEST_FUNCTION(TYPE, SUFFIX)			\
+  TYPE							\
+  test##SUFFIX (TYPE x, TYPE y, TYPE *array, int n)	\
+  {							\
+    x = __builtin_copysign##SUFFIX (x, y);		\
+    for (int i = 0; i < n; ++i)				\
+      x *= -array[i];					\
+    return __builtin_hypot##SUFFIX (x, y);		\
+  }
+
+TEST_FUNCTION (float, f)
+TEST_FUNCTION (double, )
+TEST_FUNCTION (long double, l)
+
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = __builtin_copysign} 3 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 3 "backprop" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/backprop-5.c
===================================================================
--- /dev/null	2015-10-14 14:19:37.164773004 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/backprop-5.c	2015-10-21 09:24:16.663564880 +0100
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O -g -fdump-tree-backprop-details" }  */
+
+/* Test a loop that does both a multiplication and addition.  The addition
+   should prevent any sign ops from being removed.  */
+#define TEST_FUNCTION(TYPE, SUFFIX)			\
+  TYPE							\
+  test##SUFFIX (TYPE x, TYPE y, TYPE *array, int n)	\
+  {							\
+    x = __builtin_copysign##SUFFIX (x, y);		\
+    for (int i = 0; i < n; ++i)				\
+      x = (x + 1) * array[i];				\
+    return __builtin_hypot##SUFFIX (x, y);		\
+  }
+
+TEST_FUNCTION (float, f)
+TEST_FUNCTION (double, )
+TEST_FUNCTION (long double, l)
+
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = __builtin_copysign} 0 "backprop" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
===================================================================
--- /dev/null	2015-10-14 14:19:37.164773004 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c	2015-10-21 09:24:16.663564880 +0100
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-backprop-details" }  */
+
+void start (void *);
+void end (void *);
+
+/* Test that we optimize the contents of infinite loops.  */
+#define TEST_FUNCTION(TYPE, SUFFIX)			\
+  void							\
+  test##SUFFIX (TYPE *array, TYPE y, int n)		\
+  {							\
+    for (;;)						\
+      {							\
+	start (array);					\
+	TYPE x = -__builtin_fabs##SUFFIX (array[-1]);	\
+	for (int i = 0; i < n; ++i)			\
+	  x = -x / array[i];				\
+	array[-1] = x * x;				\
+	array[-2] = __builtin_fma##SUFFIX (x, x, y);	\
+	array[-3] = __builtin_pow##SUFFIX (x, 20);	\
+	end (array);					\
+      }							\
+  }
+
+TEST_FUNCTION (float, f)
+TEST_FUNCTION (double, )
+TEST_FUNCTION (long double, l)
+
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Add a pass to back-propagate use information
  2015-10-21  8:49       ` Richard Sandiford
@ 2015-10-21  9:58         ` Richard Biener
  0 siblings, 0 replies; 7+ messages in thread
From: Richard Biener @ 2015-10-21  9:58 UTC (permalink / raw)
  To: Richard Biener, GCC Patches, richard.sandiford

On Wed, Oct 21, 2015 at 10:48 AM, Richard Sandiford
<richard.sandiford@arm.com> wrote:
> Richard Biener <richard.guenther@gmail.com> writes:
>> On Mon, Oct 19, 2015 at 2:38 PM, Richard Sandiford
>> <richard.sandiford@arm.com> wrote:
>>> Richard Biener <richard.guenther@gmail.com> writes:
>>>> On Thu, Oct 15, 2015 at 3:17 PM, Richard Sandiford
>>>> <richard.sandiford@arm.com> wrote:
>>>>> +/* Strip all sign operations from the rvalue at *RHS_PTR in STMT.
>>>>> +   Return true if something changed.  The caller is responsible
>>>>> +   for the necessary bookkeeping.  */
>>>>> +
>>>>> +static bool
>>>>> +strip_sign_op (gimple *stmt, tree *rhs_ptr)
>>>>> +{
>>>>> +  if (tree new_rhs = strip_sign_op (*rhs_ptr))
>>>>> +    {
>>>>> +      if (dump_file && (dump_flags & TDF_DETAILS))
>>>>> +       note_replacement (stmt, *rhs_ptr, new_rhs);
>>>>> +      *rhs_ptr = new_rhs;
>>>>
>>>> So it looks you are only changing stmts when the stmt result produces
>>>> the same value.  Just double-checking, as otherwise you'd need to care
>>>> about debug stmts ...
>>>
>>> No, it can change values, like the case you saw later for phis.
>>> This applies to all the cases where the optimisation depends on the
>>> propagated info for the lhs, rather than being inherent to the operation.
>>> So e.g. we can change the value of A in A = B * C, if all uses of A
>>> don't care about the sign.
>>>
>>> At the moment the only change we can make is that the result could be
>>> the negative of its original value.
>>
>> Ok, so then there is the debug issue.  Consider
>>
>>   x = ...;
>>
>> which you change the sign for.  The user in gdb when printing 'x' needs to
>> see the original value or "optimized out", not the negated value.  This means
>> you have to replace the LHS of the stmt with a new SSA name which is
>> best done (with proper debug effects) by removing the original stmt and
>> replacing all uses of the LHS with the new stmt lhs.  Note that the
>> same is true for all derived values, so even if you only change
>>
>>  _1 = ...;
>>
>> (no user visible value) then a derived value
>>
>>
>>  x_2 = _1 + 2;
>>
>> might change sign.
>>
>> Well, you have to think about it at least ;)
>
> When addressing this, I tried to look for a way of doing it without
> creating new SSA names and statements, since that should be much
> simpler to code and hopefully also more efficient.  I noticed
> gimple_replace_ssa_lhs, which seems to do what I want for assignments
> and calls.  It copes with cases where the "new" lhs is different from
> the old lhs and also cases where it's the same.
>
> In backprop there shouldn't be any need for a new lhs once debug uses
> have been updated.  All non-debug uses can cope with the new value so
> they might as well continue to use the same SSA name.  The pass therefore
> only needs the old lhs == new lhs behaviour.  It also needs to do the
> same thing for phis, which aren't handled by gimple_replace_ssa_lhs.
> In the end I simply took the active part of gimple_replace_ssa_lhs for
> old lhs == new lhs:
>
>   if (MAY_HAVE_DEBUG_STMTS)
>     insert_debug_temp_for_var_def (NULL, lhs);
>
> and used it directly.  Hope that's OK.

Heh, didn't know we have such helper, will try to remember.  Yes, the above
looks ok to me.

> I also added -g to the dg-options lines of the tests.
>
>> So apart from the debug issue the patch looks fine then.
>
> Here's the updated patch.  Tested on x86_64-linux-gnu, aarch64-linux-gnu
> and arm-linux-gnueabi.  OK to install?

Ok.

Thanks,
Richard.

> Thanks,
> Richard
>
>
> gcc/
>         * doc/invoke.texi (-fdump-tree-backprop, -fssa-backprop): Document.
>         * Makefile.in (OBJS): Add gimple-ssa-backprop.o.
>         * common.opt (fssa-backprop): New option.
>         * fold-const.h (negate_mathfn_p): Declare.
>         * fold-const.c (negate_mathfn_p): Make public.
>         * timevar.def (TV_TREE_BACKPROP): New.
>         * tree-pass.h (make_pass_backprop): Declare.
>         * passes.def (pass_backprop): Add.
>         * gimple-ssa-backprop.c: New file.
>
> gcc/testsuite/
>         * gcc.dg/tree-ssa/backprop-1.c, gcc.dg/tree-ssa/backprop-2.c,
>         gcc.dg/tree-ssa/backprop-3.c, gcc.dg/tree-ssa/backprop-4.c,
>         gcc.dg/tree-ssa/backprop-5.c, gcc.dg/tree-ssa/backprop-6.c: New tests.
>
> Index: gcc/doc/invoke.texi
> ===================================================================
> --- gcc/doc/invoke.texi 2015-10-21 08:43:48.375797607 +0100
> +++ gcc/doc/invoke.texi 2015-10-21 09:24:16.659564928 +0100
> @@ -343,6 +343,7 @@ Objective-C and Objective-C++ Dialects}.
>  -fdump-tree-dse@r{[}-@var{n}@r{]} @gol
>  -fdump-tree-phiprop@r{[}-@var{n}@r{]} @gol
>  -fdump-tree-phiopt@r{[}-@var{n}@r{]} @gol
> +-fdump-tree-backprop@r{[}-@var{n}@r{]} @gol
>  -fdump-tree-forwprop@r{[}-@var{n}@r{]} @gol
>  -fdump-tree-nrv -fdump-tree-vect @gol
>  -fdump-tree-sink @gol
> @@ -443,9 +444,9 @@ Objective-C and Objective-C++ Dialects}.
>  -fschedule-insns -fschedule-insns2 -fsection-anchors @gol
>  -fselective-scheduling -fselective-scheduling2 @gol
>  -fsel-sched-pipelining -fsel-sched-pipelining-outer-loops @gol
> --fsemantic-interposition @gol
> --fshrink-wrap -fsignaling-nans -fsingle-precision-constant @gol
> --fsplit-ivs-in-unroller -fsplit-wide-types -fssa-phiopt @gol
> +-fsemantic-interposition -fshrink-wrap -fsignaling-nans @gol
> +-fsingle-precision-constant -fsplit-ivs-in-unroller @gol
> +-fsplit-wide-types -fssa-backprop -fssa-phiopt @gol
>  -fstack-protector -fstack-protector-all -fstack-protector-strong @gol
>  -fstack-protector-explicit -fstdarg-opt -fstrict-aliasing @gol
>  -fstrict-overflow -fthread-jumps -ftracer -ftree-bit-ccp @gol
> @@ -7236,6 +7237,12 @@ name is made by appending @file{.dse} to
>  Dump each function after optimizing PHI nodes into straightline code.  The file
>  name is made by appending @file{.phiopt} to the source file name.
>
> +@item backprop
> +@opindex fdump-tree-backprop
> +Dump each function after back-propagating use information up the definition
> +chain.  The file name is made by appending @file{.backprop} to the
> +source file name.
> +
>  @item forwprop
>  @opindex fdump-tree-forwprop
>  Dump each function after forward propagating single use variables.  The file
> @@ -7707,9 +7714,10 @@ compilation time.
>  -freorder-blocks @gol
>  -fshrink-wrap @gol
>  -fsplit-wide-types @gol
> +-fssa-backprop @gol
> +-fssa-phiopt @gol
>  -ftree-bit-ccp @gol
>  -ftree-ccp @gol
> --fssa-phiopt @gol
>  -ftree-ch @gol
>  -ftree-coalesce-vars @gol
>  -ftree-copy-prop @gol
> @@ -8795,6 +8803,13 @@ Perform sparse conditional constant prop
>  pass only operates on local scalar variables and is enabled by default
>  at @option{-O} and higher.
>
> +@item -fssa-backprop
> +@opindex fssa-backprop
> +Propagate information about uses of a value up the definition chain
> +in order to simplify the definitions.  For example, this pass strips
> +sign operations if the sign of a value never matters.  The flag is
> +enabled by default at @option{-O} and higher.
> +
>  @item -fssa-phiopt
>  @opindex fssa-phiopt
>  Perform pattern matching on SSA PHI nodes to optimize conditional
> Index: gcc/Makefile.in
> ===================================================================
> --- gcc/Makefile.in     2015-10-21 08:43:49.879780139 +0100
> +++ gcc/Makefile.in     2015-10-21 09:24:16.655564975 +0100
> @@ -1266,6 +1266,7 @@ OBJS = \
>         gimple-laddress.o \
>         gimple-low.o \
>         gimple-pretty-print.o \
> +       gimple-ssa-backprop.o \
>         gimple-ssa-isolate-paths.o \
>         gimple-ssa-strength-reduction.o \
>         gimple-streamer-in.o \
> Index: gcc/common.opt
> ===================================================================
> --- gcc/common.opt      2015-10-21 08:43:49.879780139 +0100
> +++ gcc/common.opt      2015-10-21 09:24:16.655564975 +0100
> @@ -2114,6 +2114,10 @@ fsplit-wide-types
>  Common Report Var(flag_split_wide_types) Optimization
>  Split wide types into independent registers
>
> +fssa-backprop
> +Common Report Var(flag_ssa_backprop) Init(1) Optimization
> +Enable backward propagation of use properties at the SSA level.
> +
>  fssa-phiopt
>  Common Report Var(flag_ssa_phiopt) Optimization
>  Optimize conditional patterns using SSA PHI nodes
> Index: gcc/fold-const.h
> ===================================================================
> --- gcc/fold-const.h    2015-10-08 17:49:05.054371324 +0100
> +++ gcc/fold-const.h    2015-10-21 09:24:16.663564880 +0100
> @@ -173,6 +173,7 @@ extern tree sign_bit_p (tree, const_tree
>  extern tree exact_inverse (tree, tree);
>  extern tree const_unop (enum tree_code, tree, tree);
>  extern tree const_binop (enum tree_code, tree, tree, tree);
> +extern bool negate_mathfn_p (enum built_in_function);
>
>  /* Return OFF converted to a pointer offset type suitable as offset for
>     POINTER_PLUS_EXPR.  Use location LOC for this conversion.  */
> Index: gcc/fold-const.c
> ===================================================================
> --- gcc/fold-const.c    2015-10-21 09:24:01.443746048 +0100
> +++ gcc/fold-const.c    2015-10-21 09:24:16.663564880 +0100
> @@ -112,7 +112,6 @@ enum comparison_code {
>    COMPCODE_TRUE = 15
>  };
>
> -static bool negate_mathfn_p (enum built_in_function);
>  static bool negate_expr_p (tree);
>  static tree negate_expr (tree);
>  static tree split_tree (tree, enum tree_code, tree *, tree *, tree *, int);
> @@ -321,7 +320,7 @@ fold_overflow_warning (const char* gmsgi
>  /* Return true if the built-in mathematical function specified by CODE
>     is odd, i.e. -f(x) == f(-x).  */
>
> -static bool
> +bool
>  negate_mathfn_p (enum built_in_function code)
>  {
>    switch (code)
> Index: gcc/timevar.def
> ===================================================================
> --- gcc/timevar.def     2015-10-21 08:43:48.675794123 +0100
> +++ gcc/timevar.def     2015-10-21 09:24:16.663564880 +0100
> @@ -164,6 +164,7 @@ DEFTIMEVAR (TV_TREE_PRE                  , "tree PR
>  DEFTIMEVAR (TV_TREE_FRE                     , "tree FRE")
>  DEFTIMEVAR (TV_TREE_SINK             , "tree code sinking")
>  DEFTIMEVAR (TV_TREE_PHIOPT          , "tree linearize phis")
> +DEFTIMEVAR (TV_TREE_BACKPROP        , "tree backward propagate")
>  DEFTIMEVAR (TV_TREE_FORWPROP        , "tree forward propagate")
>  DEFTIMEVAR (TV_TREE_PHIPROP         , "tree phiprop")
>  DEFTIMEVAR (TV_TREE_DCE                     , "tree conservative DCE")
> Index: gcc/tree-pass.h
> ===================================================================
> --- gcc/tree-pass.h     2015-10-15 10:49:00.889300501 +0100
> +++ gcc/tree-pass.h     2015-10-21 09:24:16.663564880 +0100
> @@ -497,6 +497,7 @@ extern gimple_opt_pass *make_pass_cleanu
>                                                                *ctxt);
>  extern gimple_opt_pass *make_pass_init_datastructures (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_fixup_cfg (gcc::context *ctxt);
> +extern gimple_opt_pass *make_pass_backprop (gcc::context *ctxt);
>
>  extern rtl_opt_pass *make_pass_expand (gcc::context *ctxt);
>  extern rtl_opt_pass *make_pass_instantiate_virtual_regs (gcc::context *ctxt);
> Index: gcc/passes.def
> ===================================================================
> --- gcc/passes.def      2015-10-15 10:48:46.713465500 +0100
> +++ gcc/passes.def      2015-10-21 09:24:16.663564880 +0100
> @@ -159,6 +159,7 @@ along with GCC; see the file COPYING3.
>        /* After CCP we rewrite no longer addressed locals into SSA
>          form if possible.  */
>        NEXT_PASS (pass_complete_unrolli);
> +      NEXT_PASS (pass_backprop);
>        NEXT_PASS (pass_phiprop);
>        NEXT_PASS (pass_forwprop);
>        NEXT_PASS (pass_object_sizes);
> Index: gcc/gimple-ssa-backprop.c
> ===================================================================
> --- /dev/null   2015-10-14 14:19:37.164773004 +0100
> +++ gcc/gimple-ssa-backprop.c   2015-10-21 09:24:16.663564880 +0100
> @@ -0,0 +1,956 @@
> +/* Back-propagation of usage information to definitions.
> +   Copyright (C) 2015 Free Software Foundation, Inc.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify
> +it under the terms of the GNU General Public License as published by
> +the Free Software Foundation; either version 3, or (at your option)
> +any later version.
> +
> +GCC is distributed in the hope that it will be useful,
> +but WITHOUT ANY WARRANTY; without even the implied warranty of
> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +GNU General Public License for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +<http://www.gnu.org/licenses/>.  */
> +
> +/* This pass propagates information that is common to all uses of an SSA
> +   name back up through the sequence of statements that generate it,
> +   simplifying the statements where possible.  Sometimes this can expose
> +   fully or partially dead code, but the main focus is simplifying
> +   computations.
> +
> +   At the moment the pass only handles one piece of information: whether the
> +   sign of a value matters, and therefore whether sign-changing operations
> +   can be skipped.  The pass could be extended to more interesting
> +   information in future, such as which bits of an integer are significant.
> +
> +   For example, take the function:
> +
> +     double
> +     f (double *a, int n, double start)
> +     {
> +       double x = fabs (start);
> +       for (int i = 0; i < n; ++i)
> +        x *= a[i];
> +       return __builtin_cos (x);
> +     }
> +
> +   cos(x) == cos(-x), so the sign of the final x doesn't matter.
> +   That x is the result of a series of multiplications, and if
> +   the sign of the result of a multiplication doesn't matter,
> +   the signs of the inputs don't matter either.
> +
> +   The pass would replace the incoming value of x (i.e. fabs(start))
> +   with start.  Since there are no other uses of the fabs result,
> +   the call would get deleted as dead.
> +
> +   The algorithm is:
> +
> +   (1) Do a post-order traversal of the blocks in the function, walking
> +       each block backwards.  For each potentially-simplifiable statement
> +       that defines an SSA name X, examine all uses of X to see what
> +       information is actually significant.  Record this as INFO_MAP[X].
> +       Optimistically ignore for now any back-edge references to
> +       unprocessed phis.
> +
> +       (An alternative would be to record each use when we visit its
> +       statement and take the intersection as we go along.  However,
> +       this would lead to more SSA names being entered into INFO_MAP
> +       unnecessarily, only to be taken out again later.  At the moment
> +       very few SSA names end up with useful information.)
> +
> +   (2) Iteratively reduce the optimistic result of (1) until we reach
> +       a maximal fixed point (which at the moment would mean revisiting
> +       statements at most once).  First push all SSA names that used an
> +       optimistic assumption about a backedge phi onto a worklist.
> +       While the worklist is nonempty, pick off an SSA name X and recompute
> +       INFO_MAP[X].  If the value changes, push all SSA names used in the
> +       definition of X onto the worklist.
> +
> +   (3) Iterate over each SSA name X with info in INFO_MAP, in the
> +       opposite order to (1), i.e. a forward reverse-post-order walk.
> +       Try to optimize the definition of X using INFO_MAP[X] and fold
> +       the result.  (This ensures that we fold definitions before uses.)
> +
> +   (4) Iterate over each SSA name X with info in INFO_MAP, in the same
> +       order as (1), and delete any statements that are now dead.
> +       (This ensures that if a sequence of statements is dead,
> +       we delete the last statement first.)
> +
> +   Note that this pass does not deal with direct redundancies,
> +   such as cos(-x)->cos(x).  match.pd handles those cases instead.  */
> +
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "backend.h"
> +#include "tree.h"
> +#include "gimple.h"
> +#include "gimple-iterator.h"
> +#include "ssa.h"
> +#include "fold-const.h"
> +#include "tree-pass.h"
> +#include "cfganal.h"
> +#include "gimple-pretty-print.h"
> +#include "tree-cfg.h"
> +#include "tree-ssa.h"
> +#include "tree-ssa-propagate.h"
> +#include "gimple-fold.h"
> +#include "alloc-pool.h"
> +#include "tree-hash-traits.h"
> +
> +namespace {
> +
> +/* Information about a group of uses of an SSA name.  */
> +struct usage_info
> +{
> +  usage_info () : flag_word (0) {}
> +  usage_info &operator &= (const usage_info &);
> +  usage_info operator & (const usage_info &) const;
> +  bool operator == (const usage_info &) const;
> +  bool operator != (const usage_info &) const;
> +  bool is_useful () const;
> +
> +  static usage_info intersection_identity ();
> +
> +  union
> +  {
> +    struct
> +    {
> +      /* True if the uses treat x and -x in the same way.  */
> +      unsigned int ignore_sign : 1;
> +    } flags;
> +    /* All the flag bits as a single int.  */
> +    unsigned int flag_word;
> +  };
> +};
> +
> +/* Return an X such that X & Y == Y for all Y.  This is the most
> +   optimistic assumption possible.  */
> +
> +usage_info
> +usage_info::intersection_identity ()
> +{
> +  usage_info ret;
> +  ret.flag_word = -1;
> +  return ret;
> +}
> +
> +/* Intersect *THIS with OTHER, so that *THIS describes all uses covered
> +   by the original *THIS and OTHER.  */
> +
> +usage_info &
> +usage_info::operator &= (const usage_info &other)
> +{
> +  flag_word &= other.flag_word;
> +  return *this;
> +}
> +
> +/* Return the intersection of *THIS and OTHER, i.e. a structure that
> +   describes all uses covered by *THIS and OTHER.  */
> +
> +usage_info
> +usage_info::operator & (const usage_info &other) const
> +{
> +  usage_info info (*this);
> +  info &= other;
> +  return info;
> +}
> +
> +bool
> +usage_info::operator == (const usage_info &other) const
> +{
> +  return flag_word == other.flag_word;
> +}
> +
> +bool
> +usage_info::operator != (const usage_info &other) const
> +{
> +  return !operator == (other);
> +}
> +
> +/* Return true if *THIS is not simply the default, safe assumption.  */
> +
> +bool
> +usage_info::is_useful () const
> +{
> +  return flag_word != 0;
> +}
> +
> +/* Start a dump line about SSA name VAR.  */
> +
> +static void
> +dump_usage_prefix (FILE *file, tree var)
> +{
> +  fprintf (file, "  ");
> +  print_generic_expr (file, var, 0);
> +  fprintf (file, ": ");
> +}
> +
> +/* Print INFO to FILE.  */
> +
> +static void
> +dump_usage_info (FILE *file, tree var, usage_info *info)
> +{
> +  if (info->flags.ignore_sign)
> +    {
> +      dump_usage_prefix (file, var);
> +      fprintf (file, "sign bit not important\n");
> +    }
> +}
> +
> +/* Represents one execution of the pass.  */
> +class backprop
> +{
> +public:
> +  backprop (function *);
> +  ~backprop ();
> +
> +  void execute ();
> +
> +private:
> +  const usage_info *lookup_operand (tree);
> +
> +  void push_to_worklist (tree);
> +  tree pop_from_worklist ();
> +
> +  void process_builtin_call_use (gcall *, tree, usage_info *);
> +  void process_assign_use (gassign *, tree, usage_info *);
> +  void process_phi_use (gphi *, usage_info *);
> +  void process_use (gimple *, tree, usage_info *);
> +  bool intersect_uses (tree, usage_info *);
> +  void reprocess_inputs (gimple *);
> +  void process_var (tree);
> +  void process_block (basic_block);
> +
> +  void prepare_change (tree);
> +  void complete_change (gimple *);
> +  void optimize_builtin_call (gcall *, tree, const usage_info *);
> +  void replace_assign_rhs (gassign *, tree, tree, tree, tree);
> +  void optimize_assign (gassign *, tree, const usage_info *);
> +  void optimize_phi (gphi *, tree, const usage_info *);
> +
> +  typedef hash_map <tree_ssa_name_hash, usage_info *> info_map_type;
> +  typedef std::pair <tree, usage_info *> var_info_pair;
> +
> +  /* The function we're optimizing.  */
> +  function *m_fn;
> +
> +  /* Pool for allocating usage_info structures.  */
> +  object_allocator <usage_info> m_info_pool;
> +
> +  /* Maps an SSA name to a description of all uses of that SSA name.
> +     All the usage_infos satisfy is_useful.
> +
> +     We use a hash_map because the map is expected to be sparse
> +     (i.e. most SSA names won't have useful information attached to them).
> +     We could move to a directly-indexed array if that situation changes.  */
> +  info_map_type m_info_map;
> +
> +  /* Post-ordered list of all potentially-interesting SSA names,
> +     along with information that describes all uses.  */
> +  auto_vec <var_info_pair, 128> m_vars;
> +
> +  /* A bitmap of blocks that we have finished processing in the initial
> +     post-order walk.  */
> +  sbitmap m_visited_blocks;
> +
> +  /* A worklist of SSA names whose definitions need to be reconsidered.  */
> +  auto_vec <tree, 64> m_worklist;
> +
> +  /* The SSA names in M_WORKLIST, identified by their SSA_NAME_VERSION.
> +     We use a bitmap rather than an sbitmap because most SSA names are
> +     never added to the worklist.  */
> +  bitmap m_worklist_names;
> +};
> +
> +backprop::backprop (function *fn)
> +  : m_fn (fn),
> +    m_info_pool ("usage_info"),
> +    m_visited_blocks (sbitmap_alloc (last_basic_block_for_fn (m_fn))),
> +    m_worklist_names (BITMAP_ALLOC (NULL))
> +{
> +  bitmap_clear (m_visited_blocks);
> +}
> +
> +backprop::~backprop ()
> +{
> +  BITMAP_FREE (m_worklist_names);
> +  sbitmap_free (m_visited_blocks);
> +  m_info_pool.release ();
> +}
> +
> +/* Return usage information for general operand OP, or null if none.  */
> +
> +const usage_info *
> +backprop::lookup_operand (tree op)
> +{
> +  if (op && TREE_CODE (op) == SSA_NAME)
> +    {
> +      usage_info **slot = m_info_map.get (op);
> +      if (slot)
> +       return *slot;
> +    }
> +  return NULL;
> +}
> +
> +/* Add SSA name VAR to the worklist, if it isn't on the worklist already.  */
> +
> +void
> +backprop::push_to_worklist (tree var)
> +{
> +  if (!bitmap_set_bit (m_worklist_names, SSA_NAME_VERSION (var)))
> +    return;
> +  m_worklist.safe_push (var);
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "[WORKLIST] Pushing ");
> +      print_generic_expr (dump_file, var, 0);
> +      fprintf (dump_file, "\n");
> +    }
> +}
> +
> +/* Remove and return the next SSA name from the worklist.  The worklist
> +   is known to be nonempty.  */
> +
> +tree
> +backprop::pop_from_worklist ()
> +{
> +  tree var = m_worklist.pop ();
> +  bitmap_clear_bit (m_worklist_names, SSA_NAME_VERSION (var));
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "[WORKLIST] Popping ");
> +      print_generic_expr (dump_file, var, 0);
> +      fprintf (dump_file, "\n");
> +    }
> +  return var;
> +}
> +
> +/* Make INFO describe all uses of RHS in CALL, which is a call to a
> +   built-in function.  */
> +
> +void
> +backprop::process_builtin_call_use (gcall *call, tree rhs, usage_info *info)
> +{
> +  enum built_in_function fn = DECL_FUNCTION_CODE (gimple_call_fndecl (call));
> +  tree lhs = gimple_call_lhs (call);
> +  switch (fn)
> +    {
> +    CASE_FLT_FN (BUILT_IN_COS):
> +    CASE_FLT_FN (BUILT_IN_COSH):
> +    CASE_FLT_FN (BUILT_IN_CCOS):
> +    CASE_FLT_FN (BUILT_IN_CCOSH):
> +    CASE_FLT_FN (BUILT_IN_HYPOT):
> +      /* The signs of all inputs are ignored.  */
> +      info->flags.ignore_sign = true;
> +      break;
> +
> +    CASE_FLT_FN (BUILT_IN_COPYSIGN):
> +      /* The sign of the first input is ignored.  */
> +      if (rhs != gimple_call_arg (call, 1))
> +       info->flags.ignore_sign = true;
> +      break;
> +
> +    CASE_FLT_FN (BUILT_IN_POW):
> +      {
> +       /* The sign of the first input is ignored as long as the second
> +          input is an even real.  */
> +       tree power = gimple_call_arg (call, 1);
> +       HOST_WIDE_INT n;
> +       if (TREE_CODE (power) == REAL_CST
> +           && real_isinteger (&TREE_REAL_CST (power), &n)
> +           && (n & 1) == 0)
> +         info->flags.ignore_sign = true;
> +       break;
> +      }
> +
> +    CASE_FLT_FN (BUILT_IN_FMA):
> +      /* In X * X + Y, where Y is distinct from X, the sign of X doesn't
> +        matter.  */
> +      if (gimple_call_arg (call, 0) == rhs
> +         && gimple_call_arg (call, 1) == rhs
> +         && gimple_call_arg (call, 2) != rhs)
> +       info->flags.ignore_sign = true;
> +      break;
> +
> +    default:
> +      if (negate_mathfn_p (fn))
> +       {
> +         /* The sign of the (single) input doesn't matter provided
> +            that the sign of the output doesn't matter.  */
> +         const usage_info *lhs_info = lookup_operand (lhs);
> +         if (lhs_info)
> +           info->flags.ignore_sign = lhs_info->flags.ignore_sign;
> +       }
> +      break;
> +    }
> +}
> +
> +/* Make INFO describe all uses of RHS in ASSIGN.  */
> +
> +void
> +backprop::process_assign_use (gassign *assign, tree rhs, usage_info *info)
> +{
> +  tree lhs = gimple_assign_lhs (assign);
> +  switch (gimple_assign_rhs_code (assign))
> +    {
> +    case ABS_EXPR:
> +      /* The sign of the input doesn't matter.  */
> +      info->flags.ignore_sign = true;
> +      break;
> +
> +    case COND_EXPR:
> +      /* For A = B ? C : D, propagate information about all uses of A
> +        to C and D.  */
> +      if (rhs != gimple_assign_rhs1 (assign))
> +       {
> +         const usage_info *lhs_info = lookup_operand (lhs);
> +         if (lhs_info)
> +           *info = *lhs_info;
> +       }
> +      break;
> +
> +    case FMA_EXPR:
> +      /* In X * X + Y, where Y is distinct from X, the sign of X doesn't
> +        matter.  */
> +      if (gimple_assign_rhs1 (assign) == rhs
> +         && gimple_assign_rhs2 (assign) == rhs
> +         && gimple_assign_rhs3 (assign) != rhs)
> +       info->flags.ignore_sign = true;
> +      break;
> +
> +    case MULT_EXPR:
> +      /* In X * X, the sign of X doesn't matter.  */
> +      if (gimple_assign_rhs1 (assign) == rhs
> +         && gimple_assign_rhs2 (assign) == rhs)
> +       info->flags.ignore_sign = true;
> +      /* Fall through.  */
> +
> +    case NEGATE_EXPR:
> +    case RDIV_EXPR:
> +      /* If the sign of the result doesn't matter, the sign of the inputs
> +        doesn't matter either.  */
> +      if (FLOAT_TYPE_P (TREE_TYPE (rhs)))
> +       {
> +         const usage_info *lhs_info = lookup_operand (lhs);
> +         if (lhs_info)
> +           info->flags.ignore_sign = lhs_info->flags.ignore_sign;
> +       }
> +      break;
> +
> +    default:
> +      break;
> +    }
> +}
> +
> +/* Make INFO describe the uses of PHI's result.  */
> +
> +void
> +backprop::process_phi_use (gphi *phi, usage_info *info)
> +{
> +  tree result = gimple_phi_result (phi);
> +  if (const usage_info *result_info = lookup_operand (result))
> +    *info = *result_info;
> +}
> +
> +/* Make INFO describe all uses of RHS in STMT.  */
> +
> +void
> +backprop::process_use (gimple *stmt, tree rhs, usage_info *info)
> +{
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "[USE] ");
> +      print_generic_expr (dump_file, rhs, 0);
> +      fprintf (dump_file, " in ");
> +      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
> +    }
> +
> +  if (gcall *call = dyn_cast <gcall *> (stmt))
> +    {
> +      if (gimple_call_builtin_p (call, BUILT_IN_NORMAL))
> +       process_builtin_call_use (call, rhs, info);
> +    }
> +  else if (gassign *assign = dyn_cast <gassign *> (stmt))
> +    process_assign_use (assign, rhs, info);
> +  else if (gphi *phi = dyn_cast <gphi *> (stmt))
> +    process_phi_use (phi, info);
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    dump_usage_info (dump_file, rhs, info);
> +}
> +
> +/* Make INFO describe all uses of VAR, returning true if the result
> +   is useful.  If the uses include phis that haven't been processed yet,
> +   make the most optimistic assumption possible, so that we aim for
> +   a maximum rather than a minimum fixed point.  */
> +
> +bool
> +backprop::intersect_uses (tree var, usage_info *info)
> +{
> +  imm_use_iterator iter;
> +  gimple *stmt;
> +  *info = usage_info::intersection_identity ();
> +  FOR_EACH_IMM_USE_STMT (stmt, iter, var)
> +    {
> +      if (is_gimple_debug (stmt))
> +       continue;
> +      if (is_a <gphi *> (stmt)
> +         && !bitmap_bit_p (m_visited_blocks, gimple_bb (stmt)->index))
> +       {
> +         /* Skip unprocessed phis.  */
> +         if (dump_file && (dump_flags & TDF_DETAILS))
> +           {
> +             fprintf (dump_file, "[BACKEDGE] ");
> +             print_generic_expr (dump_file, var, 0);
> +             fprintf (dump_file, " in ");
> +             print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
> +           }
> +       }
> +      else
> +       {
> +         usage_info subinfo;
> +         process_use (stmt, var, &subinfo);
> +         *info &= subinfo;
> +         if (!info->is_useful ())
> +           {
> +             BREAK_FROM_IMM_USE_STMT (iter);
> +             return false;
> +           }
> +       }
> +    }
> +  return true;
> +}
> +
> +/* Queue for reconsideration any input of STMT that has information
> +   associated with it.  This is used if that information might be
> +   too optimistic.  */
> +
> +void
> +backprop::reprocess_inputs (gimple *stmt)
> +{
> +  use_operand_p use_p;
> +  ssa_op_iter oi;
> +  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
> +    {
> +      tree var = get_use_from_ptr (use_p);
> +      if (lookup_operand (var))
> +       push_to_worklist (var);
> +    }
> +}
> +
> +/* Say that we're recording INFO for SSA name VAR, or that we're deleting
> +   existing information if INFO is null.  INTRO describes the change.  */
> +
> +static void
> +dump_var_info (tree var, usage_info *info, const char *intro)
> +{
> +  fprintf (dump_file, "[DEF] %s for ", intro);
> +  print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (var), 0, TDF_SLIM);
> +  if (info)
> +    dump_usage_info (dump_file, var, info);
> +}
> +
> +/* Process all uses of VAR and record or update the result in
> +   M_INFO_MAP and M_VARS.  */
> +
> +void
> +backprop::process_var (tree var)
> +{
> +  if (has_zero_uses (var))
> +    return;
> +
> +  usage_info info;
> +  intersect_uses (var, &info);
> +
> +  gimple *stmt = SSA_NAME_DEF_STMT (var);
> +  if (info.is_useful ())
> +    {
> +      bool existed;
> +      usage_info *&map_info = m_info_map.get_or_insert (var, &existed);
> +      if (!existed)
> +       {
> +         /* Recording information about VAR for the first time.  */
> +         map_info = m_info_pool.allocate ();
> +         *map_info = info;
> +         m_vars.safe_push (var_info_pair (var, map_info));
> +         if (dump_file && (dump_flags & TDF_DETAILS))
> +           dump_var_info (var, map_info, "Recording new information");
> +
> +         /* If STMT is a phi, reprocess any backedge uses.  This is a
> +            no-op for other uses, which won't have any information
> +            associated with them.  */
> +         if (is_a <gphi *> (stmt))
> +           reprocess_inputs (stmt);
> +       }
> +      else if (info != *map_info)
> +       {
> +         /* Recording information that is less optimistic than before.  */
> +         gcc_checking_assert ((info & *map_info) == info);
> +         *map_info = info;
> +         if (dump_file && (dump_flags & TDF_DETAILS))
> +           dump_var_info (var, map_info, "Updating information");
> +         reprocess_inputs (stmt);
> +       }
> +    }
> +  else
> +    {
> +      if (usage_info **slot = m_info_map.get (var))
> +       {
> +         /* Removing previously-recorded information.  */
> +         **slot = info;
> +         m_info_map.remove (var);
> +         if (dump_file && (dump_flags & TDF_DETAILS))
> +           dump_var_info (var, NULL, "Deleting information");
> +         reprocess_inputs (stmt);
> +       }
> +      else
> +       {
> +         /* If STMT is a phi, remove any information recorded for
> +            its arguments.  */
> +         if (is_a <gphi *> (stmt))
> +           reprocess_inputs (stmt);
> +       }
> +    }
> +}
> +
> +/* Process all statements and phis in BB, during the first post-order walk.  */
> +
> +void
> +backprop::process_block (basic_block bb)
> +{
> +  for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
> +       gsi_prev (&gsi))
> +    {
> +      tree lhs = gimple_get_lhs (gsi_stmt (gsi));
> +      if (lhs && TREE_CODE (lhs) == SSA_NAME)
> +       process_var (lhs);
> +    }
> +  for (gphi_iterator gpi = gsi_start_phis (bb); !gsi_end_p (gpi);
> +       gsi_next (&gpi))
> +    process_var (gimple_phi_result (gpi.phi ()));
> +}
> +
> +/* Delete the definition of VAR, which has no uses.  */
> +
> +static void
> +remove_unused_var (tree var)
> +{
> +  gimple *stmt = SSA_NAME_DEF_STMT (var);
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "Deleting ");
> +      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
> +    }
> +  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
> +  gsi_remove (&gsi, true);
> +  release_defs (stmt);
> +}
> +
> +/* Note that we're replacing OLD_RHS with NEW_RHS in STMT.  */
> +
> +static void
> +note_replacement (gimple *stmt, tree old_rhs, tree new_rhs)
> +{
> +  fprintf (dump_file, "Replacing use of ");
> +  print_generic_expr (dump_file, old_rhs, 0);
> +  fprintf (dump_file, " with ");
> +  print_generic_expr (dump_file, new_rhs, 0);
> +  fprintf (dump_file, " in ");
> +  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
> +}
> +
> +/* If RHS is an SSA name whose definition just changes the sign of a value,
> +   return that other value, otherwise return null.  */
> +
> +static tree
> +strip_sign_op_1 (tree rhs)
> +{
> +  if (TREE_CODE (rhs) != SSA_NAME)
> +    return NULL_TREE;
> +
> +  gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
> +  if (gassign *assign = dyn_cast <gassign *> (def_stmt))
> +    switch (gimple_assign_rhs_code (assign))
> +      {
> +      case ABS_EXPR:
> +      case NEGATE_EXPR:
> +       return gimple_assign_rhs1 (assign);
> +
> +      default:
> +       break;
> +      }
> +  else if (gcall *call = dyn_cast <gcall *> (def_stmt))
> +    {
> +      if (gimple_call_builtin_p (call, BUILT_IN_NORMAL))
> +       switch (DECL_FUNCTION_CODE (gimple_call_fndecl (call)))
> +         {
> +         CASE_FLT_FN (BUILT_IN_COPYSIGN):
> +           return gimple_call_arg (call, 0);
> +
> +         default:
> +           break;
> +         }
> +    }
> +
> +  return NULL_TREE;
> +}
> +
> +/* If RHS is an SSA name whose definition just changes the sign of a value,
> +   strip all such operations and return the ultimate input to them.
> +   Return null otherwise.
> +
> +   Although this could in principle lead to quadratic searching,
> +   in practice a long sequence of sign manipulations should already
> +   have been folded down.  E.g. --x -> x, abs(-x) -> abs(x).  We search
> +   for more than one operation in order to catch cases like -abs(x).  */
> +
> +static tree
> +strip_sign_op (tree rhs)
> +{
> +  tree new_rhs = strip_sign_op_1 (rhs);
> +  if (!new_rhs)
> +    return NULL_TREE;
> +  while (tree next = strip_sign_op_1 (new_rhs))
> +    new_rhs = next;
> +  return new_rhs;
> +}
> +
> +/* Start a change in the value of VAR that is suitable for all non-debug
> +   uses of VAR.  We need to make sure that debug statements continue to
> +   use the original definition of VAR where possible, or are nullified
> +   otherwise.  */
> +
> +void
> +backprop::prepare_change (tree var)
> +{
> +  if (MAY_HAVE_DEBUG_STMTS)
> +    insert_debug_temp_for_var_def (NULL, var);
> +}
> +
> +/* STMT has been changed.  Give the fold machinery a chance to simplify
> +   and canonicalize it (e.g. by ensuring that commutative operands have
> +   the right order), then record the updates.  */
> +
> +void
> +backprop::complete_change (gimple *stmt)
> +{
> +  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
> +  if (fold_stmt (&gsi))
> +    {
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +       {
> +         fprintf (dump_file, "  which folds to: ");
> +         print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_SLIM);
> +       }
> +    }
> +  update_stmt (gsi_stmt (gsi));
> +}
> +
> +/* Optimize CALL, a call to a built-in function with lhs LHS, on the
> +   basis that INFO describes all uses of LHS.  */
> +
> +void
> +backprop::optimize_builtin_call (gcall *call, tree lhs, const usage_info *info)
> +{
> +  tree fndecl = gimple_call_fndecl (call);
> +  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
> +  /* If we have an f such that -f(x) = f(-x), and if the sign of the result
> +     doesn't matter, strip any sign operations from the input.  */
> +  if (info->flags.ignore_sign && negate_mathfn_p (fn))
> +    {
> +      tree new_arg = strip_sign_op (gimple_call_arg (call, 0));
> +      if (new_arg)
> +       {
> +         prepare_change (lhs);
> +         gimple_call_set_arg (call, 0, new_arg);
> +         complete_change (call);
> +       }
> +    }
> +}
> +
> +/* Optimize ASSIGN, an assignment to LHS, by replacing rhs operand N
> +   with RHS<N>, if RHS<N> is nonnull.  This may change the value of LHS.  */
> +
> +void
> +backprop::replace_assign_rhs (gassign *assign, tree lhs, tree rhs1,
> +                             tree rhs2, tree rhs3)
> +{
> +  if (!rhs1 && !rhs2 && !rhs3)
> +    return;
> +
> +  prepare_change (lhs);
> +  if (rhs1)
> +    gimple_assign_set_rhs1 (assign, rhs1);
> +  if (rhs2)
> +    gimple_assign_set_rhs2 (assign, rhs2);
> +  if (rhs3)
> +    gimple_assign_set_rhs3 (assign, rhs3);
> +  complete_change (assign);
> +}
> +
> +/* Optimize ASSIGN, an assignment to LHS, on the basis that INFO
> +   describes all uses of LHS.  */
> +
> +void
> +backprop::optimize_assign (gassign *assign, tree lhs, const usage_info *info)
> +{
> +  switch (gimple_assign_rhs_code (assign))
> +    {
> +    case MULT_EXPR:
> +    case RDIV_EXPR:
> +      /* If the sign of the result doesn't matter, strip sign operations
> +        from both inputs.  */
> +      if (info->flags.ignore_sign)
> +       replace_assign_rhs (assign, lhs,
> +                           strip_sign_op (gimple_assign_rhs1 (assign)),
> +                           strip_sign_op (gimple_assign_rhs2 (assign)),
> +                           NULL_TREE);
> +      break;
> +
> +    case COND_EXPR:
> +      /* If the sign of A ? B : C doesn't matter, strip sign operations
> +        from both B and C.  */
> +      if (info->flags.ignore_sign)
> +       replace_assign_rhs (assign, lhs,
> +                           NULL_TREE,
> +                           strip_sign_op (gimple_assign_rhs2 (assign)),
> +                           strip_sign_op (gimple_assign_rhs3 (assign)));
> +      break;
> +
> +    default:
> +      break;
> +    }
> +}
> +
> +/* Optimize PHI, which defines VAR, on the basis that INFO describes all
> +   uses of the result.  */
> +
> +void
> +backprop::optimize_phi (gphi *phi, tree var, const usage_info *info)
> +{
> +  /* If the sign of the result doesn't matter, strip sign operations
> +     from all arguments.  */
> +  if (info->flags.ignore_sign)
> +    {
> +      use_operand_p use;
> +      ssa_op_iter oi;
> +      bool replaced = false;
> +      FOR_EACH_PHI_ARG (use, phi, oi, SSA_OP_USE)
> +       {
> +         tree new_arg = strip_sign_op (USE_FROM_PTR (use));
> +         if (new_arg)
> +           {
> +             if (!replaced)
> +               prepare_change (var);
> +             if (dump_file && (dump_flags & TDF_DETAILS))
> +               note_replacement (phi, USE_FROM_PTR (use), new_arg);
> +             replace_exp (use, new_arg);
> +             replaced = true;
> +           }
> +       }
> +    }
> +}
> +
> +void
> +backprop::execute ()
> +{
> +  /* Phase 1: Traverse the function, making optimistic assumptions
> +     about any phi whose definition we haven't seen.  */
> +  int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (m_fn));
> +  unsigned int postorder_num = post_order_compute (postorder, false, false);
> +  for (unsigned int i = 0; i < postorder_num; ++i)
> +    {
> +      process_block (BASIC_BLOCK_FOR_FN (m_fn, postorder[i]));
> +      bitmap_set_bit (m_visited_blocks, postorder[i]);
> +    }
> +  XDELETEVEC (postorder);
> +
> +  /* Phase 2: Use the initial (perhaps overly optimistic) information
> +     to create a maximal fixed point solution.  */
> +  while (!m_worklist.is_empty ())
> +    process_var (pop_from_worklist ());
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    fprintf (dump_file, "\n");
> +
> +  /* Phase 3: Do a reverse post-order walk, using information about
> +     the uses of SSA names to optimize their definitions.  */
> +  for (unsigned int i = m_vars.length (); i-- > 0;)
> +    {
> +      usage_info *info = m_vars[i].second;
> +      if (info->is_useful ())
> +       {
> +         tree var = m_vars[i].first;
> +         gimple *stmt = SSA_NAME_DEF_STMT (var);
> +         if (gcall *call = dyn_cast <gcall *> (stmt))
> +           {
> +             if (gimple_call_builtin_p (call, BUILT_IN_NORMAL))
> +               optimize_builtin_call (call, var, info);
> +           }
> +         else if (gassign *assign = dyn_cast <gassign *> (stmt))
> +           optimize_assign (assign, var, info);
> +         else if (gphi *phi = dyn_cast <gphi *> (stmt))
> +           optimize_phi (phi, var, info);
> +       }
> +    }
> +
> +  /* Phase 4: Do a post-order walk, deleting statements that are no
> +     longer needed.  */
> +  for (unsigned int i = 0; i < m_vars.length (); ++i)
> +    {
> +      tree var = m_vars[i].first;
> +      if (has_zero_uses (var))
> +       remove_unused_var (var);
> +    }
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    fprintf (dump_file, "\n");
> +}
> +
> +const pass_data pass_data_backprop =
> +{
> +  GIMPLE_PASS, /* type */
> +  "backprop", /* name */
> +  OPTGROUP_NONE, /* optinfo_flags */
> +  TV_TREE_BACKPROP, /* tv_id */
> +  ( PROP_cfg | PROP_ssa ), /* properties_required */
> +  0, /* properties_provided */
> +  0, /* properties_destroyed */
> +  0, /* todo_flags_start */
> +  0, /* todo_flags_finish */
> +};
> +
> +class pass_backprop : public gimple_opt_pass
> +{
> +public:
> +  pass_backprop (gcc::context *ctxt)
> +    : gimple_opt_pass (pass_data_backprop, ctxt)
> +  {}
> +
> +  /* opt_pass methods: */
> +  opt_pass * clone () { return new pass_backprop (m_ctxt); }
> +  virtual bool gate (function *) { return flag_ssa_backprop; }
> +  virtual unsigned int execute (function *);
> +
> +}; // class pass_backprop
> +
> +unsigned int
> +pass_backprop::execute (function *fn)
> +{
> +  backprop (fn).execute ();
> +  return 0;
> +}
> +
> +} // anon namespace
> +
> +gimple_opt_pass *
> +make_pass_backprop (gcc::context *ctxt)
> +{
> +  return new pass_backprop (ctxt);
> +}
> Index: gcc/testsuite/gcc.dg/tree-ssa/backprop-1.c
> ===================================================================
> --- /dev/null   2015-10-14 14:19:37.164773004 +0100
> +++ gcc/testsuite/gcc.dg/tree-ssa/backprop-1.c  2015-10-21 09:24:16.663564880 +0100
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -g -fdump-tree-backprop-details" }  */
> +
> +/* Test a simple case of non-looping code in which both uses ignore
> +   the sign and both definitions are sign ops.  */
> +#define TEST_FUNCTION(TYPE, SUFFIX)                            \
> +  TYPE                                                         \
> +  test##SUFFIX (TYPE x, int sel1, int sel2)                    \
> +  {                                                            \
> +    TYPE input = sel1 ? -x : __builtin_fabs##SUFFIX (x);       \
> +    if (sel2)                                                  \
> +      return __builtin_cos##SUFFIX (input);                    \
> +    else                                                       \
> +      return __builtin_cosh##SUFFIX (input);                   \
> +  }
> +
> +TEST_FUNCTION (float, f)
> +TEST_FUNCTION (double, )
> +TEST_FUNCTION (long double, l)
> +
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -x} 3 "backprop" } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <x} 3 "backprop" } } */
> Index: gcc/testsuite/gcc.dg/tree-ssa/backprop-2.c
> ===================================================================
> --- /dev/null   2015-10-14 14:19:37.164773004 +0100
> +++ gcc/testsuite/gcc.dg/tree-ssa/backprop-2.c  2015-10-21 09:24:16.663564880 +0100
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -g -fdump-tree-backprop-details" }  */
> +
> +/* Test a simple case of non-looping code in which both uses ignore
> +   the sign but only one definition is a sign op.  */
> +#define TEST_FUNCTION(TYPE, SUFFIX)                            \
> +  TYPE                                                         \
> +  test##SUFFIX (TYPE x, int sel1, int sel2)                    \
> +  {                                                            \
> +    TYPE input = sel1 ? -x : x + 1;                            \
> +    if (sel2)                                                  \
> +      return __builtin_cos##SUFFIX (input);                    \
> +    else                                                       \
> +      return __builtin_cosh##SUFFIX (input);                   \
> +  }
> +
> +TEST_FUNCTION (float, f)
> +TEST_FUNCTION (double, )
> +TEST_FUNCTION (long double, l)
> +
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -x} 3 "backprop" } } */
> Index: gcc/testsuite/gcc.dg/tree-ssa/backprop-3.c
> ===================================================================
> --- /dev/null   2015-10-14 14:19:37.164773004 +0100
> +++ gcc/testsuite/gcc.dg/tree-ssa/backprop-3.c  2015-10-21 09:24:16.663564880 +0100
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -g -fdump-tree-backprop-details" }  */
> +
> +/* Test a simple case of non-looping code in which one use ignores
> +   the sign but another doesn't.  */
> +#define TEST_FUNCTION(TYPE, SUFFIX)                            \
> +  TYPE                                                         \
> +  test##SUFFIX (TYPE x, int sel1, int sel2)                    \
> +  {                                                            \
> +    TYPE input = sel1 ? -x : __builtin_fabs##SUFFIX (x);       \
> +    if (sel2)                                                  \
> +      return __builtin_cos##SUFFIX (input);                    \
> +    else                                                       \
> +      return __builtin_sin##SUFFIX (input);                    \
> +  }
> +
> +TEST_FUNCTION (float, f)
> +TEST_FUNCTION (double, )
> +TEST_FUNCTION (long double, l)
> +
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -x} 0 "backprop" } } */
> Index: gcc/testsuite/gcc.dg/tree-ssa/backprop-4.c
> ===================================================================
> --- /dev/null   2015-10-14 14:19:37.164773004 +0100
> +++ gcc/testsuite/gcc.dg/tree-ssa/backprop-4.c  2015-10-21 09:24:16.663564880 +0100
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -g -fdump-tree-backprop-details" }  */
> +
> +/* Test a simple reduction loop in which all inputs are sign ops and
> +   the consumer doesn't care about the sign.  */
> +#define TEST_FUNCTION(TYPE, SUFFIX)                    \
> +  TYPE                                                 \
> +  test##SUFFIX (TYPE x, TYPE y, TYPE *array, int n)    \
> +  {                                                    \
> +    x = __builtin_copysign##SUFFIX (x, y);             \
> +    for (int i = 0; i < n; ++i)                                \
> +      x *= -array[i];                                  \
> +    return __builtin_hypot##SUFFIX (x, y);             \
> +  }
> +
> +TEST_FUNCTION (float, f)
> +TEST_FUNCTION (double, )
> +TEST_FUNCTION (long double, l)
> +
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = __builtin_copysign} 3 "backprop" } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 3 "backprop" } } */
> Index: gcc/testsuite/gcc.dg/tree-ssa/backprop-5.c
> ===================================================================
> --- /dev/null   2015-10-14 14:19:37.164773004 +0100
> +++ gcc/testsuite/gcc.dg/tree-ssa/backprop-5.c  2015-10-21 09:24:16.663564880 +0100
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -g -fdump-tree-backprop-details" }  */
> +
> +/* Test a loop that does both a multiplication and addition.  The addition
> +   should prevent any sign ops from being removed.  */
> +#define TEST_FUNCTION(TYPE, SUFFIX)                    \
> +  TYPE                                                 \
> +  test##SUFFIX (TYPE x, TYPE y, TYPE *array, int n)    \
> +  {                                                    \
> +    x = __builtin_copysign##SUFFIX (x, y);             \
> +    for (int i = 0; i < n; ++i)                                \
> +      x = (x + 1) * array[i];                          \
> +    return __builtin_hypot##SUFFIX (x, y);             \
> +  }
> +
> +TEST_FUNCTION (float, f)
> +TEST_FUNCTION (double, )
> +TEST_FUNCTION (long double, l)
> +
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = __builtin_copysign} 0 "backprop" } } */
> Index: gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> ===================================================================
> --- /dev/null   2015-10-14 14:19:37.164773004 +0100
> +++ gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c  2015-10-21 09:24:16.663564880 +0100
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-backprop-details" }  */
> +
> +void start (void *);
> +void end (void *);
> +
> +/* Test that we optimize the contents of infinite loops.  */
> +#define TEST_FUNCTION(TYPE, SUFFIX)                    \
> +  void                                                 \
> +  test##SUFFIX (TYPE *array, TYPE y, int n)            \
> +  {                                                    \
> +    for (;;)                                           \
> +      {                                                        \
> +       start (array);                                  \
> +       TYPE x = -__builtin_fabs##SUFFIX (array[-1]);   \
> +       for (int i = 0; i < n; ++i)                     \
> +         x = -x / array[i];                            \
> +       array[-1] = x * x;                              \
> +       array[-2] = __builtin_fma##SUFFIX (x, x, y);    \
> +       array[-3] = __builtin_pow##SUFFIX (x, 20);      \
> +       end (array);                                    \
> +      }                                                        \
> +  }
> +
> +TEST_FUNCTION (float, f)
> +TEST_FUNCTION (double, )
> +TEST_FUNCTION (long double, l)
> +
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" } } */
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2015-10-21  9:56 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-15 13:17 Add a pass to back-propagate use information Richard Sandiford
2015-10-19 11:40 ` Richard Biener
2015-10-19 12:38   ` Richard Sandiford
2015-10-19 14:35     ` Richard Biener
2015-10-19 15:01       ` Richard Sandiford
2015-10-21  8:49       ` Richard Sandiford
2015-10-21  9:58         ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).