* [PATCH PR64434]
@ 2015-01-14 10:17 Yuri Rumyantsev
From: Yuri Rumyantsev @ 2015-01-14 10:17 UTC (permalink / raw)
  To: gcc-patches, Richard Biener, Igor Zamyatin

[-- Attachment #1: Type: text/plain, Size: 526 bytes --]

Hi All,

Here is the updated patch, redesigned according to Richard's review.
It swaps the operands of commutative operations so that the more expensive
operand (the one requiring more statements to compute) is expanded first.
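
For illustration only (this sketch is not part of the patch and the
identifiers are made up): in the addition below the second operand needs
several statements to compute while the first is a single load, so
reorder_operands would swap them at expand time and the heavier operand
is expanded first, keeping the live range of the load short.

  int a[256], b[256], c[256], d[256];

  void
  foo (void)
  {
    int i;
    for (i = 0; i < 256; i++)
      {
        int light = a[i];                      /* a single load */
        int heavy = b[i] * c[i] + b[i] - c[i]; /* several statements */
        d[i] = light + heavy;                  /* commutative '+': these two
                                                  operands would be swapped */
      }
  }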

Bootstrap and regression testing did not show any new failures.

Is it OK for trunk?

gcc/ChangeLog
2015-01-14  Yuri Rumyantsev  <ysrumyan@gmail.com>

PR tree-optimization/64434
* cfgexpand.c (count_num_stmt): New function.
(reorder_operands): New function.
(expand_gimple_basic_block): Insert call of reorder_operands.
* common.opt (freorder-operands): New option.
* config/i386/i386.c (ix86_option_override_internal): Enable
flag_reorder_operands by default for 32-bit targets.
* doc/invoke.texi (-freorder-operands): Document new option.

gcc/testsuite/ChangeLog
2015-01-14  Yuri Rumyantsev  <ysrumyan@gmail.com>

PR tree-optimization/64434
* gcc.target/i386/swap_opnd.c: New test.

[-- Attachment #2: patch --]
[-- Type: application/octet-stream, Size: 7484 bytes --]

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 5200053..01b745d 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -4884,6 +4884,148 @@ expand_debug_locations (void)
   flag_strict_aliasing = save_strict_alias;
 }
 
+/* Estimate the number of statements required to compute the value of VAR
+   and add it to *COUNT.  Return false if no valid estimate can be made.  */
+
+static bool
+count_num_stmt (tree var, int *count)
+{
+  gimple stmt;
+  tree op;
+  ssa_op_iter iter;
+
+  /* Do not handle vector operands.  */
+  if (TREE_CODE (TREE_TYPE (var)) == VECTOR_TYPE)
+    return false;
+  if (TREE_CODE (var) != SSA_NAME)
+    return true;
+  stmt = get_gimple_for_ssa_name (var);
+  if (stmt == NULL)
+    {
+      /* Assume VAR is defined by a PHI node or in another basic block.  */
+      *count += 2;
+      return true;
+    }
+
+  if (gimple_has_volatile_ops (stmt))
+    return false;
+  if (gimple_assign_load_p (stmt))
+    {
+      *count += 1;
+      return true;
+    }
+  if (is_gimple_call (stmt))
+    {
+      *count += 10;
+      return true;
+    }
+  if (gimple_clobber_p (stmt))
+    return true;
+
+  *count += 1;
+  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
+    {
+      if (TREE_CODE (TREE_TYPE (op)) == VECTOR_TYPE)
+	return false;
+      if (!count_num_stmt (op, count))
+	return false;
+    }
+  return true;
+}
+
+/* Swap the operands of commutative operations so that the heavier
+   operand (the one requiring more statements to compute) is expanded
+   first.  This is done only inside loops to save compile time.  */
+
+static void
+reorder_operands (basic_block bb)
+{
+  gimple_stmt_iterator gsi;
+  gimple_seq stmts;
+  gimple stmt = NULL;
+  enum tree_code code;
+  gimple def0, def1;
+  tree op0, op1;
+  bool swap;
+  int count1, count2;
+
+  if (!flag_reorder_operands)
+    return;
+
+  if (!bb->loop_father)
+    return;
+  stmts = bb_seq (bb);
+  for (gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      stmt = gsi_stmt (gsi);
+      /* Special handling of equality/non-equality comparisons, which are
+	 not considered commutative operations.  */
+      if (gimple_code (stmt) == GIMPLE_COND)
+	{
+	  code = gimple_cond_code (stmt);
+	  if ((code == EQ_EXPR || code == NE_EXPR)
+	      && (TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME)
+	      && (TREE_CODE (gimple_cond_rhs (stmt)) == SSA_NAME))
+	    {
+	      op0 = gimple_cond_lhs (stmt);
+	      op1 = gimple_cond_rhs (stmt);
+	    }
+	  else
+	    continue;
+	}
+      else
+	{
+	  /* Consider assignments only.  */
+	  if (gimple_code (stmt) != GIMPLE_ASSIGN
+	      || gimple_has_volatile_ops (stmt))
+	    continue;
+	  code = gimple_assign_rhs_code (stmt);
+	  if (!commutative_tree_code (code))
+	    continue;
+	  gcc_assert (gimple_num_ops (stmt) == 3);
+	  op0 = gimple_op (stmt, 1);
+	  op1 = gimple_op (stmt, 2);
+	  if (op0 == NULL_TREE || op1 == NULL_TREE)
+	    continue;
+	  if (TREE_CODE (op0) != SSA_NAME || TREE_CODE (op1) != SSA_NAME)
+	    continue;
+	}
+      def0 = get_gimple_for_ssa_name (op0);
+      def1 = get_gimple_for_ssa_name (op1);
+
+      /* Skip stmt if its operands do not have definition in BB.  */
+      if (def0 == NULL || def1 == NULL)
+	continue;
+
+      count1 = count2 = 0;
+      if (!count_num_stmt (op0, &count1)
+	  || !count_num_stmt (op1, &count2))
+	continue;
+      swap = false;
+      if (count2 > count1)
+        swap = true;
+      if (swap)
+	{
+	  /* Swap operands.  */
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "Swap operands in stmt:\n");
+	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+	    }
+	  if (gimple_code (stmt) == GIMPLE_ASSIGN)
+	    {
+	      gimple_set_op (stmt, 1, op1);
+	      gimple_set_op (stmt, 2, op0);
+	    }
+	  else if (gimple_code (stmt) == GIMPLE_COND)
+	    {
+	      gimple_cond_set_lhs (stmt, op1);
+	      gimple_cond_set_rhs (stmt, op0);
+	    }
+	}
+    }
+}
+
 /* Expand basic block BB from GIMPLE trees to RTL.  */
 
 static basic_block
@@ -4905,6 +5047,7 @@ expand_gimple_basic_block (basic_block bb, bool disable_tail_calls)
      cannot use the gsi_*_bb() routines because they expect the basic
      block to be in GIMPLE, instead of RTL.  Therefore, we need to
      access the BB sequence directly.  */
+  reorder_operands (bb);
   stmts = bb_seq (bb);
   bb->il.gimple.seq = NULL;
   bb->il.gimple.phi_nodes = NULL;
diff --git a/gcc/common.opt b/gcc/common.opt
index da5250b..833bd10 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1848,6 +1848,10 @@ freorder-functions
 Common Report Var(flag_reorder_functions) Optimization
 Reorder functions to improve code placement
 
+freorder-operands
+Common Report Var(flag_reorder_operands) Optimization
+Perform operand reordering for commutative operations
+
 frerun-cse-after-loop
 Common Report Var(flag_rerun_cse_after_loop) Optimization
 Add a common subexpression elimination pass after loop optimizations
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ec3e056..23d5d95 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -3829,6 +3829,10 @@ ix86_option_override_internal (bool main_args_p,
   if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
     opts->x_ix86_regparm = REGPARM_MAX;
 
+  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
+    if (opts->x_optimize >= 1 && !opts_set->x_flag_reorder_operands)
+      opts->x_flag_reorder_operands = 1;
+
   /* Default align_* from the processor table.  */
   if (opts->x_align_loops == 0)
     {
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 9f21c96..861da34 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -406,7 +406,7 @@ Objective-C and Objective-C++ Dialects}.
 -fprofile-generate=@var{path} @gol
 -fprofile-use -fprofile-use=@var{path} -fprofile-values -fprofile-reorder-functions @gol
 -freciprocal-math -free -frename-registers -freorder-blocks @gol
--freorder-blocks-and-partition -freorder-functions @gol
+-freorder-blocks-and-partition -freorder-functions -freorder-operands @gol
 -frerun-cse-after-loop -freschedule-modulo-scheduled-loops @gol
 -frounding-math -fsched2-use-superblocks -fsched-pressure @gol
 -fsched-spec-load -fsched-spec-load-dangerous @gol
@@ -8662,6 +8662,13 @@ Also profile feedback must be available to make this option effective.  See
 
 Enabled at levels @option{-O2}, @option{-O3}, @option{-Os}.
 
+@item -freorder-operands
+@opindex freorder-operands
+Reorder the operands of commutative operations so that the operand requiring
+more instructions to compute is evaluated first, shrinking register live ranges.
+
+Enabled for x86 32-bit targets at levels @option{-O2}, @option{-O3}.
+
 @item -fstrict-aliasing
 @opindex fstrict-aliasing
 Allow the compiler to assume the strictest aliasing rules applicable to
diff --git a/gcc/testsuite/gcc.target/i386/swap_opnd.c b/gcc/testsuite/gcc.target/i386/swap_opnd.c
new file mode 100644
index 0000000..5b0c11c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/swap_opnd.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ia32 } */
+/* { dg-options "-O2 -fdump-rtl-expand-details" } */
+#define N 256
+int a1[N], a2[N], a3[N], a4[N];
+
+void foo()
+{
+  int i;
+  for (i=0; i<N; i++) {
+    if (a3[i] != a1[i] + a2[i]) {
+       a4[i] += 1;
+       a1[i] = a2[i] - 1;
+    }
+  }
+}
+/* { dg-final { scan-rtl-dump-times "Swap operands" 1 "expand" } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */

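A usage note, not part of the patch: assuming a normally configured build
tree, the new target test can be exercised on its own with the usual
DejaGnu invocation, e.g.

  make check-gcc RUNTESTFLAGS="i386.exp=swap_opnd.c"

(on a compiler configured for 64-bit, something like
--target_board='unix/-m32' is needed to satisfy the ia32 requirement).
The -fdump-rtl-expand-details dump should then contain exactly one
"Swap operands in stmt:" note, for the != comparison whose right-hand
operand needs the addition.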