public inbox for gcc-patches@gcc.gnu.org
* [PATCH PR64434]
@ 2015-01-14 10:17 Yuri Rumyantsev
  2015-01-14 10:38 ` Jakub Jelinek
  0 siblings, 1 reply; 12+ messages in thread
From: Yuri Rumyantsev @ 2015-01-14 10:17 UTC (permalink / raw)
  To: gcc-patches, Richard Biener, Igor Zamyatin

[-- Attachment #1: Type: text/plain, Size: 526 bytes --]

Hi All,

Here is an updated patch, redesigned in accordance with Richard's review.
It swaps the operands of commutative operations so that the more
expensive one is expanded first.
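
To illustrate the intent (a hypothetical source fragment, not taken from
the patch), consider a commutative addition whose second operand needs
more statements to compute:

  int cheap  = a[i];                 /* one load */
  int costly = b[i] * c[i] + d[i];   /* three loads, a multiply and an add */
  int r      = cheap + costly;       /* expanded as r = costly + cheap */

Expanding the expensive operand first keeps its temporaries from staying
live across the expansion of the cheap operand, which shrinks register
live ranges.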

Bootstrap and regression testing did not show any new failures.

Is it OK for trunk?

gcc/ChangeLog
2015-01-14  Yuri Rumyantsev  <ysrumyan@gmail.com>

PR tree-optimization/64434
* cfgexpand.c (reorder_operands): New function.
(expand_gimple_basic_block): Insert call of reorder_operands.

gcc/testsuite/ChangeLog
* gcc.dg/torture/pr64434.c: New test.

[-- Attachment #2: patch --]
[-- Type: application/octet-stream, Size: 7484 bytes --]

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 5200053..01b745d 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -4884,6 +4884,148 @@ expand_debug_locations (void)
   flag_strict_aliasing = save_strict_alias;
 }
 
+/* Determine the number of statements required to compute the value of VAR.
+   Return false if the count cannot be determined and true otherwise.  */
+
+static bool
+count_num_stmt (tree var, int *count)
+{
+  gimple stmt;
+  tree op;
+  ssa_op_iter iter;
+
+  /* Do not handle vector operands.  */
+  if (TREE_CODE (TREE_TYPE (var)) == VECTOR_TYPE)
+    return false;
+  if (TREE_CODE (var) != SSA_NAME)
+    return true;
+  stmt = get_gimple_for_ssa_name (var);
+  if (stmt == NULL)
+    {
+      /* Assume that we have a PHI node or that def_stmt is in another bb.  */
+      *count += 2;
+      return true;
+    }
+
+  if (gimple_has_volatile_ops (stmt))
+    return false;
+  if (gimple_assign_load_p (stmt))
+    {
+      *count += 1;
+      return true;
+    }
+  if (is_gimple_call (stmt))
+    {
+      *count += 10;
+      return true;
+    }
+  if (gimple_clobber_p (stmt))
+    return true;
+
+  *count += 1;
+  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
+    {
+      if (TREE_CODE (TREE_TYPE (op)) == VECTOR_TYPE)
+	return false;
+      if (!count_num_stmt (op, count))
+	return false;
+    }
+  return true;
+}
+
+/* Perform swapping of operands of commutative operations so that the
+   heavier operand (the one requiring more instructions to compute) is
+   evaluated first.  This is done only in loops to save compile time.  */
+
+static void
+reorder_operands (basic_block bb)
+{
+  gimple_stmt_iterator gsi;
+  gimple_seq stmts;
+  gimple stmt = NULL;
+  enum tree_code code;
+  gimple def0, def1;
+  tree op0, op1;
+  bool swap;
+  int count1, count2;
+
+  if (!flag_reorder_operands)
+    return;
+
+  if (!bb->loop_father)
+    return;
+  stmts = bb_seq (bb);
+  for (gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      stmt = gsi_stmt (gsi);
+      /* Special handling of equality/non-equality, which are not
+	 considered commutative operations.  */
+      if (gimple_code (stmt) == GIMPLE_COND)
+	{
+	  code = gimple_cond_code (stmt);
+	  if ((code == EQ_EXPR || code == NE_EXPR)
+	      && (TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME)
+	      && (TREE_CODE (gimple_cond_rhs (stmt)) == SSA_NAME))
+	    {
+	      op0 = gimple_cond_lhs (stmt);
+	      op1 = gimple_cond_rhs (stmt);
+	    }
+	  else
+	    continue;
+	}
+      else
+	{
+	  /* Consider assignments only.  */
+	  if (gimple_code (stmt) != GIMPLE_ASSIGN
+	      || gimple_has_volatile_ops (stmt))
+	    continue;
+          code = gimple_assign_rhs_code (stmt);
+          if (!commutative_tree_code (code))
+	    continue;
+          gcc_assert (gimple_num_ops (stmt) == 3);
+          op0 = gimple_op (stmt, 1);
+          op1 = gimple_op (stmt, 2);
+          if (op0 == NULL_TREE || op1 == NULL_TREE)
+	    continue;
+          if (TREE_CODE (op0) != SSA_NAME || TREE_CODE (op1) != SSA_NAME)
+	    continue;
+      }
+      def0 = get_gimple_for_ssa_name (op0);
+      def1 = get_gimple_for_ssa_name (op1);
+
+      /* Skip stmt if its operands do not have definition in BB.  */
+      if (def0 == NULL || def1 == NULL)
+	continue;
+
+      count1 = count2 = 0;
+      if (!count_num_stmt (op0, &count1)
+	  || !count_num_stmt (op1, &count2))
+	continue;
+      swap = false;
+      if (count2 > count1)
+        swap = true;
+      if (swap)
+	{
+	  /* Swap operands.  */
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "Swap operands in stmt:\n");
+	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+	    }
+	  if (gimple_code (stmt) == GIMPLE_ASSIGN)
+	    {
+	      gimple_set_op (stmt, 1, op1);
+	      gimple_set_op (stmt, 2, op0);
+	    }
+	  else if (gimple_code (stmt) == GIMPLE_COND)
+	    {
+	      gimple_cond_set_lhs (stmt, op1);
+	      gimple_cond_set_rhs (stmt, op0);
+	    }
+	}
+    }
+}
+
 /* Expand basic block BB from GIMPLE trees to RTL.  */
 
 static basic_block
@@ -4905,6 +5047,7 @@ expand_gimple_basic_block (basic_block bb, bool disable_tail_calls)
      cannot use the gsi_*_bb() routines because they expect the basic
      block to be in GIMPLE, instead of RTL.  Therefore, we need to
      access the BB sequence directly.  */
+  reorder_operands (bb);
   stmts = bb_seq (bb);
   bb->il.gimple.seq = NULL;
   bb->il.gimple.phi_nodes = NULL;
diff --git a/gcc/common.opt b/gcc/common.opt
index da5250b..833bd10 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1848,6 +1848,10 @@ freorder-functions
 Common Report Var(flag_reorder_functions) Optimization
 Reorder functions to improve code placement
 
+freorder-operands
+Common Report Var(flag_reorder_operands) Optimization
+Perform operand reordering for commutative operations
+
 frerun-cse-after-loop
 Common Report Var(flag_rerun_cse_after_loop) Optimization
 Add a common subexpression elimination pass after loop optimizations
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ec3e056..23d5d95 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -3829,6 +3829,10 @@ ix86_option_override_internal (bool main_args_p,
   if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
     opts->x_ix86_regparm = REGPARM_MAX;
 
+  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
+    if (opts->x_optimize >= 1 && !opts_set->x_flag_reorder_operands)
+      opts->x_flag_reorder_operands = 1;
+
   /* Default align_* from the processor table.  */
   if (opts->x_align_loops == 0)
     {
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 9f21c96..861da34 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -406,7 +406,7 @@ Objective-C and Objective-C++ Dialects}.
 -fprofile-generate=@var{path} @gol
 -fprofile-use -fprofile-use=@var{path} -fprofile-values -fprofile-reorder-functions @gol
 -freciprocal-math -free -frename-registers -freorder-blocks @gol
--freorder-blocks-and-partition -freorder-functions @gol
+-freorder-blocks-and-partition -freorder-functions  -freorder-operands @gol
 -frerun-cse-after-loop -freschedule-modulo-scheduled-loops @gol
 -frounding-math -fsched2-use-superblocks -fsched-pressure @gol
 -fsched-spec-load -fsched-spec-load-dangerous @gol
@@ -8662,6 +8662,13 @@ Also profile feedback must be available to make this option effective.  See
 
 Enabled at levels @option{-O2}, @option{-O3}, @option{-Os}.
 
+@item -freorder-operands
+@opindex freorder-operands
+Reorder operands of commutative operations to compute the heavier operand
+(the one requiring more instructions) first, shrinking register live ranges.
+
+Enabled for x86 32-bit targets at levels @option{-O2}, @option{-O3}.
+
 @item -fstrict-aliasing
 @opindex fstrict-aliasing
 Allow the compiler to assume the strictest aliasing rules applicable to
diff --git a/gcc/testsuite/gcc.target/i386/swap_opnd.c b/gcc/testsuite/gcc.target/i386/swap_opnd.c
new file mode 100644
index 0000000..5b0c11c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/swap_opnd.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ia32 } */
+/* { dg-options "-O2 -fdump-rtl-expand-details" } */
+#define N 256
+int a1[N], a2[N], a3[N], a4[N];
+
+void foo()
+{
+  int i;
+  for (i=0; i<N; i++) {
+    if (a3[i] != a1[i] + a2[i]) {
+       a4[i] += 1;
+       a1[i] = a2[i] - 1;
+    }
+  }
+}
+/* { dg-final { scan-rtl-dump-times "Swap operands" 1 "expand" } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */


* Re: [PATCH PR64434]
  2015-01-14 10:17 [PATCH PR64434] Yuri Rumyantsev
@ 2015-01-14 10:38 ` Jakub Jelinek
  2015-01-14 10:40   ` Yuri Rumyantsev
  0 siblings, 1 reply; 12+ messages in thread
From: Jakub Jelinek @ 2015-01-14 10:38 UTC (permalink / raw)
  To: Yuri Rumyantsev; +Cc: gcc-patches, Richard Biener, Igor Zamyatin

On Wed, Jan 14, 2015 at 01:12:20PM +0300, Yuri Rumyantsev wrote:
> Hi All,
> 
> Here is an updated patch, redesigned in accordance with Richard's review.
> It swaps the operands of commutative operations so that the more
> expensive one is expanded first.
> 
> Bootstrap and regression testing did not show any new failures.
> 
> Is it OK for trunk?

Haven't you just reposted the patch from December?  I don't see any changes
from then...

> gcc/ChangeLog
> 2015-01-14  Yuri Rumyantsev  <ysrumyan@gmail.com>
> 
> PR tree-optimization/64434
> * cfgexpand.c (reorder_operands): New function.
> (expand_gimple_basic_block): Insert call of reorder_operands.
> 
> gcc/testsuite/ChangeLog
> * gcc.dg/torture/pr64434.c: New test.

	Jakub


* Re: [PATCH PR64434]
  2015-01-14 10:38 ` Jakub Jelinek
@ 2015-01-14 10:40   ` Yuri Rumyantsev
  2015-01-14 10:58     ` Jakub Jelinek
  0 siblings, 1 reply; 12+ messages in thread
From: Yuri Rumyantsev @ 2015-01-14 10:40 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, Richard Biener, Igor Zamyatin

[-- Attachment #1: Type: text/plain, Size: 856 bytes --]

Sorry, I am resending the correct patch.

Yuri.

2015-01-14 13:23 GMT+03:00 Jakub Jelinek <jakub@redhat.com>:
> On Wed, Jan 14, 2015 at 01:12:20PM +0300, Yuri Rumyantsev wrote:
>> Hi All,
>>
>> Here is an updated patch, redesigned in accordance with Richard's review.
>> It swaps the operands of commutative operations so that the more
>> expensive one is expanded first.
>>
>> Bootstrap and regression testing did not show any new failures.
>>
>> Is it OK for trunk?
>
> Haven't you just reposted the patch from December?  I don't see any changes
> from then...
>
>> gcc/ChangeLog
>> 2015-01-14  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>
>> PR tree-optimization/64434
>> * cfgexpand.c (reorder_operands): New function.
>> (expand_gimple_basic_block): Insert call of reorder_operands.
>>
>> gcc/testsuite/ChangeLog
>> * gcc.dg/torture/pr64434.c: New test.
>
>         Jakub

[-- Attachment #2: patch --]
[-- Type: application/octet-stream, Size: 3830 bytes --]

Index: cfgexpand.c
===================================================================
--- cfgexpand.c	(revision 219439)
+++ cfgexpand.c	(working copy)
@@ -4964,6 +4964,89 @@
   flag_strict_aliasing = save_strict_alias;
 }
 
+/* Perform swapping of operands of commutative operations to expand
+   the more expensive one first.  */
+
+static void
+reorder_operands (basic_block bb)
+{
+  unsigned int *lattice;  /* Hold cost of each statement.  */
+  unsigned int i = 0, n = 0;
+  gimple_stmt_iterator gsi;
+  gimple_seq stmts;
+  gimple stmt;
+  bool swap;
+  tree op0, op1;
+  ssa_op_iter iter;
+  use_operand_p use_p;
+  enum tree_code code;
+  gimple def0, def1;
+
+  if (!optimize)
+    return;
+
+  /* Compute cost of each statement using estimate_num_insns.  */
+  stmts = bb_seq (bb);
+  for (gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      stmt = gsi_stmt (gsi);
+      gimple_set_uid (stmt, n++);
+    }
+  lattice = XALLOCAVEC (unsigned, n);
+  for (gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      unsigned cost;
+      stmt = gsi_stmt (gsi);
+      cost = estimate_num_insns (stmt, &eni_size_weights);
+      lattice[i] = cost;
+      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE | SSA_OP_VUSE)
+	{
+	  tree use = USE_FROM_PTR (use_p);
+	  gimple def_stmt;
+	  if (TREE_CODE (use) != SSA_NAME)
+	    continue;
+	  def_stmt = get_gimple_for_ssa_name (use);
+	  if (!def_stmt || gimple_bb (def_stmt) != bb)
+	    continue;
+	  lattice[i] += lattice[gimple_uid (def_stmt)];
+	}
+      i++;
+      if (gimple_code (stmt) != GIMPLE_ASSIGN
+	  || gimple_has_volatile_ops (stmt))
+	continue;
+      code = gimple_assign_rhs_code (stmt);
+      if (!commutative_tree_code (code))
+	continue;
+      gcc_assert (gimple_num_ops (stmt) == 3);
+      op0 = gimple_op (stmt, 1);
+      op1 = gimple_op (stmt, 2);
+      if (op0 ==NULL_TREE || op1 == NULL_TREE
+	  || TREE_CODE (op0) != SSA_NAME
+	  || TREE_CODE (op1) != SSA_NAME)
+	continue;
+      /* Swap operands if the second one is more expensive.  */
+      def0 = get_gimple_for_ssa_name (op0);
+      if (!def0)
+	continue;
+      def1 = get_gimple_for_ssa_name (op1);
+      if (!def1)
+	continue;
+      swap = false;
+      if (lattice[gimple_uid (def1)] > lattice[gimple_uid (def0)])
+	swap = true;
+      if (swap)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "Swap operands in stmt:\n");
+	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+	    }
+	  gimple_set_op (stmt, 1, op1);
+	  gimple_set_op (stmt, 2, op0);
+	}
+    }
+}
+
 /* Expand basic block BB from GIMPLE trees to RTL.  */
 
 static basic_block
@@ -4985,6 +5068,7 @@
      cannot use the gsi_*_bb() routines because they expect the basic
      block to be in GIMPLE, instead of RTL.  Therefore, we need to
      access the BB sequence directly.  */
+  reorder_operands (bb);
   stmts = bb_seq (bb);
   bb->il.gimple.seq = NULL;
   bb->il.gimple.phi_nodes = NULL;
Index: testsuite/gcc.dg/torture/pr64434.c
===================================================================
--- testsuite/gcc.dg/torture/pr64434.c	(revision 0)
+++ testsuite/gcc.dg/torture/pr64434.c	(working copy)
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-rtl-expand-details" } */
+
+#define N 256
+int a1[N], a2[N], a3[N], a4[N];
+
+void foo ()
+{
+  int i;
+  for (i=0; i<N; i++) {
+    int c;
+    c = a3[i] + (a1[i] * a2[i]);
+    a4[i] = c + 1;
+    a1[i] = a2[i] - 1;
+  }
+}
+
+/* { dg-final { scan-rtl-dump-times "Swap operands" 1 "expand" } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */

Property changes on: testsuite/gcc.dg/torture/pr64434.c
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property


* Re: [PATCH PR64434]
  2015-01-14 10:40   ` Yuri Rumyantsev
@ 2015-01-14 10:58     ` Jakub Jelinek
  2015-01-14 11:14       ` Richard Biener
  0 siblings, 1 reply; 12+ messages in thread
From: Jakub Jelinek @ 2015-01-14 10:58 UTC (permalink / raw)
  To: Yuri Rumyantsev; +Cc: gcc-patches, Richard Biener, Igor Zamyatin

On Wed, Jan 14, 2015 at 01:32:13PM +0300, Yuri Rumyantsev wrote:
> Sorry, I am resending the correct patch.

> +reorder_operands (basic_block bb)
> +{
> +  unsigned int *lattice;  /* Hold cost of each statement.  */
> +  unsigned int i = 0, n = 0;
> +  gimple_stmt_iterator gsi;
> +  gimple_seq stmts;
> +  gimple stmt;
> +  bool swap;
> +  tree op0, op1;
> +  ssa_op_iter iter;
> +  use_operand_p use_p;
> +  enum tree_code code;
> +  gimple def0, def1;
> +
> +  if (!optimize)
> +    return;

Wouldn't it be better to move the !optimize guard to the caller?

> +  /* Compute cost of each statement using estimate_num_insns.  */
> +  stmts = bb_seq (bb);
> +  for (gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
> +    {
> +      stmt = gsi_stmt (gsi);
> +      gimple_set_uid (stmt, n++);
> +    }
> +  lattice = XALLOCAVEC (unsigned, n);

I'd be afraid that for very large functions you'd ICE here; the stack is far
more limited than the heap on many hosts.  Either give up if n is, say, .5
million or above, or allocate from the heap in that case?

> +  for (gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
> +    {
> +      unsigned cost;
> +      stmt = gsi_stmt (gsi);
> +      cost = estimate_num_insns (stmt, &eni_size_weights);
> +      lattice[i] = cost;
> +      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE | SSA_OP_VUSE)

Why the SSA_OP_VUSE?

> +      if (gimple_code (stmt) != GIMPLE_ASSIGN

!is_gimple_assign (stmt)
instead

> +      if (op0 ==NULL_TREE || op1 == NULL_TREE

Missing space after ==.

> +      /* Swap operands if the second one is more expensive.  */
> +      def0 = get_gimple_for_ssa_name (op0);
> +      if (!def0)
> +	continue;
> +      def1 = get_gimple_for_ssa_name (op1);
> +      if (!def1)
> +	continue;
> +      swap = false;

You don't check here if def0/def1 are from the same bb, is that guaranteed?

> +      if (swap)
> +	{
> +	  if (dump_file && (dump_flags & TDF_DETAILS))
> +	    {
> +	      fprintf (dump_file, "Swap operands in stmt:\n");
> +	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
> +	    }
> +	  gimple_set_op (stmt, 1, op1);
> +	  gimple_set_op (stmt, 2, op0);

update_stmt (stmt); ?

> Index: testsuite/gcc.dg/torture/pr64434.c
> ===================================================================
> --- testsuite/gcc.dg/torture/pr64434.c	(revision 0)
> +++ testsuite/gcc.dg/torture/pr64434.c	(working copy)
> 
> Property changes on: testsuite/gcc.dg/torture/pr64434.c
> ___________________________________________________________________
> Added: svn:executable

Please don't make testcases executable.

> ## -0,0 +1 ##
> +*
> \ No newline at end of property

Please avoid these; terminate with a newline.
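
For reference, a sketch of dropping the property with a standard
Subversion client (path as used in the patch):

  svn propdel svn:executable testsuite/gcc.dg/torture/pr64434.c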

	Jakub


* Re: [PATCH PR64434]
  2015-01-14 10:58     ` Jakub Jelinek
@ 2015-01-14 11:14       ` Richard Biener
  2015-01-14 11:15         ` Jakub Jelinek
  0 siblings, 1 reply; 12+ messages in thread
From: Richard Biener @ 2015-01-14 11:14 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Yuri Rumyantsev, gcc-patches, Igor Zamyatin

On Wed, Jan 14, 2015 at 11:45 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Wed, Jan 14, 2015 at 01:32:13PM +0300, Yuri Rumyantsev wrote:
>> Sorry, I am resending the correct patch.
>
>> +reorder_operands (basic_block bb)
>> +{
>> +  unsigned int *lattice;  /* Hold cost of each statement.  */
>> +  unsigned int i = 0, n = 0;
>> +  gimple_stmt_iterator gsi;
>> +  gimple_seq stmts;
>> +  gimple stmt;
>> +  bool swap;
>> +  tree op0, op1;
>> +  ssa_op_iter iter;
>> +  use_operand_p use_p;
>> +  enum tree_code code;
>> +  gimple def0, def1;
>> +
>> +  if (!optimize)
>> +    return;
>
> Wouldn't it be better to move the !optimize guard to the caller?
>
>> +  /* Compute cost of each statement using estimate_num_insns.  */
>> +  stmts = bb_seq (bb);
>> +  for (gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
>> +    {
>> +      stmt = gsi_stmt (gsi);
>> +      gimple_set_uid (stmt, n++);
>> +    }
>> +  lattice = XALLOCAVEC (unsigned, n);
>
> I'd be afraid that for very large functions you'd ICE here; the stack is far
> more limited than the heap on many hosts.  Either give up if n is, say, .5
> million or above, or allocate from the heap in that case?
>
>> +  for (gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
>> +    {
>> +      unsigned cost;
>> +      stmt = gsi_stmt (gsi);
>> +      cost = estimate_num_insns (stmt, &eni_size_weights);
>> +      lattice[i] = cost;
>> +      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE | SSA_OP_VUSE)
>
> Why the SSA_OP_VUSE?

Shouldn't be needed.

>> +      if (gimple_code (stmt) != GIMPLE_ASSIGN
>
> !is_gimple_assign (stmt)
> instead
>
>> +      if (op0 ==NULL_TREE || op1 == NULL_TREE
>
> Missing space after ==.
>
>> +      /* Swap operands if the second one is more expensive.  */
>> +      def0 = get_gimple_for_ssa_name (op0);
>> +      if (!def0)
>> +     continue;
>> +      def1 = get_gimple_for_ssa_name (op1);
>> +      if (!def1)
>> +     continue;
>> +      swap = false;
>
> You don't check here if def0/def1 are from the same bb, is that guaranteed?

I think so - we only TER inside BBs.

>> +      if (swap)
>> +     {
>> +       if (dump_file && (dump_flags & TDF_DETAILS))
>> +         {
>> +           fprintf (dump_file, "Swap operands in stmt:\n");
>> +           print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
>> +         }
>> +       gimple_set_op (stmt, 1, op1);
>> +       gimple_set_op (stmt, 2, op0);
>
> update_stmt (stmt); ?

Or rather swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
gimple_assign_rhs2_ptr (stmt))

>> Index: testsuite/gcc.dg/torture/pr64434.c
>> ===================================================================
>> --- testsuite/gcc.dg/torture/pr64434.c        (revision 0)
>> +++ testsuite/gcc.dg/torture/pr64434.c        (working copy)
>>
>> Property changes on: testsuite/gcc.dg/torture/pr64434.c
>> ___________________________________________________________________
>> Added: svn:executable
>
> Please don't make testcases executable.
>
>> ## -0,0 +1 ##
>> +*
>> \ No newline at end of property
>
> Please avoid these; terminate with a newline.
>
>         Jakub


* Re: [PATCH PR64434]
  2015-01-14 11:14       ` Richard Biener
@ 2015-01-14 11:15         ` Jakub Jelinek
  2015-01-14 13:37           ` Yuri Rumyantsev
  0 siblings, 1 reply; 12+ messages in thread
From: Jakub Jelinek @ 2015-01-14 11:15 UTC (permalink / raw)
  To: Richard Biener; +Cc: Yuri Rumyantsev, gcc-patches, Igor Zamyatin

On Wed, Jan 14, 2015 at 11:58:50AM +0100, Richard Biener wrote:
> >> +      /* Swap operands if the second one is more expensive.  */
> >> +      def0 = get_gimple_for_ssa_name (op0);
> >> +      if (!def0)
> >> +     continue;
> >> +      def1 = get_gimple_for_ssa_name (op1);
> >> +      if (!def1)
> >> +     continue;
> >> +      swap = false;
> >
> > You don't check here if def0/def1 are from the same bb, is that guaranteed?
> 
> I think so - we only TER inside BBs.

But then why check for it a few lines above:

+         def_stmt = get_gimple_for_ssa_name (use);
+         if (!def_stmt || gimple_bb (def_stmt) != bb)

If get_gimple_for_ssa_name != NULL guarantees that gimple_bb of the result == bb, then
even the || gimple_bb (def_stmt) != bb shouldn't be needed.

	Jakub


* Re: [PATCH PR64434]
  2015-01-14 11:15         ` Jakub Jelinek
@ 2015-01-14 13:37           ` Yuri Rumyantsev
  2015-01-14 13:49             ` Jakub Jelinek
  0 siblings, 1 reply; 12+ messages in thread
From: Yuri Rumyantsev @ 2015-01-14 13:37 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Biener, gcc-patches, Igor Zamyatin

[-- Attachment #1: Type: text/plain, Size: 1150 bytes --]

Hi All,

I made all the changes proposed by Richard and deleted the check that the
def is in the same block, as Jakub proposed.
I also moved the check on optimization to the call site.

I also checked that bootstrap and regression testing did not show any
new failures.

Is it OK for trunk?

2015-01-14 14:02 GMT+03:00 Jakub Jelinek <jakub@redhat.com>:
> On Wed, Jan 14, 2015 at 11:58:50AM +0100, Richard Biener wrote:
>> >> +      /* Swap operands if the second one is more expensive.  */
>> >> +      def0 = get_gimple_for_ssa_name (op0);
>> >> +      if (!def0)
>> >> +     continue;
>> >> +      def1 = get_gimple_for_ssa_name (op1);
>> >> +      if (!def1)
>> >> +     continue;
>> >> +      swap = false;
>> >
>> > You don't check here if def0/def1 are from the same bb, is that guaranteed?
>>
>> I think so - we only TER inside BBs.
>
> But then why check for it a few lines above:
>
> +         def_stmt = get_gimple_for_ssa_name (use);
> +         if (!def_stmt || gimple_bb (def_stmt) != bb)
>
> If get_gimple_for_ssa_name != NULL guarantees that gimple_bb of the result == bb, then
> even the || gimple_bb (def_stmt) != bb shouldn't be needed.
>
>         Jakub

[-- Attachment #2: patch --]
[-- Type: application/octet-stream, Size: 3809 bytes --]

Index: cfgexpand.c
===================================================================
--- cfgexpand.c	(revision 219439)
+++ cfgexpand.c	(working copy)
@@ -4964,6 +4964,86 @@
   flag_strict_aliasing = save_strict_alias;
 }
 
+/* Perform swapping of operands of commutative operations to expand
+   the more expensive one first.  */
+
+static void
+reorder_operands (basic_block bb)
+{
+  unsigned int *lattice;  /* Hold cost of each statement.  */
+  unsigned int i = 0, n = 0;
+  gimple_stmt_iterator gsi;
+  gimple_seq stmts;
+  gimple stmt;
+  bool swap;
+  tree op0, op1;
+  ssa_op_iter iter;
+  use_operand_p use_p;
+  enum tree_code code;
+  gimple def0, def1;
+
+  /* Compute cost of each statement using estimate_num_insns.  */
+  stmts = bb_seq (bb);
+  for (gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      stmt = gsi_stmt (gsi);
+      gimple_set_uid (stmt, n++);
+    }
+  lattice = XALLOCAVEC (unsigned, n);
+  for (gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      unsigned cost;
+      stmt = gsi_stmt (gsi);
+      cost = estimate_num_insns (stmt, &eni_size_weights);
+      lattice[i] = cost;
+      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE | SSA_OP_VUSE)
+	{
+	  tree use = USE_FROM_PTR (use_p);
+	  gimple def_stmt;
+	  if (TREE_CODE (use) != SSA_NAME)
+	    continue;
+	  def_stmt = get_gimple_for_ssa_name (use);
+	  if (!def_stmt)
+	    continue;
+	  lattice[i] += lattice[gimple_uid (def_stmt)];
+	}
+      i++;
+      if (!is_gimple_assign (stmt)
+	  || gimple_has_volatile_ops (stmt))
+	continue;
+      code = gimple_assign_rhs_code (stmt);
+      if (!commutative_tree_code (code))
+	continue;
+      gcc_assert (gimple_num_ops (stmt) == 3);
+      op0 = gimple_op (stmt, 1);
+      op1 = gimple_op (stmt, 2);
+      if (op0 == NULL_TREE || op1 == NULL_TREE
+	  || TREE_CODE (op0) != SSA_NAME
+	  || TREE_CODE (op1) != SSA_NAME)
+	continue;
+      /* Swap operands if the second one is more expensive.  */
+      def0 = get_gimple_for_ssa_name (op0);
+      if (!def0)
+	continue;
+      def1 = get_gimple_for_ssa_name (op1);
+      if (!def1)
+	continue;
+      swap = false;
+      if (lattice[gimple_uid (def1)] > lattice[gimple_uid (def0)])
+	swap = true;
+      if (swap)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "Swap operands in stmt:\n");
+	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+	    }
+	  swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
+			     gimple_assign_rhs2_ptr (stmt));
+	}
+    }
+}
+
 /* Expand basic block BB from GIMPLE trees to RTL.  */
 
 static basic_block
@@ -4985,6 +5065,8 @@
      cannot use the gsi_*_bb() routines because they expect the basic
      block to be in GIMPLE, instead of RTL.  Therefore, we need to
      access the BB sequence directly.  */
+  if (optimize)
+    reorder_operands (bb);
   stmts = bb_seq (bb);
   bb->il.gimple.seq = NULL;
   bb->il.gimple.phi_nodes = NULL;
Index: testsuite/gcc.dg/torture/pr64434.c
===================================================================
--- testsuite/gcc.dg/torture/pr64434.c	(revision 0)
+++ testsuite/gcc.dg/torture/pr64434.c	(working copy)
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-rtl-expand-details" } */
+
+#define N 256
+int a1[N], a2[N], a3[N], a4[N];
+
+void foo ()
+{
+  int i;
+  for (i=0; i<N; i++) {
+    int c;
+    c = a3[i] + (a1[i] * a2[i]);
+    a4[i] = c + 1;
+    a1[i] = a2[i] - 1;
+  }
+}
+
+/* { dg-final { scan-rtl-dump-times "Swap operands" 1 "expand" } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */

Property changes on: testsuite/gcc.dg/torture/pr64434.c
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property


* Re: [PATCH PR64434]
  2015-01-14 13:37           ` Yuri Rumyantsev
@ 2015-01-14 13:49             ` Jakub Jelinek
  2015-01-14 14:07               ` Richard Biener
  2015-01-14 14:26               ` Yuri Rumyantsev
  0 siblings, 2 replies; 12+ messages in thread
From: Jakub Jelinek @ 2015-01-14 13:49 UTC (permalink / raw)
  To: Yuri Rumyantsev; +Cc: Richard Biener, gcc-patches, Igor Zamyatin

On Wed, Jan 14, 2015 at 04:28:42PM +0300, Yuri Rumyantsev wrote:
> Hi All,
> 
> I made all the changes proposed by Richard and deleted the check that the
> def is in the same block, as Jakub proposed.
> I also moved the check on optimization to the call site.
> 
> I also checked that bootstrap and regression testing did not show any
> new failures.
> 
> Is it OK for trunk?

The  | SSA_OP_VUSE is still in there, the testcase is still executable,
still doesn't end with newline, and I really think you should replace
  lattice = XALLOCAVEC (unsigned int, n);
with something like:
  if (n >= 100000)
    lattice = XNEWVEC (unsigned int, n);
  else
    lattice = XALLOCAVEC (unsigned int, n);
...
  if (n >= 100000)
    XDELETE (lattice);
or similar.

	Jakub


* Re: [PATCH PR64434]
  2015-01-14 13:49             ` Jakub Jelinek
@ 2015-01-14 14:07               ` Richard Biener
  2015-01-14 14:26               ` Yuri Rumyantsev
  1 sibling, 0 replies; 12+ messages in thread
From: Richard Biener @ 2015-01-14 14:07 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Yuri Rumyantsev, gcc-patches, Igor Zamyatin

On Wed, Jan 14, 2015 at 2:33 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Wed, Jan 14, 2015 at 04:28:42PM +0300, Yuri Rumyantsev wrote:
>> Hi All,
>>
>> I made all the changes proposed by Richard and deleted the check that the
>> def is in the same block, as Jakub proposed.
>> I also moved the check on optimization to the call site.
>>
>> I also checked that bootstrap and regression testing did not show any
>> new failures.
>>
>> Is it OK for trunk?
>
> The  | SSA_OP_VUSE is still in there, the testcase is still executable,
> still doesn't end with newline, and I really think you should replace
>   lattice = XALLOCAVEC (unsigned int, n);
> with something like:
>   if (n >= 100000)
>     lattice = XNEWVEC (unsigned int, n);
>   else
>     lattice = XALLOCAVEC (unsigned int, n);
> ...
>   if (n >= 100000)
>     XDELETE (lattice);
> or similar.

Just unconditionally allocate from the heap.

Richard.

>
>         Jakub


* Re: [PATCH PR64434]
  2015-01-14 13:49             ` Jakub Jelinek
  2015-01-14 14:07               ` Richard Biener
@ 2015-01-14 14:26               ` Yuri Rumyantsev
  2015-01-15 10:13                 ` Yuri Rumyantsev
  1 sibling, 1 reply; 12+ messages in thread
From: Yuri Rumyantsev @ 2015-01-14 14:26 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Biener, gcc-patches, Igor Zamyatin

[-- Attachment #1: Type: text/plain, Size: 978 bytes --]

Jakub,

I made all the changes you requested.

Here is the updated patch.

BTW, I thought that GCC performs splitting of huge basic blocks.


2015-01-14 16:33 GMT+03:00 Jakub Jelinek <jakub@redhat.com>:
> On Wed, Jan 14, 2015 at 04:28:42PM +0300, Yuri Rumyantsev wrote:
>> Hi All,
>>
>> I made all the changes proposed by Richard and deleted the check that the
>> def is in the same block, as Jakub proposed.
>> I also moved the check on optimization to the call site.
>>
>> I also checked that bootstrap and regression testing did not show any
>> new failures.
>>
>> Is it OK for trunk?
>
> The  | SSA_OP_VUSE is still in there, the testcase is still executable,
> still doesn't end with newline, and I really think you should replace
>   lattice = XALLOCAVEC (unsigned int, n);
> with something like:
>   if (n >= 100000)
>     lattice = XNEWVEC (unsigned int, n);
>   else
>     lattice = XALLOCAVEC (unsigned int, n);
> ...
>   if (n >= 100000)
>     XDELETE (lattice);
> or similar.
>
>         Jakub

[-- Attachment #2: patch1 --]
[-- Type: application/octet-stream, Size: 3718 bytes --]

Index: cfgexpand.c
===================================================================
--- cfgexpand.c	(revision 219439)
+++ cfgexpand.c	(working copy)
@@ -4964,6 +4964,91 @@
   flag_strict_aliasing = save_strict_alias;
 }
 
+/* Perform swapping of operands of commutative operations to expand
+   the more expensive one first.  */
+
+static void
+reorder_operands (basic_block bb)
+{
+  unsigned int *lattice;  /* Hold cost of each statement.  */
+  unsigned int i = 0, n = 0;
+  gimple_stmt_iterator gsi;
+  gimple_seq stmts;
+  gimple stmt;
+  bool swap;
+  tree op0, op1;
+  ssa_op_iter iter;
+  use_operand_p use_p;
+  enum tree_code code;
+  gimple def0, def1;
+
+  /* Compute cost of each statement using estimate_num_insns.  */
+  stmts = bb_seq (bb);
+  for (gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      stmt = gsi_stmt (gsi);
+      gimple_set_uid (stmt, n++);
+    }
+  if (n >= 10000)
+    lattice = XNEWVEC (unsigned int, n);
+  else
+    lattice = XALLOCAVEC (unsigned, n);
+  for (gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      unsigned cost;
+      stmt = gsi_stmt (gsi);
+      cost = estimate_num_insns (stmt, &eni_size_weights);
+      lattice[i] = cost;
+      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
+	{
+	  tree use = USE_FROM_PTR (use_p);
+	  gimple def_stmt;
+	  if (TREE_CODE (use) != SSA_NAME)
+	    continue;
+	  def_stmt = get_gimple_for_ssa_name (use);
+	  if (!def_stmt)
+	    continue;
+	  lattice[i] += lattice[gimple_uid (def_stmt)];
+	}
+      i++;
+      if (!is_gimple_assign (stmt)
+	  || gimple_has_volatile_ops (stmt))
+	continue;
+      code = gimple_assign_rhs_code (stmt);
+      if (!commutative_tree_code (code))
+	continue;
+      gcc_assert (gimple_num_ops (stmt) == 3);
+      op0 = gimple_op (stmt, 1);
+      op1 = gimple_op (stmt, 2);
+      if (op0 == NULL_TREE || op1 == NULL_TREE
+	  || TREE_CODE (op0) != SSA_NAME
+	  || TREE_CODE (op1) != SSA_NAME)
+	continue;
+      /* Swap operands if the second one is more expensive.  */
+      def0 = get_gimple_for_ssa_name (op0);
+      if (!def0)
+	continue;
+      def1 = get_gimple_for_ssa_name (op1);
+      if (!def1)
+	continue;
+      swap = false;
+      if (lattice[gimple_uid (def1)] > lattice[gimple_uid (def0)])
+	swap = true;
+      if (swap)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "Swap operands in stmt:\n");
+	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+	    }
+	  swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
+			     gimple_assign_rhs2_ptr (stmt));
+	}
+    }
+  if (n >= 10000)
+    XDELETE (lattice);
+}
+
 /* Expand basic block BB from GIMPLE trees to RTL.  */
 
 static basic_block
@@ -4985,6 +5070,8 @@
      cannot use the gsi_*_bb() routines because they expect the basic
      block to be in GIMPLE, instead of RTL.  Therefore, we need to
      access the BB sequence directly.  */
+  if (optimize)
+    reorder_operands (bb);
   stmts = bb_seq (bb);
   bb->il.gimple.seq = NULL;
   bb->il.gimple.phi_nodes = NULL;
Index: testsuite/gcc.dg/torture/pr64434.c
===================================================================
--- testsuite/gcc.dg/torture/pr64434.c	(revision 0)
+++ testsuite/gcc.dg/torture/pr64434.c	(working copy)
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-rtl-expand-details" } */
+
+#define N 256
+int a1[N], a2[N], a3[N], a4[N];
+
+void foo ()
+{
+  int i;
+  for (i=0; i<N; i++) {
+    int c;
+    c = a3[i] + (a1[i] * a2[i]);
+    a4[i] = c + 1;
+    a1[i] = a2[i] - 1;
+  }
+}
+
+/* { dg-final { scan-rtl-dump-times "Swap operands" 1 "expand" } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+



* Re: [PATCH PR64434]
  2015-01-14 14:26               ` Yuri Rumyantsev
@ 2015-01-15 10:13                 ` Yuri Rumyantsev
  2015-01-15 11:17                   ` Richard Biener
  0 siblings, 1 reply; 12+ messages in thread
From: Yuri Rumyantsev @ 2015-01-15 10:13 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Biener, gcc-patches, Igor Zamyatin

[-- Attachment #1: Type: text/plain, Size: 1541 bytes --]

Hi All,

I made the change proposed by Richard: unconditionally allocate from the heap.

Bootstrap and regression testing did not show any new failures.

Is it OK for trunk?

ChangeLog

2015-01-15  Yuri Rumyantsev  <ysrumyan@gmail.com>

PR tree-optimization/64434
* cfgexpand.c (reorder_operands): New function.
(expand_gimple_basic_block): Insert call of reorder_operands if
optimize is true.

gcc/testsuite/ChangeLog
* gcc.dg/torture/pr64434.c: New test.

2015-01-14 17:07 GMT+03:00 Yuri Rumyantsev <ysrumyan@gmail.com>:
> Jakub,
>
> I made all the changes you requested.
>
> Here is the updated patch.
>
> BTW, I thought that GCC performs splitting of huge basic blocks.
>
>
> 2015-01-14 16:33 GMT+03:00 Jakub Jelinek <jakub@redhat.com>:
>> On Wed, Jan 14, 2015 at 04:28:42PM +0300, Yuri Rumyantsev wrote:
>>> Hi All,
>>>
>>> I made all the changes proposed by Richard and deleted the check that the
>>> def is in the same block, as Jakub proposed.
>>> I also moved the check on optimization to the call site.
>>>
>>> I also checked that bootstrap and regression testing did not show any
>>> new failures.
>>>
>>> Is it OK for trunk?
>>
>> The  | SSA_OP_VUSE is still in there, the testcase is still executable,
>> still doesn't end with newline, and I really think you should replace
>>   lattice = XALLOCAVEC (unsigned int, n);
>> with something like:
>>   if (n >= 100000)
>>     lattice = XNEWVEC (unsigned int, n);
>>   else
>>     lattice = XALLOCAVEC (unsigned int, n);
>> ...
>>   if (n >= 100000)
>>     XDELETE (lattice);
>> or similar.
>>
>>         Jakub

[-- Attachment #2: patch2 --]
[-- Type: application/octet-stream, Size: 3626 bytes --]

Index: cfgexpand.c
===================================================================
--- cfgexpand.c	(revision 219439)
+++ cfgexpand.c	(working copy)
@@ -4964,6 +4964,87 @@
   flag_strict_aliasing = save_strict_alias;
 }
 
+/* Perform swapping of operands of commutative operations to expand
+   the more expensive one first.  */
+
+static void
+reorder_operands (basic_block bb)
+{
+  unsigned int *lattice;  /* Hold cost of each statement.  */
+  unsigned int i = 0, n = 0;
+  gimple_stmt_iterator gsi;
+  gimple_seq stmts;
+  gimple stmt;
+  bool swap;
+  tree op0, op1;
+  ssa_op_iter iter;
+  use_operand_p use_p;
+  enum tree_code code;
+  gimple def0, def1;
+
+  /* Compute cost of each statement using estimate_num_insns.  */
+  stmts = bb_seq (bb);
+  for (gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      stmt = gsi_stmt (gsi);
+      gimple_set_uid (stmt, n++);
+    }
+  lattice = XNEWVEC (unsigned int, n);
+  for (gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      unsigned cost;
+      stmt = gsi_stmt (gsi);
+      cost = estimate_num_insns (stmt, &eni_size_weights);
+      lattice[i] = cost;
+      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
+	{
+	  tree use = USE_FROM_PTR (use_p);
+	  gimple def_stmt;
+	  if (TREE_CODE (use) != SSA_NAME)
+	    continue;
+	  def_stmt = get_gimple_for_ssa_name (use);
+	  if (!def_stmt)
+	    continue;
+	  lattice[i] += lattice[gimple_uid (def_stmt)];
+	}
+      i++;
+      if (!is_gimple_assign (stmt)
+	  || gimple_has_volatile_ops (stmt))
+	continue;
+      code = gimple_assign_rhs_code (stmt);
+      if (!commutative_tree_code (code))
+	continue;
+      gcc_assert (gimple_num_ops (stmt) == 3);
+      op0 = gimple_op (stmt, 1);
+      op1 = gimple_op (stmt, 2);
+      if (op0 == NULL_TREE || op1 == NULL_TREE
+	  || TREE_CODE (op0) != SSA_NAME
+	  || TREE_CODE (op1) != SSA_NAME)
+	continue;
+      /* Swap operands if the second one is more expensive.  */
+      def0 = get_gimple_for_ssa_name (op0);
+      if (!def0)
+	continue;
+      def1 = get_gimple_for_ssa_name (op1);
+      if (!def1)
+	continue;
+      swap = false;
+      if (lattice[gimple_uid (def1)] > lattice[gimple_uid (def0)])
+	swap = true;
+      if (swap)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "Swap operands in stmt:\n");
+	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+	    }
+	  swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
+			     gimple_assign_rhs2_ptr (stmt));
+	}
+    }
+  XDELETE (lattice);
+}
+
 /* Expand basic block BB from GIMPLE trees to RTL.  */
 
 static basic_block
@@ -4985,6 +5066,8 @@
      cannot use the gsi_*_bb() routines because they expect the basic
      block to be in GIMPLE, instead of RTL.  Therefore, we need to
      access the BB sequence directly.  */
+  if (optimize)
+    reorder_operands (bb);
   stmts = bb_seq (bb);
   bb->il.gimple.seq = NULL;
   bb->il.gimple.phi_nodes = NULL;
Index: testsuite/gcc.dg/torture/pr64434.c
===================================================================
--- testsuite/gcc.dg/torture/pr64434.c	(revision 0)
+++ testsuite/gcc.dg/torture/pr64434.c	(working copy)
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-rtl-expand-details" } */
+
+#define N 256
+int a1[N], a2[N], a3[N], a4[N];
+
+void foo ()
+{
+  int i;
+  for (i=0; i<N; i++) {
+    int c;
+    c = a3[i] + (a1[i] * a2[i]);
+    a4[i] = c + 1;
+    a1[i] = a2[i] - 1;
+  }
+}
+
+/* { dg-final { scan-rtl-dump-times "Swap operands" 1 "expand" } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+


* Re: [PATCH PR64434]
  2015-01-15 10:13                 ` Yuri Rumyantsev
@ 2015-01-15 11:17                   ` Richard Biener
  0 siblings, 0 replies; 12+ messages in thread
From: Richard Biener @ 2015-01-15 11:17 UTC (permalink / raw)
  To: Yuri Rumyantsev; +Cc: Jakub Jelinek, gcc-patches, Igor Zamyatin

On Thu, Jan 15, 2015 at 10:39 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
> Hi All,
>
> I made the change proposed by Richard: unconditionally allocate from the heap.
>
> Bootstrap and regression testing did not show any new failures.
>
> Is it OK for trunk?

+      if (!is_gimple_assign (stmt)
+         || gimple_has_volatile_ops (stmt))
+       continue;
+      code = gimple_assign_rhs_code (stmt);
+      if (!commutative_tree_code (code))
+       continue;
+      gcc_assert (gimple_num_ops (stmt) == 3);
+      op0 = gimple_op (stmt, 1);
+      op1 = gimple_op (stmt, 2);
+      if (op0 == NULL_TREE || op1 == NULL_TREE
+         || TREE_CODE (op0) != SSA_NAME
+         || TREE_CODE (op1) != SSA_NAME)
+       continue;

you can simplify this to

         if (!is_gimple_assign (stmt)
            || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
           continue;
         op0 = gimple_assign_rhs1 (stmt);
         op1 = gimple_assign_rhs2 (stmt);
         if (TREE_CODE (op0) != SSA_NAME
             || TREE_CODE (op1) != SSA_NAME)
           continue;

Please output the computed costs for both operands in

+         if (dump_file && (dump_flags & TDF_DETAILS))
+           {
+             fprintf (dump_file, "Swap operands in stmt:\n");
+             print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+           }

as that will help debugging if odd things happen.
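
One possible shape for that, as a sketch reusing the patch's lattice,
def0 and def1 (the exact message text is illustrative):

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Swap operands in stmt:\n");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
      fprintf (dump_file, "Cost left opnd=%u, right opnd=%u\n",
               lattice[gimple_uid (def0)],
               lattice[gimple_uid (def1)]);
    }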

OK with those changes.  I believe this may fix some duplicate bugs
as well.

Thanks,
Richard.


> ChangeLog
>
> 2015-01-15  Yuri Rumyantsev  <ysrumyan@gmail.com>
>
> PR tree-optimization/64434
> * cfgexpand.c (reorder_operands): New function.
> (expand_gimple_basic_block): Insert call of reorder_operands if
> optimize is true.
>
> gcc/testsuite/ChangeLog
> * gcc.dg/torture/pr64434.c: New test.
>
> 2015-01-14 17:07 GMT+03:00 Yuri Rumyantsev <ysrumyan@gmail.com>:
>> Jakub,
>>
>> I made all the changes you requested.
>>
>> Here is the updated patch.
>>
>> BTW, I thought that GCC performs splitting of huge basic blocks.
>>
>>
>> 2015-01-14 16:33 GMT+03:00 Jakub Jelinek <jakub@redhat.com>:
>>> On Wed, Jan 14, 2015 at 04:28:42PM +0300, Yuri Rumyantsev wrote:
>>>> Hi All,
>>>>
>>>> I made all the changes proposed by Richard and deleted the check that the
>>>> def is in the same block, as Jakub proposed.
>>>> I also moved the check on optimization to the call site.
>>>>
>>>> I also checked that bootstrap and regression testing did not show any
>>>> new failures.
>>>>
>>>> Is it OK for trunk?
>>>
>>> The  | SSA_OP_VUSE is still in there, the testcase is still executable,
>>> still doesn't end with newline, and I really think you should replace
>>>   lattice = XALLOCAVEC (unsigned int, n);
>>> with something like:
>>>   if (n >= 100000)
>>>     lattice = XNEWVEC (unsigned int, n);
>>>   else
>>>     lattice = XALLOCAVEC (unsigned int, n);
>>> ...
>>>   if (n >= 100000)
>>>     XDELETE (lattice);
>>> or similar.
>>>
>>>         Jakub

