public inbox for gcc-patches@gcc.gnu.org
* [PATCH] Remove inefficient branchless conditional negate optimization
@ 2015-02-26 18:06 Wilco Dijkstra
  2015-02-26 22:48 ` Jeff Law
  0 siblings, 1 reply; 10+ messages in thread
From: Wilco Dijkstra @ 2015-02-26 18:06 UTC (permalink / raw)
  To: gcc-patches

Several GCC versions ago a conditional negate optimization was introduced as a workaround for
PR45685. However the branchless expansion for conditional negate is extremely inefficient on most
targets (5 sequentially dependent instructions rather than 2 on AArch64). Since the underlying issue
has been resolved (the example in PR45685 no longer generates a branch on x64), remove the
workaround so that conditional negates are treated in exactly the same way as conditional invert,
add, subtract, and, orr, xor etc. Simple example:

int f(int x) { if (x > 3) x = -x; return x; }

Before (AArch64):
        cmp     w0, 3
        cset    w2, gt
        neg     w1, w2
        eor     w0, w0, w1
        add     w0, w2, w0
        ret

x64:

        xorl    %ecx, %ecx
        cmpl    $3, %edi
        setg    %cl
        movl    %ecx, %edx
        negl    %edx
        xorl    %edx, %edi
        leal    (%rcx,%rdi), %eax
        ret

After (AArch64):
        cmp     w0, 4
        csneg   w0, w0, w0, lt
        ret

x64:

        movl    %edi, %edx
        movl    %edi, %eax
        negl    %edx
        cmpl    $4, %edi
        cmovge  %edx, %eax
        ret
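
For reference, the straight-line sequence the removed code emitted corresponds
to this C sketch (an illustration of the "result = (rhs ^ -cond) + cond"
identity documented in neg_replacement below; function and variable names are
mine, not from the patch):

int branchless_cond_neg (int x)
{
  int cond = x > 3;   /* 0 or 1, from the comparison */
  int mask = -cond;   /* all-zeros or all-ones */
  /* x ^ -1 is ~x, and ~x + 1 is -x, so x is negated exactly when cond is 1.  */
  return (x ^ mask) + cond;
}

Every statement feeds the next, which is where the chain of five sequentially
dependent instructions (cmp, cset, neg, eor, add) in the "Before" sequence
comes from.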

ChangeLog: 
2015-02-26  Wilco Dijkstra  <wdijkstr@arm.com>

	* gcc/tree-ssa-phiopt.c (neg_replacement): Remove.
	(tree_ssa_phiopt_worker): Remove negate optimization.

---
 gcc/tree-ssa-phiopt.c | 156 --------------------------------------------------
 1 file changed, 156 deletions(-)

diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index bad546d..38a59ab 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -96,8 +96,6 @@ static bool minmax_replacement (basic_block, basic_block,
 				edge, edge, gimple, tree, tree);
 static bool abs_replacement (basic_block, basic_block,
 			     edge, edge, gimple, tree, tree);
-static bool neg_replacement (basic_block, basic_block,
-			     edge, edge, gimple, tree, tree);
 static bool cond_store_replacement (basic_block, basic_block, edge, edge,
 				    hash_set<tree> *);
 static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
@@ -209,23 +207,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
     /* Calculate the set of non-trapping memory accesses.  */
     nontrap = get_non_trapping ();
 
-  /* The replacement of conditional negation with a non-branching
-     sequence is really only a win when optimizing for speed and we
-     can avoid transformations by gimple if-conversion that result
-     in poor RTL generation.
-
-     Ideally either gimple if-conversion or the RTL expanders will
-     be improved and the code to emit branchless conditional negation
-     can be removed.  */
-  bool replace_conditional_negation = false;
-  if (!do_store_elim)
-    replace_conditional_negation
-      = ((!optimize_size && optimize >= 2)
-	 || (((flag_tree_loop_vectorize || cfun->has_force_vectorize_loops)
-	      && flag_tree_loop_if_convert != 0)
-	     || flag_tree_loop_if_convert == 1
-	     || flag_tree_loop_if_convert_stores == 1));
-
   /* Search every basic block for COND_EXPR we may be able to optimize.
 
      We walk the blocks in order that guarantees that a block with
@@ -380,9 +361,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
 	    cfgchanged = true;
 	  else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
 	    cfgchanged = true;
-	  else if (replace_conditional_negation
-		   && neg_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
-	    cfgchanged = true;
 	  else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
 	    cfgchanged = true;
 	}
@@ -1294,140 +1272,6 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,
   return true;
 }
 
-/*  The function neg_replacement replaces conditional negation with
-    equivalent straight line code.  Returns TRUE if replacement is done,
-    otherwise returns FALSE.
-
-    COND_BB branches around negation occuring in MIDDLE_BB.
-
-    E0 and E1 are edges out of COND_BB.  E0 reaches MIDDLE_BB and
-    E1 reaches the other successor which should contain PHI with
-    arguments ARG0 and ARG1.
-
-    Assuming negation is to occur when the condition is true,
-    then the non-branching sequence is:
-
-       result = (rhs ^ -cond) + cond
-
-    Inverting the condition or its result gives us negation
-    when the original condition is false.  */
-
-static bool
-neg_replacement (basic_block cond_bb, basic_block middle_bb,
-		 edge e0 ATTRIBUTE_UNUSED, edge e1,
-		 gimple phi, tree arg0, tree arg1)
-{
-  gimple new_stmt, cond;
-  gimple_stmt_iterator gsi;
-  gimple assign;
-  edge true_edge, false_edge;
-  tree rhs, lhs;
-  enum tree_code cond_code;
-  bool invert = false;
-
-  /* This transformation performs logical operations on the
-     incoming arguments.  So force them to be integral types.   */
-  if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)))
-    return false;
-
-  /* OTHER_BLOCK must have only one executable statement which must have the
-     form arg0 = -arg1 or arg1 = -arg0.  */
-
-  assign = last_and_only_stmt (middle_bb);
-  /* If we did not find the proper negation assignment, then we can not
-     optimize.  */
-  if (assign == NULL)
-    return false;
-
-  /* If we got here, then we have found the only executable statement
-     in OTHER_BLOCK.  If it is anything other than arg0 = -arg1 or
-     arg1 = -arg0, then we can not optimize.  */
-  if (gimple_code (assign) != GIMPLE_ASSIGN)
-    return false;
-
-  lhs = gimple_assign_lhs (assign);
-
-  if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
-    return false;
-
-  rhs = gimple_assign_rhs1 (assign);
-
-  /* The assignment has to be arg0 = -arg1 or arg1 = -arg0.  */
-  if (!(lhs == arg0 && rhs == arg1)
-      && !(lhs == arg1 && rhs == arg0))
-    return false;
-
-  /* The basic sequence assumes we negate when the condition is true.
-     If we need the opposite, then we will either need to invert the
-     condition or its result.  */
-  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
-  invert = false_edge->dest == middle_bb;
-
-  /* Unlike abs_replacement, we can handle arbitrary conditionals here.  */
-  cond = last_stmt (cond_bb);
-  cond_code = gimple_cond_code (cond);
-
-  /* If inversion is needed, first try to invert the test since
-     that's cheapest.  */
-  if (invert)
-    {
-      bool honor_nans = HONOR_NANS (gimple_cond_lhs (cond));
-      enum tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
-
-      /* If invert_tree_comparison was successful, then use its return
-	 value as the new code and note that inversion is no longer
-	 needed.  */
-      if (new_code != ERROR_MARK)
-	{
-	  cond_code = new_code;
-	  invert = false;
-	}
-    }
-
-  tree cond_val = make_ssa_name (boolean_type_node);
-  new_stmt = gimple_build_assign (cond_val, cond_code,
-				  gimple_cond_lhs (cond),
-				  gimple_cond_rhs (cond));
-  gsi = gsi_last_bb (cond_bb);
-  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
-
-  /* If we still need inversion, then invert the result of the
-     condition.  */
-  if (invert)
-    {
-      tree tmp = make_ssa_name (boolean_type_node);
-      new_stmt = gimple_build_assign (tmp, BIT_XOR_EXPR, cond_val,
-				      boolean_true_node);
-      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
-      cond_val = tmp;
-    }
-
-  /* Get the condition in the right type so that we can perform
-     logical and arithmetic operations on it.  */
-  tree cond_val_converted = make_ssa_name (TREE_TYPE (rhs));
-  new_stmt = gimple_build_assign (cond_val_converted, NOP_EXPR, cond_val);
-  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
-
-  tree neg_cond_val_converted = make_ssa_name (TREE_TYPE (rhs));
-  new_stmt = gimple_build_assign (neg_cond_val_converted, NEGATE_EXPR,
-				  cond_val_converted);
-  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
-
-  tree tmp = make_ssa_name (TREE_TYPE (rhs));
-  new_stmt = gimple_build_assign (tmp, BIT_XOR_EXPR, rhs,
-				  neg_cond_val_converted);
-  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
-
-  tree new_lhs = make_ssa_name (TREE_TYPE (rhs));
-  new_stmt = gimple_build_assign (new_lhs, PLUS_EXPR, tmp, cond_val_converted);
-  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
-
-  replace_phi_edge_with_variable (cond_bb, e1, phi, new_lhs);
-
-  /* Note that we optimized this PHI.  */
-  return true;
-}
-
 /* Auxiliary functions to determine the set of memory accesses which
    can't trap because they are preceded by accesses to the same memory
    portion.  We do that for MEM_REFs, so we only need to track
-- 
1.9.1

* Re: [PATCH] Remove inefficient branchless conditional negate optimization
  2015-02-26 18:06 [PATCH] Remove inefficient branchless conditional negate optimization Wilco Dijkstra
@ 2015-02-26 22:48 ` Jeff Law
  2015-02-27  8:19   ` Richard Biener
  2015-03-04 16:18   ` Wilco Dijkstra
  0 siblings, 2 replies; 10+ messages in thread
From: Jeff Law @ 2015-02-26 22:48 UTC (permalink / raw)
  To: Wilco Dijkstra, gcc-patches

On 02/26/15 10:30, Wilco Dijkstra wrote:
> Several GCC versions ago a conditional negate optimization was introduced as a workaround for
> PR45685. However the branchless expansion for conditional negate is extremely inefficient on most
> targets (5 sequentially dependent instructions rather than 2 on AArch64). Since the underlying issue
> has been resolved (the example in PR45685 no longer generates a branch on x64), remove the
> workaround so that conditional negates are treated in exactly the same way as conditional invert,
> add, subtract, and, orr, xor etc. Simple example:
>
> int f(int x) { if (x > 3) x = -x; return x; }
You need to bootstrap and regression test the change before it can be 
approved.

You should turn this little example into a testcase.  It's fine with me 
if this new test is ARM specific.


You should also find a way to change the test gcc.dg/tree-ssa/pr45685.c 
in such a way that it ensures there aren't any undesirable branches.

I've got enough history to know this is fixing a regression of sorts for 
the ARM platform.  So once the issues above are addressed it can go 
forward even without a BZ noting the regression.

jeff

* Re: [PATCH] Remove inefficient branchless conditional negate optimization
  2015-02-26 22:48 ` Jeff Law
@ 2015-02-27  8:19   ` Richard Biener
  2015-02-27 19:53     ` Wilco Dijkstra
  2015-03-04 16:18   ` Wilco Dijkstra
  1 sibling, 1 reply; 10+ messages in thread
From: Richard Biener @ 2015-02-27  8:19 UTC (permalink / raw)
  To: Jeff Law; +Cc: Wilco Dijkstra, GCC Patches

On Thu, Feb 26, 2015 at 11:20 PM, Jeff Law <law@redhat.com> wrote:
> On 02/26/15 10:30, Wilco Dijkstra wrote:
>>
>> Several GCC versions ago a conditional negate optimization was introduced
>> as a workaround for
>> PR45685. However the branchless expansion for conditional negate is
>> extremely inefficient on most
>> targets (5 sequentially dependent instructions rather than 2 on AArch64).
>> Since the underlying issue
>> has been resolved (the example in PR45685 no longer generates a branch on
>> x64), remove the
>> workaround so that conditional negates are treated in exactly the same way
>> as conditional invert,
>> add, subtract, and, orr, xor etc. Simple example:
>>
>> int f(int x) { if (x > 3) x = -x; return x; }
>
> You need to bootstrap and regression test the change before it can be
> approved.

As Jeff added a testcase for the PHI opt transform to happen, I'm sure
testing would have shown this as fallout.

> You should turn this little example into a testcase.  It's fine with me if
> this new test is ARM specific.
>
>
> You should also find a way to change the test gcc.dg/tree-ssa/pr45685.c in
> such a way that it ensures there aren't any undesirable branches.

I'd also be interested in results of vectorizing a loop with a
conditional negate.
I can very well imagine this revert causing code quality regressions
there.

> I've got enough history to know this is fixing a regression of sorts for the
> ARM platform.  So once the issues above are addressed it can go forward even
> without a BZ noting the regression.

But I'd say this is stage1 material at this point.

Richard.

> jeff
>

* RE: [PATCH] Remove inefficient branchless conditional negate optimization
  2015-02-27  8:19   ` Richard Biener
@ 2015-02-27 19:53     ` Wilco Dijkstra
  0 siblings, 0 replies; 10+ messages in thread
From: Wilco Dijkstra @ 2015-02-27 19:53 UTC (permalink / raw)
  To: 'Richard Biener', Jeff Law; +Cc: GCC Patches

> Richard Biener wrote: 
> On Thu, Feb 26, 2015 at 11:20 PM, Jeff Law <law@redhat.com> wrote:
> > On 02/26/15 10:30, Wilco Dijkstra wrote:
> >>
> >> Several GCC versions ago a conditional negate optimization was introduced
> >> as a workaround for
> >> PR45685. However the branchless expansion for conditional negate is
> >> extremely inefficient on most
> >> targets (5 sequentially dependent instructions rather than 2 on AArch64).
> >> Since the underlying issue
> >> has been resolved (the example in PR45685 no longer generates a branch on
> >> x64), remove the
> >> workaround so that conditional negates are treated in exactly the same way
> >> as conditional invert,
> >> add, subtract, and, orr, xor etc. Simple example:
> >>
> >> int f(int x) { if (x > 3) x = -x; return x; }
> >
> > You need to bootstrap and regression test the change before it can be
> > approved.
> 
> As Jeff added a testcase for the PHI opt transform to happen, I'm sure
> testing would have shown this as fallout.

Yes, that's the only test that starts to fail. I've changed it to scan
for cmov/csel instead. Bootstrap is fine for AArch64 and x64, of course.

Should the test be moved somewhere else now that it is no longer a tree-ssa test?

> > You should turn this little example into a testcase.  It's fine with me if
> > this new test is ARM specific.
> >
> >
> > You should also find a way to change the test gcc.dg/tree-ssa/pr45685.c in
> > such a way that it ensures there aren't any undesirable branches.
>
> I'd also be interested in results of vectorizing a loop with a
> conditional negate.
> I can very well imagine this revert causing code quality regressions
> there.

Well, vectorized code improves in the same way as you'd expect:

void f(int *p, int *q) 
{ 
  int i; 
  for (i = 0; i < 1000; i++) p[i] = (q[i] > 3) ? -q[i] : q[i]; 
}

Before:
.L6:
        vmovdqa (%r9,%rax), %ymm2
        addl    $1, %edx
        vpcmpgtd        %ymm5, %ymm2, %ymm0
        vpand   %ymm4, %ymm0, %ymm1
        vpsubd  %ymm1, %ymm3, %ymm0
        vpxor   %ymm0, %ymm2, %ymm0
        vpaddd  %ymm0, %ymm1, %ymm0
        vmovups %xmm0, (%rcx,%rax)
        vextracti128    $0x1, %ymm0, 16(%rcx,%rax)
        addq    $32, %rax
        cmpl    %r8d, %edx
        jb      .L6

After:
.L6:
        vmovdqa (%r9,%rax), %ymm0
        addl    $1, %edx
        vpcmpgtd        %ymm4, %ymm0, %ymm2
        vpsubd  %ymm0, %ymm3, %ymm1
        vpblendvb       %ymm2, %ymm1, %ymm0, %ymm0
        vmovups %xmm0, (%rcx,%rax)
        vextracti128    $0x1, %ymm0, 16(%rcx,%rax)
        addq    $32, %rax
        cmpl    %r8d, %edx
        jb      .L6

> > I've got enough history to know this is fixing a regression of sorts for the
> > ARM platform.  So once the issues above are addressed it can go forward even
> > without a BZ noting the regression.
> 
> But I'd say this is stage1 material at this point.

I suppose it doesn't matter as it'll have to be backported anyway.

Wilco

* RE: [PATCH] Remove inefficient branchless conditional negate optimization
  2015-02-26 22:48 ` Jeff Law
  2015-02-27  8:19   ` Richard Biener
@ 2015-03-04 16:18   ` Wilco Dijkstra
  2015-03-04 17:10     ` Jeff Law
  1 sibling, 1 reply; 10+ messages in thread
From: Wilco Dijkstra @ 2015-03-04 16:18 UTC (permalink / raw)
  To: 'Jeff Law'; +Cc: gcc-patches

> Jeff Law wrote:
> On 02/26/15 10:30, Wilco Dijkstra wrote:
> > Several GCC versions ago a conditional negate optimization was introduced as a workaround
> for
> > PR45685. However the branchless expansion for conditional negate is extremely inefficient on
> most
> > targets (5 sequentially dependent instructions rather than 2 on AArch64). Since the
> underlying issue
> > has been resolved (the example in PR45685 no longer generates a branch on x64), remove the
> > workaround so that conditional negates are treated in exactly the same way as conditional
> invert,
> > add, subtract, and, orr, xor etc. Simple example:
> >
> > int f(int x) { if (x > 3) x = -x; return x; }
> You need to bootstrap and regression test the change before it can be
> approved.
> 
> You should turn this little example into a testcase.  It's fine with me
> if this new test is ARM specific.
> 
> 
> You should also find a way to change the test gcc.dg/tree-ssa/pr45685.c
> in such a way that it ensures there aren't any undesirable branches.
> 
> I've got enough history to know this is fixing a regression of sorts for
> the ARM platform.  So once the issues above are addressed it can go
> forward even without a BZ noting the regression.

I updated the x64 testcase to explicitly check for conditional move (which
was simpler than checking for unnecessary branches). Also add a new testcase 
for AArch64 to ensure we continue to emit csneg. Bootstrapped & regression
tested on AArch64 and x64.

ChangeLog: 
2015-03-04  Wilco Dijkstra  <wdijkstr@arm.com>

        * gcc/tree-ssa-phiopt.c (neg_replacement): Remove.
        (tree_ssa_phiopt_worker): Remove negate optimization.
        * gcc/testsuite/gcc.dg/tree-ssa/pr45685.c: Update test case
        to check for conditional move on x64.
        * gcc/testsuite/gcc.target/aarch64/csneg-1.c (test_csneg_cmp):
        New test.

---
 gcc/testsuite/gcc.dg/tree-ssa/pr45685.c    |   5 +-
 gcc/testsuite/gcc.target/aarch64/csneg-1.c |   8 ++
 gcc/tree-ssa-phiopt.c                      | 156 -----------------------------
 3 files changed, 10 insertions(+), 159 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c b/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c
index 0628943..53b2928 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -fdump-tree-phiopt1-details" } */
+/* { dg-options "-O3" } */
 
 typedef unsigned long int uint64_t;
 typedef long int int64_t;
@@ -36,6 +36,5 @@ int summation_helper_2(int64_t* products, uint64_t count)
 	return s;
 }
 
-/* { dg-final { scan-tree-dump-times "converted to straightline code" 2 "phiopt1" } } */
-/* { dg-final { cleanup-tree-dump "phiopt1" } } */
+/* { dg-final { scan-assembler-times "cmov" 4 { target x86_64-*-* } } } */
 
diff --git a/gcc/testsuite/gcc.target/aarch64/csneg-1.c b/gcc/testsuite/gcc.target/aarch64/csneg-1.c
index 08001af..29d4e4e 100644
--- a/gcc/testsuite/gcc.target/aarch64/csneg-1.c
+++ b/gcc/testsuite/gcc.target/aarch64/csneg-1.c
@@ -48,3 +48,11 @@ test_csneg64_condasn2(long long x0,
   x4 = (x0 == x1) ? x3 : -x2;
   return x4;
 }
+
+int test_csneg_cmp(int x)
+{
+  /* { dg-final { scan-assembler "csneg\tw\[0-9\]" } } */
+  if (x > 3)
+    x = -x;
+  return x;
+}
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index bad546d..38a59ab 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -96,8 +96,6 @@ static bool minmax_replacement (basic_block, basic_block,
 				edge, edge, gimple, tree, tree);
 static bool abs_replacement (basic_block, basic_block,
 			     edge, edge, gimple, tree, tree);
-static bool neg_replacement (basic_block, basic_block,
-			     edge, edge, gimple, tree, tree);
 static bool cond_store_replacement (basic_block, basic_block, edge, edge,
 				    hash_set<tree> *);
 static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
@@ -209,23 +207,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
     /* Calculate the set of non-trapping memory accesses.  */
     nontrap = get_non_trapping ();
 
-  /* The replacement of conditional negation with a non-branching
-     sequence is really only a win when optimizing for speed and we
-     can avoid transformations by gimple if-conversion that result
-     in poor RTL generation.
-
-     Ideally either gimple if-conversion or the RTL expanders will
-     be improved and the code to emit branchless conditional negation
-     can be removed.  */
-  bool replace_conditional_negation = false;
-  if (!do_store_elim)
-    replace_conditional_negation
-      = ((!optimize_size && optimize >= 2)
-	 || (((flag_tree_loop_vectorize || cfun->has_force_vectorize_loops)
-	      && flag_tree_loop_if_convert != 0)
-	     || flag_tree_loop_if_convert == 1
-	     || flag_tree_loop_if_convert_stores == 1));
-
   /* Search every basic block for COND_EXPR we may be able to optimize.
 
      We walk the blocks in order that guarantees that a block with
@@ -380,9 +361,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
 	    cfgchanged = true;
 	  else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
 	    cfgchanged = true;
-	  else if (replace_conditional_negation
-		   && neg_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
-	    cfgchanged = true;
 	  else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
 	    cfgchanged = true;
 	}
@@ -1294,140 +1272,6 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,
   return true;
 }
 
-/*  The function neg_replacement replaces conditional negation with
-    equivalent straight line code.  Returns TRUE if replacement is done,
-    otherwise returns FALSE.
-
-    COND_BB branches around negation occuring in MIDDLE_BB.
-
-    E0 and E1 are edges out of COND_BB.  E0 reaches MIDDLE_BB and
-    E1 reaches the other successor which should contain PHI with
-    arguments ARG0 and ARG1.
-
-    Assuming negation is to occur when the condition is true,
-    then the non-branching sequence is:
-
-       result = (rhs ^ -cond) + cond
-
-    Inverting the condition or its result gives us negation
-    when the original condition is false.  */
-
-static bool
-neg_replacement (basic_block cond_bb, basic_block middle_bb,
-		 edge e0 ATTRIBUTE_UNUSED, edge e1,
-		 gimple phi, tree arg0, tree arg1)
-{
-  gimple new_stmt, cond;
-  gimple_stmt_iterator gsi;
-  gimple assign;
-  edge true_edge, false_edge;
-  tree rhs, lhs;
-  enum tree_code cond_code;
-  bool invert = false;
-
-  /* This transformation performs logical operations on the
-     incoming arguments.  So force them to be integral types.   */
-  if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)))
-    return false;
-
-  /* OTHER_BLOCK must have only one executable statement which must have the
-     form arg0 = -arg1 or arg1 = -arg0.  */
-
-  assign = last_and_only_stmt (middle_bb);
-  /* If we did not find the proper negation assignment, then we can not
-     optimize.  */
-  if (assign == NULL)
-    return false;
-
-  /* If we got here, then we have found the only executable statement
-     in OTHER_BLOCK.  If it is anything other than arg0 = -arg1 or
-     arg1 = -arg0, then we can not optimize.  */
-  if (gimple_code (assign) != GIMPLE_ASSIGN)
-    return false;
-
-  lhs = gimple_assign_lhs (assign);
-
-  if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
-    return false;
-
-  rhs = gimple_assign_rhs1 (assign);
-
-  /* The assignment has to be arg0 = -arg1 or arg1 = -arg0.  */
-  if (!(lhs == arg0 && rhs == arg1)
-      && !(lhs == arg1 && rhs == arg0))
-    return false;
-
-  /* The basic sequence assumes we negate when the condition is true.
-     If we need the opposite, then we will either need to invert the
-     condition or its result.  */
-  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
-  invert = false_edge->dest == middle_bb;
-
-  /* Unlike abs_replacement, we can handle arbitrary conditionals here.  */
-  cond = last_stmt (cond_bb);
-  cond_code = gimple_cond_code (cond);
-
-  /* If inversion is needed, first try to invert the test since
-     that's cheapest.  */
-  if (invert)
-    {
-      bool honor_nans = HONOR_NANS (gimple_cond_lhs (cond));
-      enum tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
-
-      /* If invert_tree_comparison was successful, then use its return
-	 value as the new code and note that inversion is no longer
-	 needed.  */
-      if (new_code != ERROR_MARK)
-	{
-	  cond_code = new_code;
-	  invert = false;
-	}
-    }
-
-  tree cond_val = make_ssa_name (boolean_type_node);
-  new_stmt = gimple_build_assign (cond_val, cond_code,
-				  gimple_cond_lhs (cond),
-				  gimple_cond_rhs (cond));
-  gsi = gsi_last_bb (cond_bb);
-  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
-
-  /* If we still need inversion, then invert the result of the
-     condition.  */
-  if (invert)
-    {
-      tree tmp = make_ssa_name (boolean_type_node);
-      new_stmt = gimple_build_assign (tmp, BIT_XOR_EXPR, cond_val,
-				      boolean_true_node);
-      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
-      cond_val = tmp;
-    }
-
-  /* Get the condition in the right type so that we can perform
-     logical and arithmetic operations on it.  */
-  tree cond_val_converted = make_ssa_name (TREE_TYPE (rhs));
-  new_stmt = gimple_build_assign (cond_val_converted, NOP_EXPR, cond_val);
-  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
-
-  tree neg_cond_val_converted = make_ssa_name (TREE_TYPE (rhs));
-  new_stmt = gimple_build_assign (neg_cond_val_converted, NEGATE_EXPR,
-				  cond_val_converted);
-  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
-
-  tree tmp = make_ssa_name (TREE_TYPE (rhs));
-  new_stmt = gimple_build_assign (tmp, BIT_XOR_EXPR, rhs,
-				  neg_cond_val_converted);
-  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
-
-  tree new_lhs = make_ssa_name (TREE_TYPE (rhs));
-  new_stmt = gimple_build_assign (new_lhs, PLUS_EXPR, tmp, cond_val_converted);
-  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
-
-  replace_phi_edge_with_variable (cond_bb, e1, phi, new_lhs);
-
-  /* Note that we optimized this PHI.  */
-  return true;
-}
-
 /* Auxiliary functions to determine the set of memory accesses which
    can't trap because they are preceded by accesses to the same memory
    portion.  We do that for MEM_REFs, so we only need to track
-- 
1.9.1

* Re: [PATCH] Remove inefficient branchless conditional negate optimization
  2015-03-04 16:18   ` Wilco Dijkstra
@ 2015-03-04 17:10     ` Jeff Law
  2015-03-04 17:27       ` H.J. Lu
  2015-03-06 16:12       ` Wilco Dijkstra
  0 siblings, 2 replies; 10+ messages in thread
From: Jeff Law @ 2015-03-04 17:10 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: gcc-patches

On 03/04/15 09:18, Wilco Dijkstra wrote:
>> Jeff Law wrote:
>> On 02/26/15 10:30, Wilco Dijkstra wrote:
>>> Several GCC versions ago a conditional negate optimization was introduced as a workaround
>> for
>>> PR45685. However the branchless expansion for conditional negate is extremely inefficient on
>> most
>>> targets (5 sequentially dependent instructions rather than 2 on AArch64). Since the
>> underlying issue
>>> has been resolved (the example in PR45685 no longer generates a branch on x64), remove the
>>> workaround so that conditional negates are treated in exactly the same way as conditional
>> invert,
>>> add, subtract, and, orr, xor etc. Simple example:
>>>
>>> int f(int x) { if (x > 3) x = -x; return x; }
>> You need to bootstrap and regression test the change before it can be
>> approved.
>>
>> You should turn this little example into a testcase.  It's fine with me
>> if this new test is ARM specific.
>>
>>
>> You should also find a way to change the test gcc.dg/tree-ssa/pr45685.c
>> in such a way that it ensures there aren't any undesirable branches.
>>
>> I've got enough history to know this is fixing a regression of sorts for
>> the ARM platform.  So once the issues above are addressed it can go
>> forward even without a BZ noting the regression.
>
> I updated the x64 testcase to explicitly check for conditional move (which
> was simpler than checking for unnecessary branches). Also add a new testcase
> for AArch64 to ensure we continue to emit csneg. Bootstrapped & regression
> tested on AArch64 and x64.
>
> ChangeLog:
> 2015-03-04  Wilco Dijkstra  <wdijkstr@arm.com>
>
>          * gcc/tree-ssa-phiopt.c (neg_replacement): Remove.
>          (tree_ssa_phiopt_worker): Remove negate optimization.
>          * gcc/testsuite/gcc.dg/tree-ssa/pr45685.c: Update test case
>          to check for conditional move on x64.
>          * gcc/testsuite/gcc.target/aarch64/csneg-1.c (test_csneg_cmp):
>          New test.
Can you move pr45685.c into gcc.target/i386?

I know Richi said next stage1, but given this fixes a performance 
regression for ARM and it's reverting rather than adding new code, I 
think this is OK for the trunk with the testcase moved.

So, OK with the testcase moved into gcc.target/i386/

jeff

* Re: [PATCH] Remove inefficient branchless conditional negate optimization
  2015-03-04 17:10     ` Jeff Law
@ 2015-03-04 17:27       ` H.J. Lu
  2015-03-06 16:12       ` Wilco Dijkstra
  1 sibling, 0 replies; 10+ messages in thread
From: H.J. Lu @ 2015-03-04 17:27 UTC (permalink / raw)
  To: Jeff Law; +Cc: Wilco Dijkstra, GCC Patches

On Wed, Mar 4, 2015 at 9:10 AM, Jeff Law <law@redhat.com> wrote:
> On 03/04/15 09:18, Wilco Dijkstra wrote:
>>>
>>> Jeff Law wrote:
>>> On 02/26/15 10:30, Wilco Dijkstra wrote:
>>>>
>>>> Several GCC versions ago a conditional negate optimization was
>>>> introduced as a workaround
>>>
>>> for
>>>>
>>>> PR45685. However the branchless expansion for conditional negate is
>>>> extremely inefficient on
>>>
>>> most
>>>>
>>>> targets (5 sequentially dependent instructions rather than 2 on
>>>> AArch64). Since the
>>>
>>> underlying issue
>>>>
>>>> has been resolved (the example in PR45685 no longer generates a branch
>>>> on x64), remove the
>>>> workaround so that conditional negates are treated in exactly the same
>>>> way as conditional
>>>
>>> invert,
>>>>
>>>> add, subtract, and, orr, xor etc. Simple example:
>>>>
>>>> int f(int x) { if (x > 3) x = -x; return x; }
>>>
>>> You need to bootstrap and regression test the change before it can be
>>> approved.
>>>
>>> You should turn this little example into a testcase.  It's fine with me
>>> if this new test is ARM specific.
>>>
>>>
>>> You should also find a way to change the test gcc.dg/tree-ssa/pr45685.c
>>> in such a way that it ensures there aren't any undesirable branches.
>>>
>>> I've got enough history to know this is fixing a regression of sorts for
>>> the ARM platform.  So once the issues above are addressed it can go
>>> forward even without a BZ noting the regression.
>>
>>
>> I updated the x64 testcase to explicitly check for conditional move (which
>> was simpler than checking for unnecessary branches). Also add a new
>> testcase
>> for AArch64 to ensure we continue to emit csneg. Bootstrapped & regression
>> tested on AArch64 and x64.
>>
>> ChangeLog:
>> 2015-03-04  Wilco Dijkstra  <wdijkstr@arm.com>
>>
>>          * gcc/tree-ssa-phiopt.c (neg_replacement): Remove.
>>          (tree_ssa_phiopt_worker): Remove negate optimization.
>>          * gcc/testsuite/gcc.dg/tree-ssa/pr45685.c: Update test case
>>          to check for conditional move on x64.
>>          * gcc/testsuite/gcc.target/aarch64/csneg-1.c (test_csneg_cmp):
>>          New test.
>
> Can you move pr45685.c into gcc.target/i386?
>
> I know Richi said next stage1, but given this fixes a performance regression
> for ARM and it's reverting rather than adding new code, I think this is OK
> for the trunk with the testcase moved.
>
> So, OK with the testcase moved into gcc.target/i386/
>

Condition on pr45685.c should be target { ! ia32 }, not
target x86_64-*-*.
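
That is, something like this sketch of the corrected directive:

/* { dg-do compile { target { ! ia32 } } } */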

-- 
H.J.

* RE: [PATCH] Remove inefficient branchless conditional negate optimization
  2015-03-04 17:10     ` Jeff Law
  2015-03-04 17:27       ` H.J. Lu
@ 2015-03-06 16:12       ` Wilco Dijkstra
  2015-03-06 16:37         ` Jiong Wang
  1 sibling, 1 reply; 10+ messages in thread
From: Wilco Dijkstra @ 2015-03-06 16:12 UTC (permalink / raw)
  To: Jiong Wang; +Cc: gcc-patches, 'Jeff Law'

[-- Attachment #1: Type: text/plain, Size: 873 bytes --]

> Jeff Law wrote:
> Can you move pr45685.c into gcc.target/i386?
> 
> I know Richi said next stage1, but given this fixes a performance
> regression for ARM and it's reverting rather than adding new code, I
> think this is OK for the trunk with the testcase moved.
> 
> So, OK with the testcase moved into gcc.target/i386/

I've moved it and changed the compile condition:

/* { dg-do compile { target { ! { ia32 } } } } */

Jiong, can you commit this please?

Wilco

2015-03-06  Wilco Dijkstra  <wdijkstr@arm.com>

	* gcc/tree-ssa-phiopt.c (neg_replacement): Remove.
	(tree_ssa_phiopt_worker): Remove negate optimization.
	* gcc/testsuite/gcc.dg/tree-ssa/pr45685.c: Move to gcc.target/i386.
	* gcc/testsuite/gcc.target/aarch64/csneg-1.c (test_csneg_cmp):
	New test.
	* gcc/testsuite/gcc.target/i386/pr45685.c: Moved test; check for
	conditional move on x64.

[-- Attachment #2: Remove-negate-optimization.txt --]
[-- Type: text/plain, Size: 10385 bytes --]

---
 gcc/testsuite/gcc.dg/tree-ssa/pr45685.c    |  41 --------
 gcc/testsuite/gcc.target/aarch64/csneg-1.c |   8 ++
 gcc/testsuite/gcc.target/i386/pr45685.c    |  39 ++++++++
 gcc/tree-ssa-phiopt.c                      | 156 -----------------------------
 4 files changed, 47 insertions(+), 197 deletions(-)
 delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr45685.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr45685.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c b/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c
deleted file mode 100644
index 0628943..0000000
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O3 -fdump-tree-phiopt1-details" } */
-
-typedef unsigned long int uint64_t;
-typedef long int int64_t;
-int summation_helper_1(int64_t* products, uint64_t count)
-{
-	int s = 0;
-	uint64_t i;
-	for(i=0; i<count; i++)
-	{	
-		int64_t val = (products[i]>0) ? 1 : -1;
-		products[i] *= val;
-		if(products[i] != i)
-			val = -val;
-		products[i] = val;
-		s += val;
-	}
-	return s;
-}
-
-
-int summation_helper_2(int64_t* products, uint64_t count)
-{
-	int s = 0;
-	uint64_t i;
-	for(i=0; i<count; i++)
-	{	
-		int val = (products[i]>0) ? 1 : -1;
-		products[i] *= val;
-		if(products[i] != i)
-			val = -val;
-		products[i] = val;
-		s += val;
-	}
-	return s;
-}
-
-/* { dg-final { scan-tree-dump-times "converted to straightline code" 2 "phiopt1" } } */
-/* { dg-final { cleanup-tree-dump "phiopt1" } } */
-
diff --git a/gcc/testsuite/gcc.target/aarch64/csneg-1.c b/gcc/testsuite/gcc.target/aarch64/csneg-1.c
index 08001af..29d4e4e 100644
--- a/gcc/testsuite/gcc.target/aarch64/csneg-1.c
+++ b/gcc/testsuite/gcc.target/aarch64/csneg-1.c
@@ -48,3 +48,11 @@ test_csneg64_condasn2(long long x0,
   x4 = (x0 == x1) ? x3 : -x2;
   return x4;
 }
+
+int test_csneg_cmp(int x)
+{
+  /* { dg-final { scan-assembler "csneg\tw\[0-9\]" } } */
+  if (x > 3)
+    x = -x;
+  return x;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr45685.c b/gcc/testsuite/gcc.target/i386/pr45685.c
new file mode 100644
index 0000000..7f50bb3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr45685.c
@@ -0,0 +1,39 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-O3" } */
+
+typedef unsigned long int uint64_t;
+typedef long int int64_t;
+int summation_helper_1(int64_t* products, uint64_t count)
+{
+	int s = 0;
+	uint64_t i;
+	for(i=0; i<count; i++)
+	{	
+		int64_t val = (products[i]>0) ? 1 : -1;
+		products[i] *= val;
+		if(products[i] != i)
+			val = -val;
+		products[i] = val;
+		s += val;
+	}
+	return s;
+}
+
+
+int summation_helper_2(int64_t* products, uint64_t count)
+{
+	int s = 0;
+	uint64_t i;
+	for(i=0; i<count; i++)
+	{	
+		int val = (products[i]>0) ? 1 : -1;
+		products[i] *= val;
+		if(products[i] != i)
+			val = -val;
+		products[i] = val;
+		s += val;
+	}
+	return s;
+}
+
+/* { dg-final { scan-assembler-times "cmov" 4 } } */
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index c7fb073..14a7122 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -96,8 +96,6 @@ static bool minmax_replacement (basic_block, basic_block,
 				edge, edge, gimple, tree, tree);
 static bool abs_replacement (basic_block, basic_block,
 			     edge, edge, gimple, tree, tree);
-static bool neg_replacement (basic_block, basic_block,
-			     edge, edge, gimple, tree, tree);
 static bool cond_store_replacement (basic_block, basic_block, edge, edge,
 				    hash_set<tree> *);
 static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
@@ -209,23 +207,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
     /* Calculate the set of non-trapping memory accesses.  */
     nontrap = get_non_trapping ();
 
-  /* The replacement of conditional negation with a non-branching
-     sequence is really only a win when optimizing for speed and we
-     can avoid transformations by gimple if-conversion that result
-     in poor RTL generation.
-
-     Ideally either gimple if-conversion or the RTL expanders will
-     be improved and the code to emit branchless conditional negation
-     can be removed.  */
-  bool replace_conditional_negation = false;
-  if (!do_store_elim)
-    replace_conditional_negation
-      = ((!optimize_size && optimize >= 2)
-	 || (((flag_tree_loop_vectorize || cfun->has_force_vectorize_loops)
-	      && flag_tree_loop_if_convert != 0)
-	     || flag_tree_loop_if_convert == 1
-	     || flag_tree_loop_if_convert_stores == 1));
-
   /* Search every basic block for COND_EXPR we may be able to optimize.
 
      We walk the blocks in order that guarantees that a block with
@@ -380,9 +361,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
 	    cfgchanged = true;
 	  else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
 	    cfgchanged = true;
-	  else if (replace_conditional_negation
-		   && neg_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
-	    cfgchanged = true;
 	  else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
 	    cfgchanged = true;
 	}
@@ -1319,140 +1297,6 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,
   return true;
 }
 
-/*  The function neg_replacement replaces conditional negation with
-    equivalent straight line code.  Returns TRUE if replacement is done,
-    otherwise returns FALSE.
-
-    COND_BB branches around negation occuring in MIDDLE_BB.
-
-    E0 and E1 are edges out of COND_BB.  E0 reaches MIDDLE_BB and
-    E1 reaches the other successor which should contain PHI with
-    arguments ARG0 and ARG1.
-
-    Assuming negation is to occur when the condition is true,
-    then the non-branching sequence is:
-
-       result = (rhs ^ -cond) + cond
-
-    Inverting the condition or its result gives us negation
-    when the original condition is false.  */
-
-static bool
-neg_replacement (basic_block cond_bb, basic_block middle_bb,
-		 edge e0 ATTRIBUTE_UNUSED, edge e1,
-		 gimple phi, tree arg0, tree arg1)
-{
-  gimple new_stmt, cond;
-  gimple_stmt_iterator gsi;
-  gimple assign;
-  edge true_edge, false_edge;
-  tree rhs, lhs;
-  enum tree_code cond_code;
-  bool invert = false;
-
-  /* This transformation performs logical operations on the
-     incoming arguments.  So force them to be integral types.   */
-  if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)))
-    return false;
-
-  /* OTHER_BLOCK must have only one executable statement which must have the
-     form arg0 = -arg1 or arg1 = -arg0.  */
-
-  assign = last_and_only_stmt (middle_bb);
-  /* If we did not find the proper negation assignment, then we can not
-     optimize.  */
-  if (assign == NULL)
-    return false;
-
-  /* If we got here, then we have found the only executable statement
-     in OTHER_BLOCK.  If it is anything other than arg0 = -arg1 or
-     arg1 = -arg0, then we can not optimize.  */
-  if (gimple_code (assign) != GIMPLE_ASSIGN)
-    return false;
-
-  lhs = gimple_assign_lhs (assign);
-
-  if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
-    return false;
-
-  rhs = gimple_assign_rhs1 (assign);
-
-  /* The assignment has to be arg0 = -arg1 or arg1 = -arg0.  */
-  if (!(lhs == arg0 && rhs == arg1)
-      && !(lhs == arg1 && rhs == arg0))
-    return false;
-
-  /* The basic sequence assumes we negate when the condition is true.
-     If we need the opposite, then we will either need to invert the
-     condition or its result.  */
-  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
-  invert = false_edge->dest == middle_bb;
-
-  /* Unlike abs_replacement, we can handle arbitrary conditionals here.  */
-  cond = last_stmt (cond_bb);
-  cond_code = gimple_cond_code (cond);
-
-  /* If inversion is needed, first try to invert the test since
-     that's cheapest.  */
-  if (invert)
-    {
-      bool honor_nans = HONOR_NANS (gimple_cond_lhs (cond));
-      enum tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
-
-      /* If invert_tree_comparison was successful, then use its return
-	 value as the new code and note that inversion is no longer
-	 needed.  */
-      if (new_code != ERROR_MARK)
-	{
-	  cond_code = new_code;
-	  invert = false;
-	}
-    }
-
-  tree cond_val = make_ssa_name (boolean_type_node);
-  new_stmt = gimple_build_assign (cond_val, cond_code,
-				  gimple_cond_lhs (cond),
-				  gimple_cond_rhs (cond));
-  gsi = gsi_last_bb (cond_bb);
-  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
-
-  /* If we still need inversion, then invert the result of the
-     condition.  */
-  if (invert)
-    {
-      tree tmp = make_ssa_name (boolean_type_node);
-      new_stmt = gimple_build_assign (tmp, BIT_XOR_EXPR, cond_val,
-				      boolean_true_node);
-      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
-      cond_val = tmp;
-    }
-
-  /* Get the condition in the right type so that we can perform
-     logical and arithmetic operations on it.  */
-  tree cond_val_converted = make_ssa_name (TREE_TYPE (rhs));
-  new_stmt = gimple_build_assign (cond_val_converted, NOP_EXPR, cond_val);
-  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
-
-  tree neg_cond_val_converted = make_ssa_name (TREE_TYPE (rhs));
-  new_stmt = gimple_build_assign (neg_cond_val_converted, NEGATE_EXPR,
-				  cond_val_converted);
-  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
-
-  tree tmp = make_ssa_name (TREE_TYPE (rhs));
-  new_stmt = gimple_build_assign (tmp, BIT_XOR_EXPR, rhs,
-				  neg_cond_val_converted);
-  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
-
-  tree new_lhs = make_ssa_name (TREE_TYPE (rhs));
-  new_stmt = gimple_build_assign (new_lhs, PLUS_EXPR, tmp, cond_val_converted);
-  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
-
-  replace_phi_edge_with_variable (cond_bb, e1, phi, new_lhs);
-
-  /* Note that we optimized this PHI.  */
-  return true;
-}
-
 /* Auxiliary functions to determine the set of memory accesses which
    can't trap because they are preceded by accesses to the same memory
    portion.  We do that for MEM_REFs, so we only need to track
-- 
1.9.1

* Re: [PATCH] Remove inefficient branchless conditional negate optimization
  2015-03-06 16:12       ` Wilco Dijkstra
@ 2015-03-06 16:37         ` Jiong Wang
  2015-03-07 13:34           ` H.J. Lu
  0 siblings, 1 reply; 10+ messages in thread
From: Jiong Wang @ 2015-03-06 16:37 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: gcc-patches, 'Jeff Law'


Wilco Dijkstra writes:

>> Jeff Law wrote:
>> Can you move pr45685.c into gcc.target/i386?
>> 
>> I know Richi said next stage1, but given this fixes a performance
>> regression for ARM and it's reverting rather than adding new code, I
>> think this is OK for the trunk with the testcase moved.
>> 
>> So, OK with the testcase moved into gcc.target/i386/
>
> I've moved it and changed the compile condition:
>
> /* { dg-do compile { target { ! { ia32 } } } } */
>
> Jiong, can you commit this please?

Done as 221246.

>
> Wilco
>
> 2015-03-06  Wilco Dijkstra  <wdijkstr@arm.com>
>
> 	* gcc/tree-ssa-phiopt.c (neg_replacement): Remove.
> 	(tree_ssa_phiopt_worker): Remove negate optimization.
> 	* gcc/testsuite/gcc.dg/tree-ssa/pr45685.c: Move to gcc.target/i386.
> 	* gcc/testsuite/gcc.target/aarch64/csneg-1.c (test_csneg_cmp):
> 	New test.
> 	* gcc/testsuite/gcc.target/i386/pr45685.c: Moved test; check for
> 	conditional move on x64.

--
Regards,
Jiong

* Re: [PATCH] Remove inefficient branchless conditional negate optimization
  2015-03-06 16:37         ` Jiong Wang
@ 2015-03-07 13:34           ` H.J. Lu
  0 siblings, 0 replies; 10+ messages in thread
From: H.J. Lu @ 2015-03-07 13:34 UTC (permalink / raw)
  To: Jiong Wang; +Cc: Wilco Dijkstra, gcc-patches, Jeff Law

On Fri, Mar 6, 2015 at 8:36 AM, Jiong Wang <jiong.wang@arm.com> wrote:
>
> Wilco Dijkstra writes:
>
>>> Jeff Law wrote:
>>> Can you move pr45685.c into gcc.target/i386?
>>>
>>> I know Richi said next stage1, but given this fixes a performance
>>> regression for ARM and it's reverting rather than adding new code, I
>>> think this is OK for the trunk with the testcase moved.
>>>
>>> So, OK with the testcase moved into gcc.target/i386/
>>
>> I've moved it and changed the compile condition:
>>
>> /* { dg-do compile { target { ! { ia32 } } } } */
>>
>> Jiong, can you commit this please?
>
> Done as 221246.
>
>>
>> Wilco
>>
>> 2015-03-06  Wilco Dijkstra  <wdijkstr@arm.com>
>>
>>       * gcc/tree-ssa-phiopt.c (neg_replacement): Remove.
>>       (tree_ssa_phiopt_worker): Remove negate optimization.
>>       * gcc/testsuite/gcc.dg/tree-ssa/pr45685.c: Move to gcc.target/i386.
>>       * gcc/testsuite/gcc.target/aarch64/csneg-1.c (test_csneg_cmp):
>>       New test.
>>       * gcc/testsuite/gcc.target/i386/pr45685.c: Moved test; check for
>>       conditional move on x64.
>

I checked this in as an obvious fix to make it pass on x32.


-- 
H.J.
---
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 221254)
+++ ChangeLog	(working copy)
@@ -1,3 +1,9 @@
+2015-03-07  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* gcc.target/i386/pr45685.c (uint64_t): Replace long with long
+	long.
+	(int64_t): Likewise.
+
 2015-03-07  Marek Polacek  <polacek@redhat.com>
 	    Martin Uecker  <uecker@eecs.berkeley.edu>
 
Index: gcc.target/i386/pr45685.c
===================================================================
--- gcc.target/i386/pr45685.c	(revision 221254)
+++ gcc.target/i386/pr45685.c	(working copy)
@@ -1,8 +1,8 @@
 /* { dg-do compile { target { ! { ia32 } } } } */
 /* { dg-options "-O3" } */
 
-typedef unsigned long int uint64_t;
-typedef long int int64_t;
+typedef unsigned long long int uint64_t;
+typedef long long int int64_t;
 int summation_helper_1(int64_t* products, uint64_t count)
 {
 	int s = 0;

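For context, the reason the fix is needed: under the x32 ABI, long is only
32 bits wide, so typedef'ing int64_t as plain long made the test's 64-bit
types 32-bit. A minimal sketch of the assumption the test now relies on
(the names below are illustrative, not from the patch):

/* long long is 64 bits on both x86_64 and x32.  */
typedef long long int my_int64;
_Static_assert (sizeof (my_int64) == 8, "int64_t must be 64 bits wide");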