public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order
@ 2015-11-13 16:03 Wilco Dijkstra
  2016-01-22 14:45 ` Andreas Schwab
  2016-01-23 10:40 ` Andreas Schwab
  0 siblings, 2 replies; 12+ messages in thread
From: Wilco Dijkstra @ 2015-11-13 16:03 UTC (permalink / raw)
  To: gcc-patches

This patch adds CCMP selection based on rtx costs. This is based on Jiong's
already approved patch
https://gcc.gnu.org/ml/gcc-patches/2015-09/msg01434.html with some minor
refactoring and the tests updated.

OK for commit?

ChangeLog:
2015-11-13  Jiong Wang  <jiong.wang@arm.com>

gcc/
	* ccmp.c (expand_ccmp_expr_1): Cost the instruction sequences
	generated from different expand order.
  
gcc/testsuite/
	* gcc.target/aarch64/ccmp_1.c: Update test.

---
 gcc/ccmp.c                                | 47
+++++++++++++++++++++++++++----
 gcc/testsuite/gcc.target/aarch64/ccmp_1.c | 15 ++++++++--
 2 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/gcc/ccmp.c b/gcc/ccmp.c
index cbdbd6d..95a41a6 100644
--- a/gcc/ccmp.c
+++ b/gcc/ccmp.c
@@ -51,6 +51,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-outof-ssa.h"
 #include "cfgexpand.h"
 #include "ccmp.h"
+#include "predict.h"
 
 /* The following functions expand conditional compare (CCMP) instructions.
    Here is a short description about the over all algorithm:
@@ -159,6 +160,8 @@ expand_ccmp_next (gimple *g, enum tree_code code, rtx
prev,
 static rtx
 expand_ccmp_expr_1 (gimple *g, rtx *prep_seq, rtx *gen_seq)
 {
+  rtx prep_seq_1, gen_seq_1;
+  rtx prep_seq_2, gen_seq_2;
   tree exp = gimple_assign_rhs_to_tree (g);
   enum tree_code code = TREE_CODE (exp);
   gimple *gs0 = get_gimple_for_ssa_name (TREE_OPERAND (exp, 0));
@@ -174,19 +177,53 @@ expand_ccmp_expr_1 (gimple *g, rtx *prep_seq, rtx
*gen_seq)
     {
       if (TREE_CODE_CLASS (code1) == tcc_comparison)
 	{
-	  int unsignedp0;
-	  enum rtx_code rcode0;
+	  int unsignedp0, unsignedp1;
+	  enum rtx_code rcode0, rcode1;
+	  int speed_p = optimize_insn_for_speed_p ();
+	  rtx tmp2, ret, ret2;
+	  unsigned cost1 = MAX_COST;
+	  unsigned cost2 = MAX_COST;
 
 	  unsignedp0 = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (gs0)));
+	  unsignedp1 = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (gs1)));
 	  rcode0 = get_rtx_code (code0, unsignedp0);
+	  rcode1 = get_rtx_code (code1, unsignedp1);
 
-	  tmp = targetm.gen_ccmp_first (prep_seq, gen_seq, rcode0,
+	  tmp = targetm.gen_ccmp_first (&prep_seq_1, &gen_seq_1, rcode0,
 					gimple_assign_rhs1 (gs0),
 					gimple_assign_rhs2 (gs0));
-	  if (!tmp)
+
+	  tmp2 = targetm.gen_ccmp_first (&prep_seq_2, &gen_seq_2, rcode1,
+					 gimple_assign_rhs1 (gs1),
+					 gimple_assign_rhs2 (gs1));
+
+	  if (!tmp && !tmp2)
 	    return NULL_RTX;
 
-	  return expand_ccmp_next (gs1, code, tmp, prep_seq, gen_seq);
+	  if (tmp != NULL)
+	    {
+	      ret = expand_ccmp_next (gs1, code, tmp, &prep_seq_1,
&gen_seq_1);
+	      cost1 = seq_cost (safe_as_a <rtx_insn *> (prep_seq_1),
speed_p);
+	      cost1 += seq_cost (safe_as_a <rtx_insn *> (gen_seq_1),
speed_p);
+	    }
+	  if (tmp2 != NULL)
+	    {
+	      ret2 = expand_ccmp_next (gs0, code, tmp2, &prep_seq_2,
+				       &gen_seq_2);
+	      cost2 = seq_cost (safe_as_a <rtx_insn *> (prep_seq_2),
speed_p);
+	      cost2 += seq_cost (safe_as_a <rtx_insn *> (gen_seq_2),
speed_p);
+	    }
+
+	  if (cost2 < cost1)
+	    {
+	      *prep_seq = prep_seq_2;
+	      *gen_seq = gen_seq_2;
+	      return ret2;
+	    }
+
+	  *prep_seq = prep_seq_1;
+	  *gen_seq = gen_seq_1;
+	  return ret;
 	}
       else
 	{
diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
b/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
index ef077e0..7c39b61 100644
--- a/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
@@ -80,5 +80,16 @@ f13 (int a, int b)
   return a == 3 || a == 0;
 }
 
-/* { dg-final { scan-assembler "fccmp\t" } } */
-/* { dg-final { scan-assembler "fccmpe\t" } } */
+/* { dg-final { scan-assembler "cmp\t(.)+32" } } */
+/* { dg-final { scan-assembler "cmp\t(.)+33" } } */
+/* { dg-final { scan-assembler "cmp\t(.)+34" } } */
+/* { dg-final { scan-assembler "cmp\t(.)+35" } } */
+
+/* { dg-final { scan-assembler-times "\tcmp\tw\[0-9\]+, 0" 4 } } */
+/* { dg-final { scan-assembler-times "fcmpe\t(.)+0\\.0" 2 } } */
+/* { dg-final { scan-assembler-times "fcmp\t(.)+0\\.0" 2 } } */
+
+/* { dg-final { scan-assembler "adds\t" } } */
+/* { dg-final { scan-assembler-times "\tccmp\t" 11 } } */
+/* { dg-final { scan-assembler-times "fccmp\t.*0\\.0" 1 } } */
+/* { dg-final { scan-assembler-times "fccmpe\t.*0\\.0" 1 } } */
-- 
1.9.1



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order
  2015-11-13 16:03 [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order Wilco Dijkstra
@ 2016-01-22 14:45 ` Andreas Schwab
  2016-01-23 10:40 ` Andreas Schwab
  1 sibling, 0 replies; 12+ messages in thread
From: Andreas Schwab @ 2016-01-22 14:45 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: gcc-patches

This breaks bootstrap on aarch64 by miscompiling the stage2 compiler.

../../../libgomp/priority_queue.h:422:11: internal compiler error: RTL flag check: MEM_VOLATILE_P used with unexpected rtx code 'mem' in set_mem_attributes_minus_bitpos, at emit-rtl.c:1833
   if (list->tasks == node)
       ~~~~^~~~~~~

0x5c3077 ???
	../sysdeps/aarch64/start.S:81

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order
  2015-11-13 16:03 [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order Wilco Dijkstra
  2016-01-22 14:45 ` Andreas Schwab
@ 2016-01-23 10:40 ` Andreas Schwab
  2016-01-25 20:09   ` Wilco Dijkstra
  1 sibling, 1 reply; 12+ messages in thread
From: Andreas Schwab @ 2016-01-23 10:40 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: gcc-patches

"Wilco Dijkstra" <Wilco.Dijkstra@arm.com> writes:

> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
> b/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
> index ef077e0..7c39b61 100644
> --- a/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
> @@ -80,5 +80,16 @@ f13 (int a, int b)
>    return a == 3 || a == 0;
>  }
>  
> -/* { dg-final { scan-assembler "fccmp\t" } } */
> -/* { dg-final { scan-assembler "fccmpe\t" } } */
> +/* { dg-final { scan-assembler "cmp\t(.)+32" } } */
> +/* { dg-final { scan-assembler "cmp\t(.)+33" } } */
> +/* { dg-final { scan-assembler "cmp\t(.)+34" } } */
> +/* { dg-final { scan-assembler "cmp\t(.)+35" } } */
> +
> +/* { dg-final { scan-assembler-times "\tcmp\tw\[0-9\]+, 0" 4 } } */
> +/* { dg-final { scan-assembler-times "fcmpe\t(.)+0\\.0" 2 } } */
> +/* { dg-final { scan-assembler-times "fcmp\t(.)+0\\.0" 2 } } */
> +
> +/* { dg-final { scan-assembler "adds\t" } } */
> +/* { dg-final { scan-assembler-times "\tccmp\t" 11 } } */
> +/* { dg-final { scan-assembler-times "fccmp\t.*0\\.0" 1 } } */
> +/* { dg-final { scan-assembler-times "fccmpe\t.*0\\.0" 1 } } */

FAIL: gcc.target/aarch64/ccmp_1.c scan-assembler-times \tcmp\tw[0-9]+, 0 4
FAIL: gcc.target/aarch64/ccmp_1.c scan-assembler adds\t
FAIL: gcc.target/aarch64/ccmp_1.c scan-assembler-times fccmpe\t.*0\\.0 1

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order
  2016-01-23 10:40 ` Andreas Schwab
@ 2016-01-25 20:09   ` Wilco Dijkstra
  2016-01-25 20:45     ` Richard Henderson
  2016-01-28 14:33     ` James Greenhalgh
  0 siblings, 2 replies; 12+ messages in thread
From: Wilco Dijkstra @ 2016-01-25 20:09 UTC (permalink / raw)
  To: Andreas Schwab, Richard Henderson; +Cc: gcc-patches, Christophe Lyon, nd

Andreas Schwab <schwab@linux-m68k.org> wrote:

> FAIL: gcc.target/aarch64/ccmp_1.c scan-assembler-times \tcmp\tw[0-9]+, 0 4
> FAIL: gcc.target/aarch64/ccmp_1.c scan-assembler adds\t
> FAIL: gcc.target/aarch64/ccmp_1.c scan-assembler-times fccmpe\t.*0\\.0 1

Yes I noticed those too, and here is the fix. Richard's recent change added UNSPEC to the CCMP patterns to stop combine optimizing the CCMP CCmode immediate in a rare case. This requires a change to the CCMP cost calculation as the CCMP instruction with unspec is no longer recognized.

Fix the ccmp_1.c test to allow both '0' and 'wzr' on cmp - BTW is there a regular expression that correctly implements (0|xzr)? If I use that the test still fails somehow but \[0wzr\]+ works fine... Is the correct syntax documented somewhere?

Finally to ensure FCCMPE is emitted on relational compares, add -ffinite-math-only.

ChangeLog:
2016-01-25  Wilco Dijkstra  <wdijkstr@arm.com>

gcc/
	* config/aarch64/aarch64.c (aarch64_if_then_else_costs):
	Remove CONST_INT_P check in CCMP cost calculation.

gcc/testsuite/
	* gcc.target/aarch64/ccmp_1.c: Fix test issues.

---
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 6c570c7db1cfbd0415e73fb110ce5d70aa09b540..7f304b78a3e48862bf5aaf855e307fe90969dd8c 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -6014,7 +6014,7 @@ aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
   else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
     {
       /* CCMP.  */
-      if ((GET_CODE (op1) == COMPARE) && CONST_INT_P (op2))
+      if (GET_CODE (op1) == COMPARE)
 	{
 	  /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0.  */
 	  if (XEXP (op1, 1) == const0_rtx)
diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_1.c b/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
index 7c39b61a585a1d4d662b0736e1c80e06bdc6b4ce..8e3f8629f802eec64c95080a23f320712333471b 100644
--- a/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -ffinite-math-only" } */
 
 int
 f1 (int a)
@@ -85,7 +85,7 @@ f13 (int a, int b)
 /* { dg-final { scan-assembler "cmp\t(.)+34" } } */
 /* { dg-final { scan-assembler "cmp\t(.)+35" } } */
 
-/* { dg-final { scan-assembler-times "\tcmp\tw\[0-9\]+, 0" 4 } } */
+/* { dg-final { scan-assembler-times "\tcmp\tw\[0-9\]+, \[0wzr\]+" 4 } } */
 /* { dg-final { scan-assembler-times "fcmpe\t(.)+0\\.0" 2 } } */
 /* { dg-final { scan-assembler-times "fcmp\t(.)+0\\.0" 2 } } */

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order
  2016-01-25 20:09   ` Wilco Dijkstra
@ 2016-01-25 20:45     ` Richard Henderson
  2016-01-28 14:33     ` James Greenhalgh
  1 sibling, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2016-01-25 20:45 UTC (permalink / raw)
  To: Wilco Dijkstra, Andreas Schwab; +Cc: gcc-patches, Christophe Lyon, nd

On 01/25/2016 12:09 PM, Wilco Dijkstra wrote:
> BTW is there a regular expression that correctly implements (0|xzr)? 

(0|wzr) works fine for me; I've got exactly that fix in one of my trees.


r~

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order
  2016-01-25 20:09   ` Wilco Dijkstra
  2016-01-25 20:45     ` Richard Henderson
@ 2016-01-28 14:33     ` James Greenhalgh
  2016-02-03  9:58       ` James Greenhalgh
  1 sibling, 1 reply; 12+ messages in thread
From: James Greenhalgh @ 2016-01-28 14:33 UTC (permalink / raw)
  To: Wilco Dijkstra
  Cc: Andreas Schwab, Richard Henderson, gcc-patches, Christophe Lyon, nd

On Mon, Jan 25, 2016 at 08:09:39PM +0000, Wilco Dijkstra wrote:
> Andreas Schwab <schwab@linux-m68k.org> wrote:
> 
> > FAIL: gcc.target/aarch64/ccmp_1.c scan-assembler-times \tcmp\tw[0-9]+, 0 4
> > FAIL: gcc.target/aarch64/ccmp_1.c scan-assembler adds\t
> > FAIL: gcc.target/aarch64/ccmp_1.c scan-assembler-times fccmpe\t.*0\\.0 1
> 
> Yes I noticed those too, and here is the fix. Richard's recent change added
> UNSPEC to the CCMP patterns to stop combine optimizing the CCMP CCmode
> immediate in a rare case. This requires a change to the CCMP cost calculation
> as the CCMP instruction with unspec is no longer recognized.
> 
> Fix the ccmp_1.c test to allow both '0' and 'wzr' on cmp - BTW is there a
> regular expression that correctly implements (0|xzr)? If I use that the test
> still fails somehow but \[0wzr\]+ works fine... Is the correct syntax
> documented somewhere?
> 
> Finally to ensure FCCMPE is emitted on relational compares, add
> -ffinite-math-only.

OK.

Thanks,
James

> 
> ChangeLog:
> 2016-01-25  Wilco Dijkstra  <wdijkstr@arm.com>
> 
> gcc/
> 	* config/aarch64/aarch64.c (aarch64_if_then_else_costs):
> 	Remove CONST_INT_P check in CCMP cost calculation.
> 
> gcc/testsuite/
> 	* gcc.target/aarch64/ccmp_1.c: Fix test issues.
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order
  2016-01-28 14:33     ` James Greenhalgh
@ 2016-02-03  9:58       ` James Greenhalgh
  2016-02-03 11:57         ` Wilco Dijkstra
  0 siblings, 1 reply; 12+ messages in thread
From: James Greenhalgh @ 2016-02-03  9:58 UTC (permalink / raw)
  To: Wilco Dijkstra
  Cc: Andreas Schwab, Richard Henderson, gcc-patches, Christophe Lyon, nd

On Thu, Jan 28, 2016 at 02:33:20PM +0000, James Greenhalgh wrote:
> On Mon, Jan 25, 2016 at 08:09:39PM +0000, Wilco Dijkstra wrote:
> > Andreas Schwab <schwab@linux-m68k.org> wrote:
> > 
> > > FAIL: gcc.target/aarch64/ccmp_1.c scan-assembler-times \tcmp\tw[0-9]+, 0 4
> > > FAIL: gcc.target/aarch64/ccmp_1.c scan-assembler adds\t
> > > FAIL: gcc.target/aarch64/ccmp_1.c scan-assembler-times fccmpe\t.*0\\.0 1
> > 
> > Yes I noticed those too, and here is the fix. Richard's recent change added
> > UNSPEC to the CCMP patterns to stop combine optimizing the CCMP CCmode
> > immediate in a rare case. This requires a change to the CCMP cost calculation
> > as the CCMP instruction with unspec is no longer recognized.
> > 
> > Fix the ccmp_1.c test to allow both '0' and 'wzr' on cmp - BTW is there a
> > regular expression that correctly implements (0|xzr)? If I use that the test
> > still fails somehow but \[0wzr\]+ works fine... Is the correct syntax
> > documented somewhere?
> > 
> > Finally to ensure FCCMPE is emitted on relational compares, add
> > -ffinite-math-only.
> > 
> > ChangeLog:
> > 2016-01-25  Wilco Dijkstra  <wdijkstr@arm.com>
> > 
> > gcc/
> > 	* config/aarch64/aarch64.c (aarch64_if_then_else_costs):
> > 	Remove CONST_INT_P check in CCMP cost calculation.
> > 
> > gcc/testsuite/
> > 	* gcc.target/aarch64/ccmp_1.c: Fix test issues.

I'm still seeing:

  FAIL: gcc.target/aarch64/ccmp_1.c scan-assembler-times \\tcmp\\tw[0-9]+, (0|wzr) 4

Looking at the assembly generated for me with this testcase I see ccmp
with zero in 5 places:

  f3:
	cmp	w1, 34
	ccmp	w0, 19, 0, eq
	cset	w0, eq
	ret
  f4:
	cmp	w0, 35
	ccmp	w1, 20, 0, eq
	cset	w0, eq
	ret

  f7:
	cmp	w0, 0
	ccmp	w1, 7, 0, eq
	cset	w0, eq
	ret

  f8:
	cmp	w1, 0
	ccmp	w0, 9, 0, eq
	cset	w0, eq
	ret

  f11:
	fcmpe	d0, #0.0
	ccmp	w0, 30, 0, mi
	cset	w0, eq
	ret

Are these all expected? If so, can you spin the "obvious" patch to bump
this number to 5.

Thanks,
James

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order
  2016-02-03  9:58       ` James Greenhalgh
@ 2016-02-03 11:57         ` Wilco Dijkstra
  0 siblings, 0 replies; 12+ messages in thread
From: Wilco Dijkstra @ 2016-02-03 11:57 UTC (permalink / raw)
  To: James Greenhalgh
  Cc: Andreas Schwab, Richard Henderson, gcc-patches, Christophe Lyon, nd

James Greenhalgh wrote:
> I'm still seeing:
>
>  FAIL: gcc.target/aarch64/ccmp_1.c scan-assembler-times \\tcmp\\tw[0-9]+, (0|wzr) 4

That's because "(0|wzr)" is not correctly matching due to the weird regular expression syntax used in the testsuite (I tried with several escapes to no avail). It looks like Richard committed that, perhaps accidentally? I'll change it back to "0" (count 4 is right as it only matches CMP, not CCMP).

Wilco

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order
  2016-01-19 15:49 ` H.J. Lu
  2016-01-19 16:42   ` Wilco Dijkstra
@ 2016-01-19 18:15   ` Wilco Dijkstra
  1 sibling, 0 replies; 12+ messages in thread
From: Wilco Dijkstra @ 2016-01-19 18:15 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches, nd

H.J. Lu <hjl.tools@gmail.com> wrote:
> It breaks bootstrap on Linux/x86:

Committed trivial fix as r232576:

Index: gcc/ChangeLog
===================================================================
--- gcc/ChangeLog    (revision 232575)
+++ gcc/ChangeLog    (working copy)
@@ -1,3 +1,7 @@
+2016-01-19  Wilco Dijkstra  <wdijkstr@arm.com>
+
+    * ccmp.c (expand_ccmp_expr_1): Avoid spurious unused warnings.
+
  2016-01-19  Jan Hubicka  <hubicka@ucw.cz>

      PR ipa/66223
Index: gcc/ccmp.c
===================================================================
--- gcc/ccmp.c    (revision 232575)
+++ gcc/ccmp.c    (working copy)
@@ -170,7 +170,7 @@
        int unsignedp0, unsignedp1;
        rtx_code rcode0, rcode1;
        int speed_p = optimize_insn_for_speed_p ();
-      rtx tmp2, ret, ret2;
+      rtx tmp2, ret = NULL_RTX, ret2 = NULL_RTX;
        unsigned cost1 = MAX_COST;
        unsigned cost2 = MAX_COST;


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order
  2016-01-19 15:49 ` H.J. Lu
@ 2016-01-19 16:42   ` Wilco Dijkstra
  2016-01-19 18:15   ` Wilco Dijkstra
  1 sibling, 0 replies; 12+ messages in thread
From: Wilco Dijkstra @ 2016-01-19 16:42 UTC (permalink / raw)
  To: H.J. Lu; +Cc: James Greenhalgh, gcc-patches, nd

H.J. Lu <hjl.tools@gmail.com> wrote:
> It breaks bootstrap on Linux/x86:

Sorry about that - it looks like the warning levels seem to have changed since that patch was tested...

I have a trivial fix which I'll get checked in soon.

Wilco

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order
  2015-12-15 10:33 Wilco Dijkstra
@ 2016-01-19 15:49 ` H.J. Lu
  2016-01-19 16:42   ` Wilco Dijkstra
  2016-01-19 18:15   ` Wilco Dijkstra
  0 siblings, 2 replies; 12+ messages in thread
From: H.J. Lu @ 2016-01-19 15:49 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: James Greenhalgh, gcc-patches, nd

On Tue, Dec 15, 2015 at 2:33 AM, Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
> ping
>
>> -----Original Message-----
>> From: Wilco Dijkstra [mailto:Wilco.Dijkstra@arm.com]
>> Sent: 13 November 2015 16:03
>> To: 'gcc-patches@gcc.gnu.org'
>> Subject: [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order
>>
>> This patch adds CCMP selection based on rtx costs. This is based on Jiong's already approved patch https://gcc.gnu.org/ml/gcc-
>> patches/2015-09/msg01434.html with some minor refactoring and the tests updated.
>>
>> OK for commit?
>>
>> ChangeLog:
>> 2015-11-13  Jiong Wang  <jiong.wang@arm.com>
>>
>> gcc/
>>       * ccmp.c (expand_ccmp_expr_1): Cost the instruction sequences
>>       generated from different expand order.
>>

It breaks bootstrap on Linux/x86:

https://gcc.gnu.org/ml/gcc-regression/2016-01/msg00332.html

-- ../../src-trunk/gcc/ccmp.c: In function ârtx_def*
expand_ccmp_expr_1(gimple*, rtx_def**, rtx_def**)â:
../../src-trunk/gcc/ccmp.c:173:14: error: âretâ may be used
uninitialized in this function [-Werror=maybe-uninitialized]
    rtx tmp2, ret, ret2;
              ^~~

cc1plus: all warnings being treated as errors
Makefile:1085: recipe for target 'ccmp.o' failed
make[6]: *** [ccmp.o] Error 1

H.J.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order
@ 2015-12-15 10:33 Wilco Dijkstra
  2016-01-19 15:49 ` H.J. Lu
  0 siblings, 1 reply; 12+ messages in thread
From: Wilco Dijkstra @ 2015-12-15 10:33 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: gcc-patches, nd

ping

> -----Original Message-----
> From: Wilco Dijkstra [mailto:Wilco.Dijkstra@arm.com]
> Sent: 13 November 2015 16:03
> To: 'gcc-patches@gcc.gnu.org'
> Subject: [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order
> 
> This patch adds CCMP selection based on rtx costs. This is based on Jiong's already approved patch https://gcc.gnu.org/ml/gcc-
> patches/2015-09/msg01434.html with some minor refactoring and the tests updated.
> 
> OK for commit?
> 
> ChangeLog:
> 2015-11-13  Jiong Wang  <jiong.wang@arm.com>
> 
> gcc/
> 	* ccmp.c (expand_ccmp_expr_1): Cost the instruction sequences
> 	generated from different expand order.
> 
> gcc/testsuite/
> 	* gcc.target/aarch64/ccmp_1.c: Update test.
> 
> ---
>  gcc/ccmp.c                                | 47 +++++++++++++++++++++++++++----
>  gcc/testsuite/gcc.target/aarch64/ccmp_1.c | 15 ++++++++--
>  2 files changed, 55 insertions(+), 7 deletions(-)
> 
> diff --git a/gcc/ccmp.c b/gcc/ccmp.c
> index cbdbd6d..95a41a6 100644
> --- a/gcc/ccmp.c
> +++ b/gcc/ccmp.c
> @@ -51,6 +51,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-outof-ssa.h"
>  #include "cfgexpand.h"
>  #include "ccmp.h"
> +#include "predict.h"
> 
>  /* The following functions expand conditional compare (CCMP) instructions.
>     Here is a short description about the over all algorithm:
> @@ -159,6 +160,8 @@ expand_ccmp_next (gimple *g, enum tree_code code, rtx prev,
>  static rtx
>  expand_ccmp_expr_1 (gimple *g, rtx *prep_seq, rtx *gen_seq)
>  {
> +  rtx prep_seq_1, gen_seq_1;
> +  rtx prep_seq_2, gen_seq_2;
>    tree exp = gimple_assign_rhs_to_tree (g);
>    enum tree_code code = TREE_CODE (exp);
>    gimple *gs0 = get_gimple_for_ssa_name (TREE_OPERAND (exp, 0));
> @@ -174,19 +177,53 @@ expand_ccmp_expr_1 (gimple *g, rtx *prep_seq, rtx *gen_seq)
>      {
>        if (TREE_CODE_CLASS (code1) == tcc_comparison)
>  	{
> -	  int unsignedp0;
> -	  enum rtx_code rcode0;
> +	  int unsignedp0, unsignedp1;
> +	  enum rtx_code rcode0, rcode1;
> +	  int speed_p = optimize_insn_for_speed_p ();
> +	  rtx tmp2, ret, ret2;
> +	  unsigned cost1 = MAX_COST;
> +	  unsigned cost2 = MAX_COST;
> 
>  	  unsignedp0 = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (gs0)));
> +	  unsignedp1 = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (gs1)));
>  	  rcode0 = get_rtx_code (code0, unsignedp0);
> +	  rcode1 = get_rtx_code (code1, unsignedp1);
> 
> -	  tmp = targetm.gen_ccmp_first (prep_seq, gen_seq, rcode0,
> +	  tmp = targetm.gen_ccmp_first (&prep_seq_1, &gen_seq_1, rcode0,
>  					gimple_assign_rhs1 (gs0),
>  					gimple_assign_rhs2 (gs0));
> -	  if (!tmp)
> +
> +	  tmp2 = targetm.gen_ccmp_first (&prep_seq_2, &gen_seq_2, rcode1,
> +					 gimple_assign_rhs1 (gs1),
> +					 gimple_assign_rhs2 (gs1));
> +
> +	  if (!tmp && !tmp2)
>  	    return NULL_RTX;
> 
> -	  return expand_ccmp_next (gs1, code, tmp, prep_seq, gen_seq);
> +	  if (tmp != NULL)
> +	    {
> +	      ret = expand_ccmp_next (gs1, code, tmp, &prep_seq_1, &gen_seq_1);
> +	      cost1 = seq_cost (safe_as_a <rtx_insn *> (prep_seq_1), speed_p);
> +	      cost1 += seq_cost (safe_as_a <rtx_insn *> (gen_seq_1), speed_p);
> +	    }
> +	  if (tmp2 != NULL)
> +	    {
> +	      ret2 = expand_ccmp_next (gs0, code, tmp2, &prep_seq_2,
> +				       &gen_seq_2);
> +	      cost2 = seq_cost (safe_as_a <rtx_insn *> (prep_seq_2), speed_p);
> +	      cost2 += seq_cost (safe_as_a <rtx_insn *> (gen_seq_2), speed_p);
> +	    }
> +
> +	  if (cost2 < cost1)
> +	    {
> +	      *prep_seq = prep_seq_2;
> +	      *gen_seq = gen_seq_2;
> +	      return ret2;
> +	    }
> +
> +	  *prep_seq = prep_seq_1;
> +	  *gen_seq = gen_seq_1;
> +	  return ret;
>  	}
>        else
>  	{
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_1.c b/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
> index ef077e0..7c39b61 100644
> --- a/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
> @@ -80,5 +80,16 @@ f13 (int a, int b)
>    return a == 3 || a == 0;
>  }
> 
> -/* { dg-final { scan-assembler "fccmp\t" } } */
> -/* { dg-final { scan-assembler "fccmpe\t" } } */
> +/* { dg-final { scan-assembler "cmp\t(.)+32" } } */
> +/* { dg-final { scan-assembler "cmp\t(.)+33" } } */
> +/* { dg-final { scan-assembler "cmp\t(.)+34" } } */
> +/* { dg-final { scan-assembler "cmp\t(.)+35" } } */
> +
> +/* { dg-final { scan-assembler-times "\tcmp\tw\[0-9\]+, 0" 4 } } */
> +/* { dg-final { scan-assembler-times "fcmpe\t(.)+0\\.0" 2 } } */
> +/* { dg-final { scan-assembler-times "fcmp\t(.)+0\\.0" 2 } } */
> +
> +/* { dg-final { scan-assembler "adds\t" } } */
> +/* { dg-final { scan-assembler-times "\tccmp\t" 11 } } */
> +/* { dg-final { scan-assembler-times "fccmp\t.*0\\.0" 1 } } */
> +/* { dg-final { scan-assembler-times "fccmpe\t.*0\\.0" 1 } } */
> --
> 1.9.1

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2016-02-03 11:57 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-13 16:03 [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order Wilco Dijkstra
2016-01-22 14:45 ` Andreas Schwab
2016-01-23 10:40 ` Andreas Schwab
2016-01-25 20:09   ` Wilco Dijkstra
2016-01-25 20:45     ` Richard Henderson
2016-01-28 14:33     ` James Greenhalgh
2016-02-03  9:58       ` James Greenhalgh
2016-02-03 11:57         ` Wilco Dijkstra
2015-12-15 10:33 Wilco Dijkstra
2016-01-19 15:49 ` H.J. Lu
2016-01-19 16:42   ` Wilco Dijkstra
2016-01-19 18:15   ` Wilco Dijkstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).