public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* RE: [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order
@ 2015-12-15 10:33 Wilco Dijkstra
  2015-12-15 17:30 ` [PATCH 4/4] " Jiong Wang
  2016-01-19 15:49 ` [PATCH 4/4][AArch64] " H.J. Lu
  0 siblings, 2 replies; 14+ messages in thread
From: Wilco Dijkstra @ 2015-12-15 10:33 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: gcc-patches, nd

ping

> -----Original Message-----
> From: Wilco Dijkstra [mailto:Wilco.Dijkstra@arm.com]
> Sent: 13 November 2015 16:03
> To: 'gcc-patches@gcc.gnu.org'
> Subject: [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order
> 
> This patch adds CCMP selection based on rtx costs. This is based on Jiong's already approved patch
> https://gcc.gnu.org/ml/gcc-patches/2015-09/msg01434.html with some minor refactoring and the tests updated.
> 
> OK for commit?
> 
> ChangeLog:
> 2015-11-13  Jiong Wang  <jiong.wang@arm.com>
> 
> gcc/
> 	* ccmp.c (expand_ccmp_expr_1): Cost the instruction sequences
> 	generated from different expand order.
> 
> gcc/testsuite/
> 	* gcc.target/aarch64/ccmp_1.c: Update test.
> 
> ---
>  gcc/ccmp.c                                | 47 +++++++++++++++++++++++++++----
>  gcc/testsuite/gcc.target/aarch64/ccmp_1.c | 15 ++++++++--
>  2 files changed, 55 insertions(+), 7 deletions(-)
> 
> diff --git a/gcc/ccmp.c b/gcc/ccmp.c
> index cbdbd6d..95a41a6 100644
> --- a/gcc/ccmp.c
> +++ b/gcc/ccmp.c
> @@ -51,6 +51,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-outof-ssa.h"
>  #include "cfgexpand.h"
>  #include "ccmp.h"
> +#include "predict.h"
> 
>  /* The following functions expand conditional compare (CCMP) instructions.
>     Here is a short description about the over all algorithm:
> @@ -159,6 +160,8 @@ expand_ccmp_next (gimple *g, enum tree_code code, rtx prev,
>  static rtx
>  expand_ccmp_expr_1 (gimple *g, rtx *prep_seq, rtx *gen_seq)
>  {
> +  rtx prep_seq_1, gen_seq_1;
> +  rtx prep_seq_2, gen_seq_2;
>    tree exp = gimple_assign_rhs_to_tree (g);
>    enum tree_code code = TREE_CODE (exp);
>    gimple *gs0 = get_gimple_for_ssa_name (TREE_OPERAND (exp, 0));
> @@ -174,19 +177,53 @@ expand_ccmp_expr_1 (gimple *g, rtx *prep_seq, rtx *gen_seq)
>      {
>        if (TREE_CODE_CLASS (code1) == tcc_comparison)
>  	{
> -	  int unsignedp0;
> -	  enum rtx_code rcode0;
> +	  int unsignedp0, unsignedp1;
> +	  enum rtx_code rcode0, rcode1;
> +	  int speed_p = optimize_insn_for_speed_p ();
> +	  rtx tmp2, ret, ret2;
> +	  unsigned cost1 = MAX_COST;
> +	  unsigned cost2 = MAX_COST;
> 
>  	  unsignedp0 = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (gs0)));
> +	  unsignedp1 = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (gs1)));
>  	  rcode0 = get_rtx_code (code0, unsignedp0);
> +	  rcode1 = get_rtx_code (code1, unsignedp1);
> 
> -	  tmp = targetm.gen_ccmp_first (prep_seq, gen_seq, rcode0,
> +	  tmp = targetm.gen_ccmp_first (&prep_seq_1, &gen_seq_1, rcode0,
>  					gimple_assign_rhs1 (gs0),
>  					gimple_assign_rhs2 (gs0));
> -	  if (!tmp)
> +
> +	  tmp2 = targetm.gen_ccmp_first (&prep_seq_2, &gen_seq_2, rcode1,
> +					 gimple_assign_rhs1 (gs1),
> +					 gimple_assign_rhs2 (gs1));
> +
> +	  if (!tmp && !tmp2)
>  	    return NULL_RTX;
> 
> -	  return expand_ccmp_next (gs1, code, tmp, prep_seq, gen_seq);
> +	  if (tmp != NULL)
> +	    {
> +	      ret = expand_ccmp_next (gs1, code, tmp, &prep_seq_1, &gen_seq_1);
> +	      cost1 = seq_cost (safe_as_a <rtx_insn *> (prep_seq_1), speed_p);
> +	      cost1 += seq_cost (safe_as_a <rtx_insn *> (gen_seq_1), speed_p);
> +	    }
> +	  if (tmp2 != NULL)
> +	    {
> +	      ret2 = expand_ccmp_next (gs0, code, tmp2, &prep_seq_2,
> +				       &gen_seq_2);
> +	      cost2 = seq_cost (safe_as_a <rtx_insn *> (prep_seq_2), speed_p);
> +	      cost2 += seq_cost (safe_as_a <rtx_insn *> (gen_seq_2), speed_p);
> +	    }
> +
> +	  if (cost2 < cost1)
> +	    {
> +	      *prep_seq = prep_seq_2;
> +	      *gen_seq = gen_seq_2;
> +	      return ret2;
> +	    }
> +
> +	  *prep_seq = prep_seq_1;
> +	  *gen_seq = gen_seq_1;
> +	  return ret;
>  	}
>        else
>  	{
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_1.c b/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
> index ef077e0..7c39b61 100644
> --- a/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
> @@ -80,5 +80,16 @@ f13 (int a, int b)
>    return a == 3 || a == 0;
>  }
> 
> -/* { dg-final { scan-assembler "fccmp\t" } } */
> -/* { dg-final { scan-assembler "fccmpe\t" } } */
> +/* { dg-final { scan-assembler "cmp\t(.)+32" } } */
> +/* { dg-final { scan-assembler "cmp\t(.)+33" } } */
> +/* { dg-final { scan-assembler "cmp\t(.)+34" } } */
> +/* { dg-final { scan-assembler "cmp\t(.)+35" } } */
> +
> +/* { dg-final { scan-assembler-times "\tcmp\tw\[0-9\]+, 0" 4 } } */
> +/* { dg-final { scan-assembler-times "fcmpe\t(.)+0\\.0" 2 } } */
> +/* { dg-final { scan-assembler-times "fcmp\t(.)+0\\.0" 2 } } */
> +
> +/* { dg-final { scan-assembler "adds\t" } } */
> +/* { dg-final { scan-assembler-times "\tccmp\t" 11 } } */
> +/* { dg-final { scan-assembler-times "fccmp\t.*0\\.0" 1 } } */
> +/* { dg-final { scan-assembler-times "fccmpe\t.*0\\.0" 1 } } */
> --
> 1.9.1

^ permalink raw reply	[flat|nested] 14+ messages in thread
* [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order
@ 2015-11-13 16:03 Wilco Dijkstra
  2016-01-22 14:45 ` Andreas Schwab
  2016-01-23 10:40 ` Andreas Schwab
  0 siblings, 2 replies; 14+ messages in thread
From: Wilco Dijkstra @ 2015-11-13 16:03 UTC (permalink / raw)
  To: gcc-patches

This patch adds CCMP selection based on rtx costs. This is based on Jiong's
already approved patch
https://gcc.gnu.org/ml/gcc-patches/2015-09/msg01434.html with some minor
refactoring and the tests updated.

OK for commit?

ChangeLog:
2015-11-13  Jiong Wang  <jiong.wang@arm.com>

gcc/
	* ccmp.c (expand_ccmp_expr_1): Cost the instruction sequences
	generated from different expand order.
  
gcc/testsuite/
	* gcc.target/aarch64/ccmp_1.c: Update test.

---
 gcc/ccmp.c                                | 47 +++++++++++++++++++++++++++----
 gcc/testsuite/gcc.target/aarch64/ccmp_1.c | 15 ++++++++--
 2 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/gcc/ccmp.c b/gcc/ccmp.c
index cbdbd6d..95a41a6 100644
--- a/gcc/ccmp.c
+++ b/gcc/ccmp.c
@@ -51,6 +51,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-outof-ssa.h"
 #include "cfgexpand.h"
 #include "ccmp.h"
+#include "predict.h"
 
 /* The following functions expand conditional compare (CCMP) instructions.
    Here is a short description about the over all algorithm:
@@ -159,6 +160,8 @@ expand_ccmp_next (gimple *g, enum tree_code code, rtx prev,
 static rtx
 expand_ccmp_expr_1 (gimple *g, rtx *prep_seq, rtx *gen_seq)
 {
+  rtx prep_seq_1, gen_seq_1;
+  rtx prep_seq_2, gen_seq_2;
   tree exp = gimple_assign_rhs_to_tree (g);
   enum tree_code code = TREE_CODE (exp);
   gimple *gs0 = get_gimple_for_ssa_name (TREE_OPERAND (exp, 0));
@@ -174,19 +177,53 @@ expand_ccmp_expr_1 (gimple *g, rtx *prep_seq, rtx *gen_seq)
     {
       if (TREE_CODE_CLASS (code1) == tcc_comparison)
 	{
-	  int unsignedp0;
-	  enum rtx_code rcode0;
+	  int unsignedp0, unsignedp1;
+	  enum rtx_code rcode0, rcode1;
+	  int speed_p = optimize_insn_for_speed_p ();
+	  rtx tmp2, ret, ret2;
+	  unsigned cost1 = MAX_COST;
+	  unsigned cost2 = MAX_COST;
 
 	  unsignedp0 = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (gs0)));
+	  unsignedp1 = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (gs1)));
 	  rcode0 = get_rtx_code (code0, unsignedp0);
+	  rcode1 = get_rtx_code (code1, unsignedp1);
 
-	  tmp = targetm.gen_ccmp_first (prep_seq, gen_seq, rcode0,
+	  tmp = targetm.gen_ccmp_first (&prep_seq_1, &gen_seq_1, rcode0,
 					gimple_assign_rhs1 (gs0),
 					gimple_assign_rhs2 (gs0));
-	  if (!tmp)
+
+	  tmp2 = targetm.gen_ccmp_first (&prep_seq_2, &gen_seq_2, rcode1,
+					 gimple_assign_rhs1 (gs1),
+					 gimple_assign_rhs2 (gs1));
+
+	  if (!tmp && !tmp2)
 	    return NULL_RTX;
 
-	  return expand_ccmp_next (gs1, code, tmp, prep_seq, gen_seq);
+	  if (tmp != NULL)
+	    {
+	      ret = expand_ccmp_next (gs1, code, tmp, &prep_seq_1, &gen_seq_1);
+	      cost1 = seq_cost (safe_as_a <rtx_insn *> (prep_seq_1), speed_p);
+	      cost1 += seq_cost (safe_as_a <rtx_insn *> (gen_seq_1), speed_p);
+	    }
+	  if (tmp2 != NULL)
+	    {
+	      ret2 = expand_ccmp_next (gs0, code, tmp2, &prep_seq_2,
+				       &gen_seq_2);
+	      cost2 = seq_cost (safe_as_a <rtx_insn *> (prep_seq_2), speed_p);
+	      cost2 += seq_cost (safe_as_a <rtx_insn *> (gen_seq_2), speed_p);
+	    }
+
+	  if (cost2 < cost1)
+	    {
+	      *prep_seq = prep_seq_2;
+	      *gen_seq = gen_seq_2;
+	      return ret2;
+	    }
+
+	  *prep_seq = prep_seq_1;
+	  *gen_seq = gen_seq_1;
+	  return ret;
 	}
       else
 	{
diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_1.c b/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
index ef077e0..7c39b61 100644
--- a/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
@@ -80,5 +80,16 @@ f13 (int a, int b)
   return a == 3 || a == 0;
 }
 
-/* { dg-final { scan-assembler "fccmp\t" } } */
-/* { dg-final { scan-assembler "fccmpe\t" } } */
+/* { dg-final { scan-assembler "cmp\t(.)+32" } } */
+/* { dg-final { scan-assembler "cmp\t(.)+33" } } */
+/* { dg-final { scan-assembler "cmp\t(.)+34" } } */
+/* { dg-final { scan-assembler "cmp\t(.)+35" } } */
+
+/* { dg-final { scan-assembler-times "\tcmp\tw\[0-9\]+, 0" 4 } } */
+/* { dg-final { scan-assembler-times "fcmpe\t(.)+0\\.0" 2 } } */
+/* { dg-final { scan-assembler-times "fcmp\t(.)+0\\.0" 2 } } */
+
+/* { dg-final { scan-assembler "adds\t" } } */
+/* { dg-final { scan-assembler-times "\tccmp\t" 11 } } */
+/* { dg-final { scan-assembler-times "fccmp\t.*0\\.0" 1 } } */
+/* { dg-final { scan-assembler-times "fccmpe\t.*0\\.0" 1 } } */
-- 
1.9.1



^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2016-02-03 11:57 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-15 10:33 [PATCH 4/4][AArch64] Cost CCMP instruction sequences to choose better expand order Wilco Dijkstra
2015-12-15 17:30 ` [PATCH 4/4] " Jiong Wang
2015-12-15 23:49   ` Bernd Schmidt
2016-01-19 15:49 ` [PATCH 4/4][AArch64] " H.J. Lu
2016-01-19 16:42   ` Wilco Dijkstra
2016-01-19 18:15   ` Wilco Dijkstra
  -- strict thread matches above, loose matches on Subject: below --
2015-11-13 16:03 Wilco Dijkstra
2016-01-22 14:45 ` Andreas Schwab
2016-01-23 10:40 ` Andreas Schwab
2016-01-25 20:09   ` Wilco Dijkstra
2016-01-25 20:45     ` Richard Henderson
2016-01-28 14:33     ` James Greenhalgh
2016-02-03  9:58       ` James Greenhalgh
2016-02-03 11:57         ` Wilco Dijkstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).