public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942]
@ 2023-12-12  8:21 Andrew Pinski
  2023-12-13  1:08 ` Andrew Pinski
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Andrew Pinski @ 2023-12-12  8:21 UTC (permalink / raw)
  To: gcc-patches

Ccmp is not used if the result of the and/ior is used by both
a GIMPLE_COND and a GIMPLE_ASSIGN. This improves the code generation
here by using ccmp in this case.
Two changes is required, first we need to allow the outer statement's
result be used more than once.
The second change is that during the expansion of the gimple, we need
to try using ccmp. This is needed because we don't use expand the ssa
name of the lhs but rather expand directly from the gimple.

A small note on the ccmp_4.c testcase, we should be able to get slightly
better than with this patch but it is one extra instruction compared to
before.

Bootstraped and tested on aarch64-linux-gnu with no regressions.

	PR target/100942

gcc/ChangeLog:

	* ccmp.cc (ccmp_candidate_p): Add outer argument.
	Allow if the outer is true and the lhs is used more
	than once.
	(expand_ccmp_expr): Update call to ccmp_candidate_p.
	* cfgexpand.cc (expand_gimple_stmt_1): Try using ccmp
	for binary assignments.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/ccmp_3.c: New test.
	* gcc.target/aarch64/ccmp_4.c: New test.

Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
---
 gcc/ccmp.cc                               |  9 +++---
 gcc/cfgexpand.cc                          | 25 ++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/ccmp_3.c | 20 +++++++++++++
 gcc/testsuite/gcc.target/aarch64/ccmp_4.c | 35 +++++++++++++++++++++++
 4 files changed, 85 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_4.c

diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc
index 1bd6fadea35..a274f8c3d53 100644
--- a/gcc/ccmp.cc
+++ b/gcc/ccmp.cc
@@ -92,7 +92,7 @@ ccmp_tree_comparison_p (tree t, basic_block bb)
 
 /* Check whether G is a potential conditional compare candidate.  */
 static bool
-ccmp_candidate_p (gimple *g)
+ccmp_candidate_p (gimple *g, bool outer = false)
 {
   tree lhs, op0, op1;
   gimple *gs0, *gs1;
@@ -109,8 +109,9 @@ ccmp_candidate_p (gimple *g)
   lhs = gimple_assign_lhs (g);
   op0 = gimple_assign_rhs1 (g);
   op1 = gimple_assign_rhs2 (g);
-  if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME)
-      || !has_single_use (lhs))
+  if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME))
+    return false;
+  if (!outer && !has_single_use (lhs))
     return false;
 
   bb = gimple_bb (g);
@@ -284,7 +285,7 @@ expand_ccmp_expr (gimple *g, machine_mode mode)
   rtx_insn *last;
   rtx tmp;
 
-  if (!ccmp_candidate_p (g))
+  if (!ccmp_candidate_p (g, true))
     return NULL_RTX;
 
   last = get_last_insn ();
diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index b860be8bb77..0f9aad8e3eb 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -74,6 +74,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "output.h"
 #include "builtins.h"
 #include "opts.h"
+#include "ccmp.h"
 
 /* Some systems use __main in a way incompatible with its use in gcc, in these
    cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN to
@@ -3972,6 +3973,30 @@ expand_gimple_stmt_1 (gimple *stmt)
 	    if (GET_CODE (target) == SUBREG && SUBREG_PROMOTED_VAR_P (target))
 	      promoted = true;
 
+	    /* Try to expand conditonal compare.  */
+	    if (targetm.gen_ccmp_first
+		&& gimple_assign_rhs_class (assign_stmt) == GIMPLE_BINARY_RHS)
+	      {
+		machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
+		gcc_checking_assert (targetm.gen_ccmp_next != NULL);
+		temp = expand_ccmp_expr (stmt, mode);
+		if (temp)
+		  {
+		    if (promoted)
+		      {
+			int unsignedp = SUBREG_PROMOTED_SIGN (target);
+			convert_move (SUBREG_REG (target), temp, unsignedp);
+		      }
+		    else
+		     {
+			temp = force_operand (temp, target);
+			if (temp != target)
+			  emit_move_insn (target, temp);
+		      }
+		    return;
+		  }
+	      }
+
 	    ops.code = gimple_assign_rhs_code (assign_stmt);
 	    ops.type = TREE_TYPE (lhs);
 	    switch (get_gimple_rhs_class (ops.code))
diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
new file mode 100644
index 00000000000..a2b47fbee14
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
@@ -0,0 +1,20 @@
+/* { dg-options "-O2" } */
+/* PR target/100942 */
+
+void foo(void);
+int f1(int a, int b)
+{
+  int c = a == 0 || b == 0;
+  if (c) foo();
+  return c;
+}
+
+/* We should get one cmp followed by ccmp and one cset. */
+/* { dg-final { scan-assembler "\tccmp\t" } } */
+/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
+/* { dg-final { scan-assembler-times "\tcmp\t" 1 } } */
+/* And not get 2 cmps and 2 (or more cset) and orr and a cbnz. */
+/* { dg-final { scan-assembler-not "\torr\t" } } */
+/* { dg-final { scan-assembler-not "\tcbnz\t" } } */
+/* { dg-final { scan-assembler-not "\tcbz\t" } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_4.c b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
new file mode 100644
index 00000000000..bc0f57a7c59
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
@@ -0,0 +1,35 @@
+/* { dg-options "-O2" } */
+/* PR target/100942 */
+
+void foo(void);
+int f1(int a, int b, int d)
+{
+  int c = a < 8 || b < 9;
+  int e = d < 11 || c;
+  if (e) foo();
+  return c;
+}
+
+/*
+  We really should get:
+        cmp     w0, 7
+        ccmp    w1, 8, 4, gt
+        cset    w0, le
+        ccmp    w2, 10, 4, gt
+        ble     .L11
+
+  But we currently get:
+        cmp     w0, 7
+        ccmp    w1, 8, 4, gt
+        cset    w0, le
+        cmp     w0, 0
+        ccmp    w2, 10, 4, eq
+        ble     .L11
+  The middle cmp is not needed.
+ */
+
+/* We should end up with only one cmp and 2 ccmp and 1 cset but currently we get 2 cmp
+   though. */
+/* { dg-final { scan-assembler-times "\tccmp\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
+/* { dg-final { scan-assembler-times "\tcmp\t" 1 { xfail *-*-* } } } */
-- 
2.39.3


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942]
  2023-12-12  8:21 [PATCH] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942] Andrew Pinski
@ 2023-12-13  1:08 ` Andrew Pinski
  2024-01-02 17:34 ` Andrew Pinski
  2024-01-02 22:45 ` Richard Sandiford
  2 siblings, 0 replies; 4+ messages in thread
From: Andrew Pinski @ 2023-12-13  1:08 UTC (permalink / raw)
  To: Andrew Pinski; +Cc: gcc-patches

On Tue, Dec 12, 2023 at 12:22 AM Andrew Pinski <quic_apinski@quicinc.com> wrote:
>
> Ccmp is not used if the result of the and/ior is used by both
> a GIMPLE_COND and a GIMPLE_ASSIGN. This improves the code generation
> here by using ccmp in this case.
> Two changes is required, first we need to allow the outer statement's
> result be used more than once.
> The second change is that during the expansion of the gimple, we need
> to try using ccmp. This is needed because we don't use expand the ssa
> name of the lhs but rather expand directly from the gimple.
>
> A small note on the ccmp_4.c testcase, we should be able to get slightly
> better than with this patch but it is one extra instruction compared to
> before.

Just FYI, this pattern shows up a few times in GCC itself even.

Thanks,
Andrew Pinski

>
> Bootstraped and tested on aarch64-linux-gnu with no regressions.
>
>         PR target/100942
>
> gcc/ChangeLog:
>
>         * ccmp.cc (ccmp_candidate_p): Add outer argument.
>         Allow if the outer is true and the lhs is used more
>         than once.
>         (expand_ccmp_expr): Update call to ccmp_candidate_p.
>         * cfgexpand.cc (expand_gimple_stmt_1): Try using ccmp
>         for binary assignments.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/aarch64/ccmp_3.c: New test.
>         * gcc.target/aarch64/ccmp_4.c: New test.
>
> Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
> ---
>  gcc/ccmp.cc                               |  9 +++---
>  gcc/cfgexpand.cc                          | 25 ++++++++++++++++
>  gcc/testsuite/gcc.target/aarch64/ccmp_3.c | 20 +++++++++++++
>  gcc/testsuite/gcc.target/aarch64/ccmp_4.c | 35 +++++++++++++++++++++++
>  4 files changed, 85 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_4.c
>
> diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc
> index 1bd6fadea35..a274f8c3d53 100644
> --- a/gcc/ccmp.cc
> +++ b/gcc/ccmp.cc
> @@ -92,7 +92,7 @@ ccmp_tree_comparison_p (tree t, basic_block bb)
>
>  /* Check whether G is a potential conditional compare candidate.  */
>  static bool
> -ccmp_candidate_p (gimple *g)
> +ccmp_candidate_p (gimple *g, bool outer = false)
>  {
>    tree lhs, op0, op1;
>    gimple *gs0, *gs1;
> @@ -109,8 +109,9 @@ ccmp_candidate_p (gimple *g)
>    lhs = gimple_assign_lhs (g);
>    op0 = gimple_assign_rhs1 (g);
>    op1 = gimple_assign_rhs2 (g);
> -  if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME)
> -      || !has_single_use (lhs))
> +  if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME))
> +    return false;
> +  if (!outer && !has_single_use (lhs))
>      return false;
>
>    bb = gimple_bb (g);
> @@ -284,7 +285,7 @@ expand_ccmp_expr (gimple *g, machine_mode mode)
>    rtx_insn *last;
>    rtx tmp;
>
> -  if (!ccmp_candidate_p (g))
> +  if (!ccmp_candidate_p (g, true))
>      return NULL_RTX;
>
>    last = get_last_insn ();
> diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
> index b860be8bb77..0f9aad8e3eb 100644
> --- a/gcc/cfgexpand.cc
> +++ b/gcc/cfgexpand.cc
> @@ -74,6 +74,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "output.h"
>  #include "builtins.h"
>  #include "opts.h"
> +#include "ccmp.h"
>
>  /* Some systems use __main in a way incompatible with its use in gcc, in these
>     cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN to
> @@ -3972,6 +3973,30 @@ expand_gimple_stmt_1 (gimple *stmt)
>             if (GET_CODE (target) == SUBREG && SUBREG_PROMOTED_VAR_P (target))
>               promoted = true;
>
> +           /* Try to expand conditonal compare.  */
> +           if (targetm.gen_ccmp_first
> +               && gimple_assign_rhs_class (assign_stmt) == GIMPLE_BINARY_RHS)
> +             {
> +               machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
> +               gcc_checking_assert (targetm.gen_ccmp_next != NULL);
> +               temp = expand_ccmp_expr (stmt, mode);
> +               if (temp)
> +                 {
> +                   if (promoted)
> +                     {
> +                       int unsignedp = SUBREG_PROMOTED_SIGN (target);
> +                       convert_move (SUBREG_REG (target), temp, unsignedp);
> +                     }
> +                   else
> +                    {
> +                       temp = force_operand (temp, target);
> +                       if (temp != target)
> +                         emit_move_insn (target, temp);
> +                     }
> +                   return;
> +                 }
> +             }
> +
>             ops.code = gimple_assign_rhs_code (assign_stmt);
>             ops.type = TREE_TYPE (lhs);
>             switch (get_gimple_rhs_class (ops.code))
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> new file mode 100644
> index 00000000000..a2b47fbee14
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> @@ -0,0 +1,20 @@
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +
> +void foo(void);
> +int f1(int a, int b)
> +{
> +  int c = a == 0 || b == 0;
> +  if (c) foo();
> +  return c;
> +}
> +
> +/* We should get one cmp followed by ccmp and one cset. */
> +/* { dg-final { scan-assembler "\tccmp\t" } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 1 } } */
> +/* And not get 2 cmps and 2 (or more cset) and orr and a cbnz. */
> +/* { dg-final { scan-assembler-not "\torr\t" } } */
> +/* { dg-final { scan-assembler-not "\tcbnz\t" } } */
> +/* { dg-final { scan-assembler-not "\tcbz\t" } } */
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_4.c b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> new file mode 100644
> index 00000000000..bc0f57a7c59
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> @@ -0,0 +1,35 @@
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +
> +void foo(void);
> +int f1(int a, int b, int d)
> +{
> +  int c = a < 8 || b < 9;
> +  int e = d < 11 || c;
> +  if (e) foo();
> +  return c;
> +}
> +
> +/*
> +  We really should get:
> +        cmp     w0, 7
> +        ccmp    w1, 8, 4, gt
> +        cset    w0, le
> +        ccmp    w2, 10, 4, gt
> +        ble     .L11
> +
> +  But we currently get:
> +        cmp     w0, 7
> +        ccmp    w1, 8, 4, gt
> +        cset    w0, le
> +        cmp     w0, 0
> +        ccmp    w2, 10, 4, eq
> +        ble     .L11
> +  The middle cmp is not needed.
> + */
> +
> +/* We should end up with only one cmp and 2 ccmp and 1 cset but currently we get 2 cmp
> +   though. */
> +/* { dg-final { scan-assembler-times "\tccmp\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 1 { xfail *-*-* } } } */
> --
> 2.39.3
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942]
  2023-12-12  8:21 [PATCH] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942] Andrew Pinski
  2023-12-13  1:08 ` Andrew Pinski
@ 2024-01-02 17:34 ` Andrew Pinski
  2024-01-02 22:45 ` Richard Sandiford
  2 siblings, 0 replies; 4+ messages in thread
From: Andrew Pinski @ 2024-01-02 17:34 UTC (permalink / raw)
  To: Andrew Pinski; +Cc: gcc-patches

On Tue, Dec 12, 2023 at 12:22 AM Andrew Pinski <quic_apinski@quicinc.com> wrote:
>
> Ccmp is not used if the result of the and/ior is used by both
> a GIMPLE_COND and a GIMPLE_ASSIGN. This improves the code generation
> here by using ccmp in this case.
> Two changes is required, first we need to allow the outer statement's
> result be used more than once.
> The second change is that during the expansion of the gimple, we need
> to try using ccmp. This is needed because we don't use expand the ssa
> name of the lhs but rather expand directly from the gimple.
>
> A small note on the ccmp_4.c testcase, we should be able to get slightly
> better than with this patch but it is one extra instruction compared to
> before.
>
> Bootstraped and tested on aarch64-linux-gnu with no regressions.

Ping?

>
>         PR target/100942
>
> gcc/ChangeLog:
>
>         * ccmp.cc (ccmp_candidate_p): Add outer argument.
>         Allow if the outer is true and the lhs is used more
>         than once.
>         (expand_ccmp_expr): Update call to ccmp_candidate_p.
>         * cfgexpand.cc (expand_gimple_stmt_1): Try using ccmp
>         for binary assignments.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/aarch64/ccmp_3.c: New test.
>         * gcc.target/aarch64/ccmp_4.c: New test.
>
> Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
> ---
>  gcc/ccmp.cc                               |  9 +++---
>  gcc/cfgexpand.cc                          | 25 ++++++++++++++++
>  gcc/testsuite/gcc.target/aarch64/ccmp_3.c | 20 +++++++++++++
>  gcc/testsuite/gcc.target/aarch64/ccmp_4.c | 35 +++++++++++++++++++++++
>  4 files changed, 85 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_4.c
>
> diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc
> index 1bd6fadea35..a274f8c3d53 100644
> --- a/gcc/ccmp.cc
> +++ b/gcc/ccmp.cc
> @@ -92,7 +92,7 @@ ccmp_tree_comparison_p (tree t, basic_block bb)
>
>  /* Check whether G is a potential conditional compare candidate.  */
>  static bool
> -ccmp_candidate_p (gimple *g)
> +ccmp_candidate_p (gimple *g, bool outer = false)
>  {
>    tree lhs, op0, op1;
>    gimple *gs0, *gs1;
> @@ -109,8 +109,9 @@ ccmp_candidate_p (gimple *g)
>    lhs = gimple_assign_lhs (g);
>    op0 = gimple_assign_rhs1 (g);
>    op1 = gimple_assign_rhs2 (g);
> -  if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME)
> -      || !has_single_use (lhs))
> +  if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME))
> +    return false;
> +  if (!outer && !has_single_use (lhs))
>      return false;
>
>    bb = gimple_bb (g);
> @@ -284,7 +285,7 @@ expand_ccmp_expr (gimple *g, machine_mode mode)
>    rtx_insn *last;
>    rtx tmp;
>
> -  if (!ccmp_candidate_p (g))
> +  if (!ccmp_candidate_p (g, true))
>      return NULL_RTX;
>
>    last = get_last_insn ();
> diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
> index b860be8bb77..0f9aad8e3eb 100644
> --- a/gcc/cfgexpand.cc
> +++ b/gcc/cfgexpand.cc
> @@ -74,6 +74,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "output.h"
>  #include "builtins.h"
>  #include "opts.h"
> +#include "ccmp.h"
>
>  /* Some systems use __main in a way incompatible with its use in gcc, in these
>     cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN to
> @@ -3972,6 +3973,30 @@ expand_gimple_stmt_1 (gimple *stmt)
>             if (GET_CODE (target) == SUBREG && SUBREG_PROMOTED_VAR_P (target))
>               promoted = true;
>
> +           /* Try to expand conditonal compare.  */
> +           if (targetm.gen_ccmp_first
> +               && gimple_assign_rhs_class (assign_stmt) == GIMPLE_BINARY_RHS)
> +             {
> +               machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
> +               gcc_checking_assert (targetm.gen_ccmp_next != NULL);
> +               temp = expand_ccmp_expr (stmt, mode);
> +               if (temp)
> +                 {
> +                   if (promoted)
> +                     {
> +                       int unsignedp = SUBREG_PROMOTED_SIGN (target);
> +                       convert_move (SUBREG_REG (target), temp, unsignedp);
> +                     }
> +                   else
> +                    {
> +                       temp = force_operand (temp, target);
> +                       if (temp != target)
> +                         emit_move_insn (target, temp);
> +                     }
> +                   return;
> +                 }
> +             }
> +
>             ops.code = gimple_assign_rhs_code (assign_stmt);
>             ops.type = TREE_TYPE (lhs);
>             switch (get_gimple_rhs_class (ops.code))
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> new file mode 100644
> index 00000000000..a2b47fbee14
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> @@ -0,0 +1,20 @@
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +
> +void foo(void);
> +int f1(int a, int b)
> +{
> +  int c = a == 0 || b == 0;
> +  if (c) foo();
> +  return c;
> +}
> +
> +/* We should get one cmp followed by ccmp and one cset. */
> +/* { dg-final { scan-assembler "\tccmp\t" } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 1 } } */
> +/* And not get 2 cmps and 2 (or more cset) and orr and a cbnz. */
> +/* { dg-final { scan-assembler-not "\torr\t" } } */
> +/* { dg-final { scan-assembler-not "\tcbnz\t" } } */
> +/* { dg-final { scan-assembler-not "\tcbz\t" } } */
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_4.c b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> new file mode 100644
> index 00000000000..bc0f57a7c59
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> @@ -0,0 +1,35 @@
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +
> +void foo(void);
> +int f1(int a, int b, int d)
> +{
> +  int c = a < 8 || b < 9;
> +  int e = d < 11 || c;
> +  if (e) foo();
> +  return c;
> +}
> +
> +/*
> +  We really should get:
> +        cmp     w0, 7
> +        ccmp    w1, 8, 4, gt
> +        cset    w0, le
> +        ccmp    w2, 10, 4, gt
> +        ble     .L11
> +
> +  But we currently get:
> +        cmp     w0, 7
> +        ccmp    w1, 8, 4, gt
> +        cset    w0, le
> +        cmp     w0, 0
> +        ccmp    w2, 10, 4, eq
> +        ble     .L11
> +  The middle cmp is not needed.
> + */
> +
> +/* We should end up with only one cmp and 2 ccmp and 1 cset but currently we get 2 cmp
> +   though. */
> +/* { dg-final { scan-assembler-times "\tccmp\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 1 { xfail *-*-* } } } */
> --
> 2.39.3
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942]
  2023-12-12  8:21 [PATCH] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942] Andrew Pinski
  2023-12-13  1:08 ` Andrew Pinski
  2024-01-02 17:34 ` Andrew Pinski
@ 2024-01-02 22:45 ` Richard Sandiford
  2 siblings, 0 replies; 4+ messages in thread
From: Richard Sandiford @ 2024-01-02 22:45 UTC (permalink / raw)
  To: Andrew Pinski; +Cc: gcc-patches

Andrew Pinski <quic_apinski@quicinc.com> writes:
> Ccmp is not used if the result of the and/ior is used by both
> a GIMPLE_COND and a GIMPLE_ASSIGN. This improves the code generation
> here by using ccmp in this case.
> Two changes is required, first we need to allow the outer statement's
> result be used more than once.
> The second change is that during the expansion of the gimple, we need
> to try using ccmp. This is needed because we don't use expand the ssa
> name of the lhs but rather expand directly from the gimple.
>
> A small note on the ccmp_4.c testcase, we should be able to get slightly
> better than with this patch but it is one extra instruction compared to
> before.
>
> Bootstraped and tested on aarch64-linux-gnu with no regressions.
>
> 	PR target/100942
>
> gcc/ChangeLog:
>
> 	* ccmp.cc (ccmp_candidate_p): Add outer argument.
> 	Allow if the outer is true and the lhs is used more
> 	than once.
> 	(expand_ccmp_expr): Update call to ccmp_candidate_p.
> 	* cfgexpand.cc (expand_gimple_stmt_1): Try using ccmp
> 	for binary assignments.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/aarch64/ccmp_3.c: New test.
> 	* gcc.target/aarch64/ccmp_4.c: New test.
>
> Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>

TBH I was hoping someone more familiar with the code would comment,
since it looks like the current ccmp decision might have been deliberate.
I agree it doesn't seem to make much sense though.  It's not specific
to a use in a GIMPLE_ASSIGN + GIMPLE_COND, it applies even to uses
in two GIMPLE_ASSIGNs.  E.g.:

int f1(int a, int b, _Bool *x)
{
  x[0] = x[1] = a == 0 || b == 0;
}

int f2(int a, int b, int *x)
{
  x[0] = x[1] = a == 0 || b == 0;
}

produces:

f1:
        cmp     w0, 0
        cset    w3, eq
        cmp     w1, 0
        cset    w1, eq
        orr     w1, w3, w1
        strb    w1, [x2]
        strb    w1, [x2, 1]
        ret

f2:
        cmp     w0, 0
        ccmp    w1, 0, 4, ne
        cset    w1, eq
        stp     w1, w1, [x2]
        ret

because f2 has a cast to int (making the || single-use) whereas f1
doesn't.  Might be nice to include that as a ccmp_5.c.

> ---
>  gcc/ccmp.cc                               |  9 +++---
>  gcc/cfgexpand.cc                          | 25 ++++++++++++++++
>  gcc/testsuite/gcc.target/aarch64/ccmp_3.c | 20 +++++++++++++
>  gcc/testsuite/gcc.target/aarch64/ccmp_4.c | 35 +++++++++++++++++++++++
>  4 files changed, 85 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_4.c
>
> diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc
> index 1bd6fadea35..a274f8c3d53 100644
> --- a/gcc/ccmp.cc
> +++ b/gcc/ccmp.cc
> @@ -92,7 +92,7 @@ ccmp_tree_comparison_p (tree t, basic_block bb)
>  
>  /* Check whether G is a potential conditional compare candidate.  */
>  static bool
> -ccmp_candidate_p (gimple *g)
> +ccmp_candidate_p (gimple *g, bool outer = false)

The function comment should describe the new parameter.

>  {
>    tree lhs, op0, op1;
>    gimple *gs0, *gs1;
> @@ -109,8 +109,9 @@ ccmp_candidate_p (gimple *g)
>    lhs = gimple_assign_lhs (g);
>    op0 = gimple_assign_rhs1 (g);
>    op1 = gimple_assign_rhs2 (g);
> -  if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME)
> -      || !has_single_use (lhs))
> +  if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME))
> +    return false;
> +  if (!outer && !has_single_use (lhs))
>      return false;
>  
>    bb = gimple_bb (g);
> @@ -284,7 +285,7 @@ expand_ccmp_expr (gimple *g, machine_mode mode)
>    rtx_insn *last;
>    rtx tmp;
>  
> -  if (!ccmp_candidate_p (g))
> +  if (!ccmp_candidate_p (g, true))
>      return NULL_RTX;
>  
>    last = get_last_insn ();
> diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
> index b860be8bb77..0f9aad8e3eb 100644
> --- a/gcc/cfgexpand.cc
> +++ b/gcc/cfgexpand.cc
> @@ -74,6 +74,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "output.h"
>  #include "builtins.h"
>  #include "opts.h"
> +#include "ccmp.h"
>  
>  /* Some systems use __main in a way incompatible with its use in gcc, in these
>     cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN to
> @@ -3972,6 +3973,30 @@ expand_gimple_stmt_1 (gimple *stmt)
>  	    if (GET_CODE (target) == SUBREG && SUBREG_PROMOTED_VAR_P (target))
>  	      promoted = true;
>  
> +	    /* Try to expand conditonal compare.  */
> +	    if (targetm.gen_ccmp_first
> +		&& gimple_assign_rhs_class (assign_stmt) == GIMPLE_BINARY_RHS)
> +	      {
> +		machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
> +		gcc_checking_assert (targetm.gen_ccmp_next != NULL);
> +		temp = expand_ccmp_expr (stmt, mode);
> +		if (temp)
> +		  {
> +		    if (promoted)
> +		      {
> +			int unsignedp = SUBREG_PROMOTED_SIGN (target);
> +			convert_move (SUBREG_REG (target), temp, unsignedp);
> +		      }
> +		    else
> +		     {
> +			temp = force_operand (temp, target);
> +			if (temp != target)
> +			  emit_move_insn (target, temp);
> +		      }
> +		    return;
> +		  }
> +	      }
> +
>  	    ops.code = gimple_assign_rhs_code (assign_stmt);
>  	    ops.type = TREE_TYPE (lhs);
>  	    switch (get_gimple_rhs_class (ops.code))

Would it be possible instead to split:

	  rtx r;
	  location_t saved_loc = curr_insn_location ();
	  loc = gimple_location (g);
	  if (loc != UNKNOWN_LOCATION)
	    set_curr_insn_location (loc);
	  ops.code = gimple_assign_rhs_code (g);
          switch (get_gimple_rhs_class (ops.code))
	    {
	    case GIMPLE_TERNARY_RHS:
	      ops.op2 = gimple_assign_rhs3 (g);
	      /* Fallthru */
	    case GIMPLE_BINARY_RHS:
	      ops.op1 = gimple_assign_rhs2 (g);

	      /* Try to expand conditonal compare.  */
	      if (targetm.gen_ccmp_first)
		{
		  gcc_checking_assert (targetm.gen_ccmp_next != NULL);
		  r = expand_ccmp_expr (g, mode);
		  if (r)
		    break;
		}
	      /* Fallthru */
	    case GIMPLE_UNARY_RHS:
	      ops.op0 = gimple_assign_rhs1 (g);
	      ops.type = TREE_TYPE (gimple_assign_lhs (g));
	      ops.location = loc;
	      r = expand_expr_real_2 (&ops, target, tmode, modifier);
	      break;
	    case GIMPLE_SINGLE_RHS:
	      {
		r = expand_expr_real (gimple_assign_rhs1 (g), target,
				      tmode, modifier, alt_rtl,
				      inner_reference_p);
		break;
	      }
	    default:
	      gcc_unreachable ();
	    }
	  set_curr_insn_location (saved_loc);

of expand_expr_real_1 out into a subroutine and make this function call it?
That way we could avoid duplicating the expand_ccmp_expr stuff and the
promoted handling.

Thanks for doing this.  Looks like a nice improvement.

Richard

> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> new file mode 100644
> index 00000000000..a2b47fbee14
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> @@ -0,0 +1,20 @@
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +
> +void foo(void);
> +int f1(int a, int b)
> +{
> +  int c = a == 0 || b == 0;
> +  if (c) foo();
> +  return c;
> +}
> +
> +/* We should get one cmp followed by ccmp and one cset. */
> +/* { dg-final { scan-assembler "\tccmp\t" } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 1 } } */
> +/* And not get 2 cmps and 2 (or more cset) and orr and a cbnz. */
> +/* { dg-final { scan-assembler-not "\torr\t" } } */
> +/* { dg-final { scan-assembler-not "\tcbnz\t" } } */
> +/* { dg-final { scan-assembler-not "\tcbz\t" } } */
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_4.c b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> new file mode 100644
> index 00000000000..bc0f57a7c59
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> @@ -0,0 +1,35 @@
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +
> +void foo(void);
> +int f1(int a, int b, int d)
> +{
> +  int c = a < 8 || b < 9;
> +  int e = d < 11 || c;
> +  if (e) foo();
> +  return c;
> +}
> +
> +/*
> +  We really should get:
> +        cmp     w0, 7
> +        ccmp    w1, 8, 4, gt
> +        cset    w0, le
> +        ccmp    w2, 10, 4, gt
> +        ble     .L11
> +
> +  But we currently get:
> +        cmp     w0, 7
> +        ccmp    w1, 8, 4, gt
> +        cset    w0, le
> +        cmp     w0, 0
> +        ccmp    w2, 10, 4, eq
> +        ble     .L11
> +  The middle cmp is not needed.
> + */
> +
> +/* We should end up with only one cmp and 2 ccmp and 1 cset but currently we get 2 cmp
> +   though. */
> +/* { dg-final { scan-assembler-times "\tccmp\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 1 { xfail *-*-* } } } */

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-01-02 22:45 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-12  8:21 [PATCH] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942] Andrew Pinski
2023-12-13  1:08 ` Andrew Pinski
2024-01-02 17:34 ` Andrew Pinski
2024-01-02 22:45 ` Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).