public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCHv2] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942]
@ 2024-01-05  7:43 Andrew Pinski
  2024-01-12 12:26 ` [PATCHv3] " Richard Sandiford
  0 siblings, 1 reply; 5+ messages in thread
From: Andrew Pinski @ 2024-01-05  7:43 UTC (permalink / raw)
  To: gcc-patches; +Cc: Andrew Pinski

Ccmp is not used if the result of the and/ior is used by both
a GIMPLE_COND and a GIMPLE_ASSIGN. This improves the code generation
here by using ccmp in this case.
Two changes is required, first we need to allow the outer statement's
result be used more than once.
The second change is that during the expansion of the gimple, we need
to try using ccmp. This is needed because we don't use expand the ssa
name of the lhs but rather expand directly from the gimple.

A small note on the ccmp_4.c testcase, we should be able to get slightly
better than with this patch but it is one extra instruction compared to
before.

Diff from v1:
* v2: Split out expand_gimple_assign_ssa so the we only need to handle
promotion once. Add ccmp_5.c testcase which was suggested. Change comment
on ccmp_candidate_p.

Bootstraped and tested on aarch64-linux-gnu with no regressions.

	PR target/100942

gcc/ChangeLog:

	* ccmp.cc (ccmp_candidate_p): Add outer argument.
	Allow if the outer is true and the lhs is used more
	than once.
	(expand_ccmp_expr): Update call to ccmp_candidate_p.
	* cfgexpand.cc (expand_gimple_assign_ssa): New function, try
	using ccmp for binary assignments and extracted from ...
	(expand_gimple_stmt_1): Here. Call expand_gimple_assign_ssa.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/ccmp_3.c: New test.
	* gcc.target/aarch64/ccmp_4.c: New test.
	* gcc.target/aarch64/ccmp_5.c: New test.

Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
---
 gcc/ccmp.cc                               | 12 ++--
 gcc/cfgexpand.cc                          | 75 +++++++++++++++--------
 gcc/testsuite/gcc.target/aarch64/ccmp_3.c | 20 ++++++
 gcc/testsuite/gcc.target/aarch64/ccmp_4.c | 35 +++++++++++
 gcc/testsuite/gcc.target/aarch64/ccmp_5.c | 20 ++++++
 5 files changed, 132 insertions(+), 30 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_5.c

diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc
index 09d6b5595a4..7cb525addf4 100644
--- a/gcc/ccmp.cc
+++ b/gcc/ccmp.cc
@@ -90,9 +90,10 @@ ccmp_tree_comparison_p (tree t, basic_block bb)
    If all checks OK in expand_ccmp_expr, it emits insns in prep_seq, then
    insns in gen_seq.  */
 
-/* Check whether G is a potential conditional compare candidate.  */
+/* Check whether G is a potential conditional compare candidate; OUTER is true if
+   G is the outer most AND/IOR.  */
 static bool
-ccmp_candidate_p (gimple *g)
+ccmp_candidate_p (gimple *g, bool outer = false)
 {
   tree lhs, op0, op1;
   gimple *gs0, *gs1;
@@ -109,8 +110,9 @@ ccmp_candidate_p (gimple *g)
   lhs = gimple_assign_lhs (g);
   op0 = gimple_assign_rhs1 (g);
   op1 = gimple_assign_rhs2 (g);
-  if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME)
-      || !has_single_use (lhs))
+  if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME))
+    return false;
+  if (!outer && !has_single_use (lhs))
     return false;
 
   bb = gimple_bb (g);
@@ -284,7 +286,7 @@ expand_ccmp_expr (gimple *g, machine_mode mode)
   rtx_insn *last;
   rtx tmp;
 
-  if (!ccmp_candidate_p (g))
+  if (!ccmp_candidate_p (g, true))
     return NULL_RTX;
 
   last = get_last_insn ();
diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index 1db22f0a1a3..293b9386192 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -74,6 +74,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "output.h"
 #include "builtins.h"
 #include "opts.h"
+#include "ccmp.h"
 
 /* Some systems use __main in a way incompatible with its use in gcc, in these
    cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN to
@@ -3858,6 +3859,50 @@ expand_clobber (tree lhs)
     }
 }
 
+/* A subroutine of expand_gimple_stmt_1, expanding one gimple assign statement
+   ASSIGN_STMT that has a SSA name on the lhs, using TARGET and TARGET_MODE
+   as the target if possible and mode for what mode we should expand into.  */
+static rtx
+expand_gimple_assign_ssa (gassign *assign_stmt,
+			  rtx target,
+			  machine_mode target_mode)
+{
+  rtx temp;
+  tree lhs = gimple_assign_lhs (assign_stmt);
+  /* Try to expand conditonal compare.  */
+  if (targetm.gen_ccmp_first
+      && gimple_assign_rhs_class (assign_stmt) == GIMPLE_BINARY_RHS)
+    {
+      machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
+      gcc_checking_assert (targetm.gen_ccmp_next != NULL);
+      temp = expand_ccmp_expr (assign_stmt, mode);
+      if (temp)
+	return temp;
+    }
+
+  struct separate_ops ops;
+  ops.code = gimple_assign_rhs_code (assign_stmt);
+  ops.type = TREE_TYPE (lhs);
+  switch (get_gimple_rhs_class (ops.code))
+    {
+      case GIMPLE_TERNARY_RHS:
+	ops.op2 = gimple_assign_rhs3 (assign_stmt);
+	/* Fallthru */
+      case GIMPLE_BINARY_RHS:
+	ops.op1 = gimple_assign_rhs2 (assign_stmt);
+	/* Fallthru */
+      case GIMPLE_UNARY_RHS:
+	ops.op0 = gimple_assign_rhs1 (assign_stmt);
+	break;
+      default:
+	gcc_unreachable ();
+     }
+  ops.location = gimple_location (assign_stmt);
+
+  return expand_expr_real_2 (&ops, target, target_mode,
+			     EXPAND_NORMAL);
+}
+
 /* A subroutine of expand_gimple_stmt, expanding one gimple statement
    STMT that doesn't require special handling for outgoing edges.  That
    is no tailcalls and no GIMPLE_COND.  */
@@ -3971,37 +4016,17 @@ expand_gimple_stmt_1 (gimple *stmt)
 	  {
 	    rtx target, temp;
 	    bool nontemporal = gimple_assign_nontemporal_move_p (assign_stmt);
-	    struct separate_ops ops;
 	    bool promoted = false;
 
 	    target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
 	    if (GET_CODE (target) == SUBREG && SUBREG_PROMOTED_VAR_P (target))
 	      promoted = true;
 
-	    ops.code = gimple_assign_rhs_code (assign_stmt);
-	    ops.type = TREE_TYPE (lhs);
-	    switch (get_gimple_rhs_class (ops.code))
-	      {
-		case GIMPLE_TERNARY_RHS:
-		  ops.op2 = gimple_assign_rhs3 (assign_stmt);
-		  /* Fallthru */
-		case GIMPLE_BINARY_RHS:
-		  ops.op1 = gimple_assign_rhs2 (assign_stmt);
-		  /* Fallthru */
-		case GIMPLE_UNARY_RHS:
-		  ops.op0 = gimple_assign_rhs1 (assign_stmt);
-		  break;
-		default:
-		  gcc_unreachable ();
-	      }
-	    ops.location = gimple_location (stmt);
-
-	    /* If we want to use a nontemporal store, force the value to
-	       register first.  If we store into a promoted register,
-	       don't directly expand to target.  */
+	   /* If we want to use a nontemporal store, force the value to
+	      register first.  If we store into a promoted register,
+	      don't directly expand to target.  */
 	    temp = nontemporal || promoted ? NULL_RTX : target;
-	    temp = expand_expr_real_2 (&ops, temp, GET_MODE (target),
-				       EXPAND_NORMAL);
+	    temp = expand_gimple_assign_ssa (assign_stmt, temp, GET_MODE (target));
 
 	    if (temp == target)
 	      ;
@@ -4013,7 +4038,7 @@ expand_gimple_stmt_1 (gimple *stmt)
 		if (CONSTANT_P (temp) && GET_MODE (temp) == VOIDmode)
 		  {
 		    temp = convert_modes (GET_MODE (target),
-					  TYPE_MODE (ops.type),
+					  TYPE_MODE (TREE_TYPE (lhs)),
 					  temp, unsignedp);
 		    temp = convert_modes (GET_MODE (SUBREG_REG (target)),
 					  GET_MODE (target), temp, unsignedp);
diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
new file mode 100644
index 00000000000..a2b47fbee14
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
@@ -0,0 +1,20 @@
+/* { dg-options "-O2" } */
+/* PR target/100942 */
+
+void foo(void);
+int f1(int a, int b)
+{
+  int c = a == 0 || b == 0;
+  if (c) foo();
+  return c;
+}
+
+/* We should get one cmp followed by ccmp and one cset. */
+/* { dg-final { scan-assembler "\tccmp\t" } } */
+/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
+/* { dg-final { scan-assembler-times "\tcmp\t" 1 } } */
+/* And not get 2 cmps and 2 (or more cset) and orr and a cbnz. */
+/* { dg-final { scan-assembler-not "\torr\t" } } */
+/* { dg-final { scan-assembler-not "\tcbnz\t" } } */
+/* { dg-final { scan-assembler-not "\tcbz\t" } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_4.c b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
new file mode 100644
index 00000000000..bc0f57a7c59
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
@@ -0,0 +1,35 @@
+/* { dg-options "-O2" } */
+/* PR target/100942 */
+
+void foo(void);
+int f1(int a, int b, int d)
+{
+  int c = a < 8 || b < 9;
+  int e = d < 11 || c;
+  if (e) foo();
+  return c;
+}
+
+/*
+  We really should get:
+        cmp     w0, 7
+        ccmp    w1, 8, 4, gt
+        cset    w0, le
+        ccmp    w2, 10, 4, gt
+        ble     .L11
+
+  But we currently get:
+        cmp     w0, 7
+        ccmp    w1, 8, 4, gt
+        cset    w0, le
+        cmp     w0, 0
+        ccmp    w2, 10, 4, eq
+        ble     .L11
+  The middle cmp is not needed.
+ */
+
+/* We should end up with only one cmp and 2 ccmp and 1 cset but currently we get 2 cmp
+   though. */
+/* { dg-final { scan-assembler-times "\tccmp\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
+/* { dg-final { scan-assembler-times "\tcmp\t" 1 { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_5.c b/gcc/testsuite/gcc.target/aarch64/ccmp_5.c
new file mode 100644
index 00000000000..7e52ae4f322
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ccmp_5.c
@@ -0,0 +1,20 @@
+
+/* { dg-options "-O2" } */
+/* PR target/100942 */
+void f1(int a, int b, _Bool *x)
+{
+  x[0] = x[1] = a == 0 || b == 0;
+}
+
+void f2(int a, int b, int *x)
+{
+  x[0] = x[1] = a == 0 || b == 0;
+}
+
+
+/* Both functions should be using ccmp rather than 2 cset/orr.  */
+/* { dg-final { scan-assembler-times "\tccmp\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tcset\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tcmp\t" 2 } } */
+/* { dg-final { scan-assembler-not "\torr\t" } } */
+
-- 
2.39.3


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCHv3] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942]
  2024-01-05  7:43 [PATCHv2] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942] Andrew Pinski
@ 2024-01-12 12:26 ` Richard Sandiford
  2024-01-12 21:28   ` Andrew Pinski (QUIC)
  2024-01-22 13:24   ` Ping: " Richard Sandiford
  0 siblings, 2 replies; 5+ messages in thread
From: Richard Sandiford @ 2024-01-12 12:26 UTC (permalink / raw)
  To: Andrew Pinski; +Cc: gcc-patches

Andrew Pinski <quic_apinski@quicinc.com> writes:
> Ccmp is not used if the result of the and/ior is used by both
> a GIMPLE_COND and a GIMPLE_ASSIGN. This improves the code generation
> here by using ccmp in this case.
> Two changes is required, first we need to allow the outer statement's
> result be used more than once.
> The second change is that during the expansion of the gimple, we need
> to try using ccmp. This is needed because we don't use expand the ssa
> name of the lhs but rather expand directly from the gimple.
>
> A small note on the ccmp_4.c testcase, we should be able to get slightly
> better than with this patch but it is one extra instruction compared to
> before.
>
> Diff from v1:
> * v2: Split out expand_gimple_assign_ssa so the we only need to handle
> promotion once. Add ccmp_5.c testcase which was suggested. Change comment
> on ccmp_candidate_p.

I meant more that we should split out the gassign handling in
expand_expr_real_1, since we're effectively making cfgexpand follow
it more closely.  What do you think about the attached version?
Tested on aarch64-linux-gnu and x86_64-linux-gnu.

OK for the expr/cfgexpand bits?

Thanks,
Richard

----

Ccmp is not used if the result of the and/ior is used by both
a GIMPLE_COND and a GIMPLE_ASSIGN. This improves the code generation
here by using ccmp in this case.
Two changes is required, first we need to allow the outer statement's
result be used more than once.
The second change is that during the expansion of the gimple, we need
to try using ccmp. This is needed because we don't use expand the ssa
name of the lhs but rather expand directly from the gimple.

A small note on the ccmp_4.c testcase, we should be able to get slightly
better than with this patch but it is one extra instruction compared to
before.

	PR target/100942

gcc/ChangeLog:

	* ccmp.cc (ccmp_candidate_p): Add outer argument.
	Allow if the outer is true and the lhs is used more
	than once.
	(expand_ccmp_expr): Update call to ccmp_candidate_p.
	* expr.h (expand_expr_real_gassign): Declare.
	* expr.cc (expand_expr_real_gassign): New function, split out from...
	(expand_expr_real_1): ...here.
	* cfgexpand.cc (expand_gimple_stmt_1): Use expand_expr_real_gassign.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/ccmp_3.c: New test.
	* gcc.target/aarch64/ccmp_4.c: New test.
	* gcc.target/aarch64/ccmp_5.c: New test.

Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
Co-authored-by: Richard Sandiford <richard.sandiford@arm.com>
---
 gcc/ccmp.cc                               |  12 +--
 gcc/cfgexpand.cc                          |  31 ++-----
 gcc/expr.cc                               | 103 ++++++++++++----------
 gcc/expr.h                                |   3 +
 gcc/testsuite/gcc.target/aarch64/ccmp_3.c |  20 +++++
 gcc/testsuite/gcc.target/aarch64/ccmp_4.c |  35 ++++++++
 gcc/testsuite/gcc.target/aarch64/ccmp_5.c |  20 +++++
 7 files changed, 149 insertions(+), 75 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_5.c

diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc
index 09d6b5595a4..7cb525addf4 100644
--- a/gcc/ccmp.cc
+++ b/gcc/ccmp.cc
@@ -90,9 +90,10 @@ ccmp_tree_comparison_p (tree t, basic_block bb)
    If all checks OK in expand_ccmp_expr, it emits insns in prep_seq, then
    insns in gen_seq.  */
 
-/* Check whether G is a potential conditional compare candidate.  */
+/* Check whether G is a potential conditional compare candidate; OUTER is true if
+   G is the outer most AND/IOR.  */
 static bool
-ccmp_candidate_p (gimple *g)
+ccmp_candidate_p (gimple *g, bool outer = false)
 {
   tree lhs, op0, op1;
   gimple *gs0, *gs1;
@@ -109,8 +110,9 @@ ccmp_candidate_p (gimple *g)
   lhs = gimple_assign_lhs (g);
   op0 = gimple_assign_rhs1 (g);
   op1 = gimple_assign_rhs2 (g);
-  if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME)
-      || !has_single_use (lhs))
+  if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME))
+    return false;
+  if (!outer && !has_single_use (lhs))
     return false;
 
   bb = gimple_bb (g);
@@ -284,7 +286,7 @@ expand_ccmp_expr (gimple *g, machine_mode mode)
   rtx_insn *last;
   rtx tmp;
 
-  if (!ccmp_candidate_p (g))
+  if (!ccmp_candidate_p (g, true))
     return NULL_RTX;
 
   last = get_last_insn ();
diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index 1db22f0a1a3..381ed2c82d7 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -3971,37 +3971,18 @@ expand_gimple_stmt_1 (gimple *stmt)
 	  {
 	    rtx target, temp;
 	    bool nontemporal = gimple_assign_nontemporal_move_p (assign_stmt);
-	    struct separate_ops ops;
 	    bool promoted = false;
 
 	    target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
 	    if (GET_CODE (target) == SUBREG && SUBREG_PROMOTED_VAR_P (target))
 	      promoted = true;
 
-	    ops.code = gimple_assign_rhs_code (assign_stmt);
-	    ops.type = TREE_TYPE (lhs);
-	    switch (get_gimple_rhs_class (ops.code))
-	      {
-		case GIMPLE_TERNARY_RHS:
-		  ops.op2 = gimple_assign_rhs3 (assign_stmt);
-		  /* Fallthru */
-		case GIMPLE_BINARY_RHS:
-		  ops.op1 = gimple_assign_rhs2 (assign_stmt);
-		  /* Fallthru */
-		case GIMPLE_UNARY_RHS:
-		  ops.op0 = gimple_assign_rhs1 (assign_stmt);
-		  break;
-		default:
-		  gcc_unreachable ();
-	      }
-	    ops.location = gimple_location (stmt);
-
-	    /* If we want to use a nontemporal store, force the value to
-	       register first.  If we store into a promoted register,
-	       don't directly expand to target.  */
+	   /* If we want to use a nontemporal store, force the value to
+	      register first.  If we store into a promoted register,
+	      don't directly expand to target.  */
 	    temp = nontemporal || promoted ? NULL_RTX : target;
-	    temp = expand_expr_real_2 (&ops, temp, GET_MODE (target),
-				       EXPAND_NORMAL);
+	    temp = expand_expr_real_gassign (assign_stmt, temp,
+					     GET_MODE (target), EXPAND_NORMAL);
 
 	    if (temp == target)
 	      ;
@@ -4013,7 +3994,7 @@ expand_gimple_stmt_1 (gimple *stmt)
 		if (CONSTANT_P (temp) && GET_MODE (temp) == VOIDmode)
 		  {
 		    temp = convert_modes (GET_MODE (target),
-					  TYPE_MODE (ops.type),
+					  TYPE_MODE (TREE_TYPE (lhs)),
 					  temp, unsignedp);
 		    temp = convert_modes (GET_MODE (SUBREG_REG (target)),
 					  GET_MODE (target), temp, unsignedp);
diff --git a/gcc/expr.cc b/gcc/expr.cc
index dc816bc20fa..f9052a6ff5f 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -11035,6 +11035,62 @@ stmt_is_replaceable_p (gimple *stmt)
   return false;
 }
 
+/* A subroutine of expand_expr_real_1.  Expand gimple assignment G,
+   which is known to set an SSA_NAME result.  The other arguments are
+   as for expand_expr_real_1.  */
+
+rtx
+expand_expr_real_gassign (gassign *g, rtx target, machine_mode tmode,
+			  enum expand_modifier modifier, rtx *alt_rtl,
+			  bool inner_reference_p)
+{
+  separate_ops ops;
+  rtx r;
+  location_t saved_loc = curr_insn_location ();
+  auto loc = gimple_location (g);
+  if (loc != UNKNOWN_LOCATION)
+    set_curr_insn_location (loc);
+  tree lhs = gimple_assign_lhs (g);
+  ops.code = gimple_assign_rhs_code (g);
+  ops.type = TREE_TYPE (lhs);
+  switch (get_gimple_rhs_class (ops.code))
+    {
+    case GIMPLE_TERNARY_RHS:
+      ops.op2 = gimple_assign_rhs3 (g);
+      /* Fallthru */
+    case GIMPLE_BINARY_RHS:
+      ops.op1 = gimple_assign_rhs2 (g);
+
+      /* Try to expand conditonal compare.  */
+      if (targetm.gen_ccmp_first)
+	{
+	  gcc_checking_assert (targetm.gen_ccmp_next != NULL);
+	  r = expand_ccmp_expr (g, TYPE_MODE (ops.type));
+	  if (r)
+	    break;
+	}
+      /* Fallthru */
+    case GIMPLE_UNARY_RHS:
+      ops.op0 = gimple_assign_rhs1 (g);
+      ops.location = loc;
+      r = expand_expr_real_2 (&ops, target, tmode, modifier);
+      break;
+    case GIMPLE_SINGLE_RHS:
+      {
+	r = expand_expr_real (gimple_assign_rhs1 (g), target,
+			      tmode, modifier, alt_rtl,
+			      inner_reference_p);
+	break;
+      }
+    default:
+      gcc_unreachable ();
+    }
+  set_curr_insn_location (saved_loc);
+  if (REG_P (r) && !REG_EXPR (r))
+    set_reg_attrs_for_decl_rtl (lhs, r);
+  return r;
+}
+
 rtx
 expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
 		    enum expand_modifier modifier, rtx *alt_rtl,
@@ -11198,51 +11254,8 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
 	  && stmt_is_replaceable_p (SSA_NAME_DEF_STMT (exp)))
 	g = SSA_NAME_DEF_STMT (exp);
       if (g)
-	{
-	  rtx r;
-	  location_t saved_loc = curr_insn_location ();
-	  loc = gimple_location (g);
-	  if (loc != UNKNOWN_LOCATION)
-	    set_curr_insn_location (loc);
-	  ops.code = gimple_assign_rhs_code (g);
-          switch (get_gimple_rhs_class (ops.code))
-	    {
-	    case GIMPLE_TERNARY_RHS:
-	      ops.op2 = gimple_assign_rhs3 (g);
-	      /* Fallthru */
-	    case GIMPLE_BINARY_RHS:
-	      ops.op1 = gimple_assign_rhs2 (g);
-
-	      /* Try to expand conditonal compare.  */
-	      if (targetm.gen_ccmp_first)
-		{
-		  gcc_checking_assert (targetm.gen_ccmp_next != NULL);
-		  r = expand_ccmp_expr (g, mode);
-		  if (r)
-		    break;
-		}
-	      /* Fallthru */
-	    case GIMPLE_UNARY_RHS:
-	      ops.op0 = gimple_assign_rhs1 (g);
-	      ops.type = TREE_TYPE (gimple_assign_lhs (g));
-	      ops.location = loc;
-	      r = expand_expr_real_2 (&ops, target, tmode, modifier);
-	      break;
-	    case GIMPLE_SINGLE_RHS:
-	      {
-		r = expand_expr_real (gimple_assign_rhs1 (g), target,
-				      tmode, modifier, alt_rtl,
-				      inner_reference_p);
-		break;
-	      }
-	    default:
-	      gcc_unreachable ();
-	    }
-	  set_curr_insn_location (saved_loc);
-	  if (REG_P (r) && !REG_EXPR (r))
-	    set_reg_attrs_for_decl_rtl (SSA_NAME_VAR (exp), r);
-	  return r;
-	}
+	return expand_expr_real_gassign (as_a<gassign *> (g), target, tmode,
+					 modifier, alt_rtl, inner_reference_p);
 
       ssa_name = exp;
       decl_rtl = get_rtx_for_ssa_name (ssa_name);
diff --git a/gcc/expr.h b/gcc/expr.h
index f04f40da6ab..64956f63029 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -302,6 +302,9 @@ extern rtx expand_expr_real_1 (tree, rtx, machine_mode,
 			       enum expand_modifier, rtx *, bool);
 extern rtx expand_expr_real_2 (sepops, rtx, machine_mode,
 			       enum expand_modifier);
+extern rtx expand_expr_real_gassign (gassign *, rtx, machine_mode,
+				     enum expand_modifier modifier,
+				     rtx * = nullptr, bool = false);
 
 /* Generate code for computing expression EXP.
    An rtx for the computed value is returned.  The value is never null.
diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
new file mode 100644
index 00000000000..a2b47fbee14
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
@@ -0,0 +1,20 @@
+/* { dg-options "-O2" } */
+/* PR target/100942 */
+
+void foo(void);
+int f1(int a, int b)
+{
+  int c = a == 0 || b == 0;
+  if (c) foo();
+  return c;
+}
+
+/* We should get one cmp followed by ccmp and one cset. */
+/* { dg-final { scan-assembler "\tccmp\t" } } */
+/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
+/* { dg-final { scan-assembler-times "\tcmp\t" 1 } } */
+/* And not get 2 cmps and 2 (or more cset) and orr and a cbnz. */
+/* { dg-final { scan-assembler-not "\torr\t" } } */
+/* { dg-final { scan-assembler-not "\tcbnz\t" } } */
+/* { dg-final { scan-assembler-not "\tcbz\t" } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_4.c b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
new file mode 100644
index 00000000000..bc0f57a7c59
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
@@ -0,0 +1,35 @@
+/* { dg-options "-O2" } */
+/* PR target/100942 */
+
+void foo(void);
+int f1(int a, int b, int d)
+{
+  int c = a < 8 || b < 9;
+  int e = d < 11 || c;
+  if (e) foo();
+  return c;
+}
+
+/*
+  We really should get:
+        cmp     w0, 7
+        ccmp    w1, 8, 4, gt
+        cset    w0, le
+        ccmp    w2, 10, 4, gt
+        ble     .L11
+
+  But we currently get:
+        cmp     w0, 7
+        ccmp    w1, 8, 4, gt
+        cset    w0, le
+        cmp     w0, 0
+        ccmp    w2, 10, 4, eq
+        ble     .L11
+  The middle cmp is not needed.
+ */
+
+/* We should end up with only one cmp and 2 ccmp and 1 cset but currently we get 2 cmp
+   though. */
+/* { dg-final { scan-assembler-times "\tccmp\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
+/* { dg-final { scan-assembler-times "\tcmp\t" 1 { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_5.c b/gcc/testsuite/gcc.target/aarch64/ccmp_5.c
new file mode 100644
index 00000000000..7e52ae4f322
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ccmp_5.c
@@ -0,0 +1,20 @@
+
+/* { dg-options "-O2" } */
+/* PR target/100942 */
+void f1(int a, int b, _Bool *x)
+{
+  x[0] = x[1] = a == 0 || b == 0;
+}
+
+void f2(int a, int b, int *x)
+{
+  x[0] = x[1] = a == 0 || b == 0;
+}
+
+
+/* Both functions should be using ccmp rather than 2 cset/orr.  */
+/* { dg-final { scan-assembler-times "\tccmp\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tcset\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tcmp\t" 2 } } */
+/* { dg-final { scan-assembler-not "\torr\t" } } */
+
-- 
2.25.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCHv3] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942]
  2024-01-12 12:26 ` [PATCHv3] " Richard Sandiford
@ 2024-01-12 21:28   ` Andrew Pinski (QUIC)
  2024-01-22 13:24   ` Ping: " Richard Sandiford
  1 sibling, 0 replies; 5+ messages in thread
From: Andrew Pinski (QUIC) @ 2024-01-12 21:28 UTC (permalink / raw)
  To: Richard Sandiford, Andrew Pinski (QUIC); +Cc: gcc-patches

> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Friday, January 12, 2024 4:26 AM
> To: Andrew Pinski (QUIC) <quic_apinski@quicinc.com>
> Cc: gcc-patches@gcc.gnu.org
> Subject: [PATCHv3] aarch64/expr: Use ccmp when the outer expression is
> used twice [PR100942]
> 
> Andrew Pinski <quic_apinski@quicinc.com> writes:
> > Ccmp is not used if the result of the and/ior is used by both
> > a GIMPLE_COND and a GIMPLE_ASSIGN. This improves the code generation
> > here by using ccmp in this case.
> > Two changes is required, first we need to allow the outer statement's
> > result be used more than once.
> > The second change is that during the expansion of the gimple, we need
> > to try using ccmp. This is needed because we don't use expand the ssa
> > name of the lhs but rather expand directly from the gimple.
> >
> > A small note on the ccmp_4.c testcase, we should be able to get slightly
> > better than with this patch but it is one extra instruction compared to
> > before.
> >
> > Diff from v1:
> > * v2: Split out expand_gimple_assign_ssa so the we only need to handle
> > promotion once. Add ccmp_5.c testcase which was suggested. Change
> comment
> > on ccmp_candidate_p.
> 
> I meant more that we should split out the gassign handling in
> expand_expr_real_1, since we're effectively making cfgexpand follow
> it more closely.  What do you think about the attached version?
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.
> 
> OK for the expr/cfgexpand bits?

Oh that is what I originally thought you wanted but I was not 100% sure so I just
moved it out in one place.  Anyways thanks for taking care of the change. 

Thanks,
Andrew Pinski

> 
> Thanks,
> Richard
> 
> ----
> 
> Ccmp is not used if the result of the and/ior is used by both
> a GIMPLE_COND and a GIMPLE_ASSIGN. This improves the code generation
> here by using ccmp in this case.
> Two changes is required, first we need to allow the outer statement's
> result be used more than once.
> The second change is that during the expansion of the gimple, we need
> to try using ccmp. This is needed because we don't use expand the ssa
> name of the lhs but rather expand directly from the gimple.
> 
> A small note on the ccmp_4.c testcase, we should be able to get slightly
> better than with this patch but it is one extra instruction compared to
> before.
> 
> 	PR target/100942
> 
> gcc/ChangeLog:
> 
> 	* ccmp.cc (ccmp_candidate_p): Add outer argument.
> 	Allow if the outer is true and the lhs is used more
> 	than once.
> 	(expand_ccmp_expr): Update call to ccmp_candidate_p.
> 	* expr.h (expand_expr_real_gassign): Declare.
> 	* expr.cc (expand_expr_real_gassign): New function, split out from...
> 	(expand_expr_real_1): ...here.
> 	* cfgexpand.cc (expand_gimple_stmt_1): Use
> expand_expr_real_gassign.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/aarch64/ccmp_3.c: New test.
> 	* gcc.target/aarch64/ccmp_4.c: New test.
> 	* gcc.target/aarch64/ccmp_5.c: New test.
> 
> Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
> Co-authored-by: Richard Sandiford <richard.sandiford@arm.com>
> ---
>  gcc/ccmp.cc                               |  12 +--
>  gcc/cfgexpand.cc                          |  31 ++-----
>  gcc/expr.cc                               | 103 ++++++++++++----------
>  gcc/expr.h                                |   3 +
>  gcc/testsuite/gcc.target/aarch64/ccmp_3.c |  20 +++++
>  gcc/testsuite/gcc.target/aarch64/ccmp_4.c |  35 ++++++++
>  gcc/testsuite/gcc.target/aarch64/ccmp_5.c |  20 +++++
>  7 files changed, 149 insertions(+), 75 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_4.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_5.c
> 
> diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc
> index 09d6b5595a4..7cb525addf4 100644
> --- a/gcc/ccmp.cc
> +++ b/gcc/ccmp.cc
> @@ -90,9 +90,10 @@ ccmp_tree_comparison_p (tree t, basic_block bb)
>     If all checks OK in expand_ccmp_expr, it emits insns in prep_seq, then
>     insns in gen_seq.  */
> 
> -/* Check whether G is a potential conditional compare candidate.  */
> +/* Check whether G is a potential conditional compare candidate; OUTER is
> true if
> +   G is the outer most AND/IOR.  */
>  static bool
> -ccmp_candidate_p (gimple *g)
> +ccmp_candidate_p (gimple *g, bool outer = false)
>  {
>    tree lhs, op0, op1;
>    gimple *gs0, *gs1;
> @@ -109,8 +110,9 @@ ccmp_candidate_p (gimple *g)
>    lhs = gimple_assign_lhs (g);
>    op0 = gimple_assign_rhs1 (g);
>    op1 = gimple_assign_rhs2 (g);
> -  if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME)
> -      || !has_single_use (lhs))
> +  if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME))
> +    return false;
> +  if (!outer && !has_single_use (lhs))
>      return false;
> 
>    bb = gimple_bb (g);
> @@ -284,7 +286,7 @@ expand_ccmp_expr (gimple *g, machine_mode
> mode)
>    rtx_insn *last;
>    rtx tmp;
> 
> -  if (!ccmp_candidate_p (g))
> +  if (!ccmp_candidate_p (g, true))
>      return NULL_RTX;
> 
>    last = get_last_insn ();
> diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
> index 1db22f0a1a3..381ed2c82d7 100644
> --- a/gcc/cfgexpand.cc
> +++ b/gcc/cfgexpand.cc
> @@ -3971,37 +3971,18 @@ expand_gimple_stmt_1 (gimple *stmt)
>  	  {
>  	    rtx target, temp;
>  	    bool nontemporal = gimple_assign_nontemporal_move_p
> (assign_stmt);
> -	    struct separate_ops ops;
>  	    bool promoted = false;
> 
>  	    target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
>  	    if (GET_CODE (target) == SUBREG && SUBREG_PROMOTED_VAR_P
> (target))
>  	      promoted = true;
> 
> -	    ops.code = gimple_assign_rhs_code (assign_stmt);
> -	    ops.type = TREE_TYPE (lhs);
> -	    switch (get_gimple_rhs_class (ops.code))
> -	      {
> -		case GIMPLE_TERNARY_RHS:
> -		  ops.op2 = gimple_assign_rhs3 (assign_stmt);
> -		  /* Fallthru */
> -		case GIMPLE_BINARY_RHS:
> -		  ops.op1 = gimple_assign_rhs2 (assign_stmt);
> -		  /* Fallthru */
> -		case GIMPLE_UNARY_RHS:
> -		  ops.op0 = gimple_assign_rhs1 (assign_stmt);
> -		  break;
> -		default:
> -		  gcc_unreachable ();
> -	      }
> -	    ops.location = gimple_location (stmt);
> -
> -	    /* If we want to use a nontemporal store, force the value to
> -	       register first.  If we store into a promoted register,
> -	       don't directly expand to target.  */
> +	   /* If we want to use a nontemporal store, force the value to
> +	      register first.  If we store into a promoted register,
> +	      don't directly expand to target.  */
>  	    temp = nontemporal || promoted ? NULL_RTX : target;
> -	    temp = expand_expr_real_2 (&ops, temp, GET_MODE (target),
> -				       EXPAND_NORMAL);
> +	    temp = expand_expr_real_gassign (assign_stmt, temp,
> +					     GET_MODE (target),
> EXPAND_NORMAL);
> 
>  	    if (temp == target)
>  	      ;
> @@ -4013,7 +3994,7 @@ expand_gimple_stmt_1 (gimple *stmt)
>  		if (CONSTANT_P (temp) && GET_MODE (temp) == VOIDmode)
>  		  {
>  		    temp = convert_modes (GET_MODE (target),
> -					  TYPE_MODE (ops.type),
> +					  TYPE_MODE (TREE_TYPE (lhs)),
>  					  temp, unsignedp);
>  		    temp = convert_modes (GET_MODE (SUBREG_REG (target)),
>  					  GET_MODE (target), temp,
> unsignedp);
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index dc816bc20fa..f9052a6ff5f 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -11035,6 +11035,62 @@ stmt_is_replaceable_p (gimple *stmt)
>    return false;
>  }
> 
> +/* A subroutine of expand_expr_real_1.  Expand gimple assignment G,
> +   which is known to set an SSA_NAME result.  The other arguments are
> +   as for expand_expr_real_1.  */
> +
> +rtx
> +expand_expr_real_gassign (gassign *g, rtx target, machine_mode tmode,
> +			  enum expand_modifier modifier, rtx *alt_rtl,
> +			  bool inner_reference_p)
> +{
> +  separate_ops ops;
> +  rtx r;
> +  location_t saved_loc = curr_insn_location ();
> +  auto loc = gimple_location (g);
> +  if (loc != UNKNOWN_LOCATION)
> +    set_curr_insn_location (loc);
> +  tree lhs = gimple_assign_lhs (g);
> +  ops.code = gimple_assign_rhs_code (g);
> +  ops.type = TREE_TYPE (lhs);
> +  switch (get_gimple_rhs_class (ops.code))
> +    {
> +    case GIMPLE_TERNARY_RHS:
> +      ops.op2 = gimple_assign_rhs3 (g);
> +      /* Fallthru */
> +    case GIMPLE_BINARY_RHS:
> +      ops.op1 = gimple_assign_rhs2 (g);
> +
> +      /* Try to expand conditonal compare.  */
> +      if (targetm.gen_ccmp_first)
> +	{
> +	  gcc_checking_assert (targetm.gen_ccmp_next != NULL);
> +	  r = expand_ccmp_expr (g, TYPE_MODE (ops.type));
> +	  if (r)
> +	    break;
> +	}
> +      /* Fallthru */
> +    case GIMPLE_UNARY_RHS:
> +      ops.op0 = gimple_assign_rhs1 (g);
> +      ops.location = loc;
> +      r = expand_expr_real_2 (&ops, target, tmode, modifier);
> +      break;
> +    case GIMPLE_SINGLE_RHS:
> +      {
> +	r = expand_expr_real (gimple_assign_rhs1 (g), target,
> +			      tmode, modifier, alt_rtl,
> +			      inner_reference_p);
> +	break;
> +      }
> +    default:
> +      gcc_unreachable ();
> +    }
> +  set_curr_insn_location (saved_loc);
> +  if (REG_P (r) && !REG_EXPR (r))
> +    set_reg_attrs_for_decl_rtl (lhs, r);
> +  return r;
> +}
> +
>  rtx
>  expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
>  		    enum expand_modifier modifier, rtx *alt_rtl,
> @@ -11198,51 +11254,8 @@ expand_expr_real_1 (tree exp, rtx target,
> machine_mode tmode,
>  	  && stmt_is_replaceable_p (SSA_NAME_DEF_STMT (exp)))
>  	g = SSA_NAME_DEF_STMT (exp);
>        if (g)
> -	{
> -	  rtx r;
> -	  location_t saved_loc = curr_insn_location ();
> -	  loc = gimple_location (g);
> -	  if (loc != UNKNOWN_LOCATION)
> -	    set_curr_insn_location (loc);
> -	  ops.code = gimple_assign_rhs_code (g);
> -          switch (get_gimple_rhs_class (ops.code))
> -	    {
> -	    case GIMPLE_TERNARY_RHS:
> -	      ops.op2 = gimple_assign_rhs3 (g);
> -	      /* Fallthru */
> -	    case GIMPLE_BINARY_RHS:
> -	      ops.op1 = gimple_assign_rhs2 (g);
> -
> -	      /* Try to expand conditonal compare.  */
> -	      if (targetm.gen_ccmp_first)
> -		{
> -		  gcc_checking_assert (targetm.gen_ccmp_next != NULL);
> -		  r = expand_ccmp_expr (g, mode);
> -		  if (r)
> -		    break;
> -		}
> -	      /* Fallthru */
> -	    case GIMPLE_UNARY_RHS:
> -	      ops.op0 = gimple_assign_rhs1 (g);
> -	      ops.type = TREE_TYPE (gimple_assign_lhs (g));
> -	      ops.location = loc;
> -	      r = expand_expr_real_2 (&ops, target, tmode, modifier);
> -	      break;
> -	    case GIMPLE_SINGLE_RHS:
> -	      {
> -		r = expand_expr_real (gimple_assign_rhs1 (g), target,
> -				      tmode, modifier, alt_rtl,
> -				      inner_reference_p);
> -		break;
> -	      }
> -	    default:
> -	      gcc_unreachable ();
> -	    }
> -	  set_curr_insn_location (saved_loc);
> -	  if (REG_P (r) && !REG_EXPR (r))
> -	    set_reg_attrs_for_decl_rtl (SSA_NAME_VAR (exp), r);
> -	  return r;
> -	}
> +	return expand_expr_real_gassign (as_a<gassign *> (g), target, tmode,
> +					 modifier, alt_rtl, inner_reference_p);
> 
>        ssa_name = exp;
>        decl_rtl = get_rtx_for_ssa_name (ssa_name);
> diff --git a/gcc/expr.h b/gcc/expr.h
> index f04f40da6ab..64956f63029 100644
> --- a/gcc/expr.h
> +++ b/gcc/expr.h
> @@ -302,6 +302,9 @@ extern rtx expand_expr_real_1 (tree, rtx,
> machine_mode,
>  			       enum expand_modifier, rtx *, bool);
>  extern rtx expand_expr_real_2 (sepops, rtx, machine_mode,
>  			       enum expand_modifier);
> +extern rtx expand_expr_real_gassign (gassign *, rtx, machine_mode,
> +				     enum expand_modifier modifier,
> +				     rtx * = nullptr, bool = false);
> 
>  /* Generate code for computing expression EXP.
>     An rtx for the computed value is returned.  The value is never null.
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> new file mode 100644
> index 00000000000..a2b47fbee14
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> @@ -0,0 +1,20 @@
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +
> +void foo(void);
> +int f1(int a, int b)
> +{
> +  int c = a == 0 || b == 0;
> +  if (c) foo();
> +  return c;
> +}
> +
> +/* We should get one cmp followed by ccmp and one cset. */
> +/* { dg-final { scan-assembler "\tccmp\t" } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 1 } } */
> +/* And not get 2 cmps and 2 (or more cset) and orr and a cbnz. */
> +/* { dg-final { scan-assembler-not "\torr\t" } } */
> +/* { dg-final { scan-assembler-not "\tcbnz\t" } } */
> +/* { dg-final { scan-assembler-not "\tcbz\t" } } */
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> new file mode 100644
> index 00000000000..bc0f57a7c59
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> @@ -0,0 +1,35 @@
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +
> +void foo(void);
> +int f1(int a, int b, int d)
> +{
> +  int c = a < 8 || b < 9;
> +  int e = d < 11 || c;
> +  if (e) foo();
> +  return c;
> +}
> +
> +/*
> +  We really should get:
> +        cmp     w0, 7
> +        ccmp    w1, 8, 4, gt
> +        cset    w0, le
> +        ccmp    w2, 10, 4, gt
> +        ble     .L11
> +
> +  But we currently get:
> +        cmp     w0, 7
> +        ccmp    w1, 8, 4, gt
> +        cset    w0, le
> +        cmp     w0, 0
> +        ccmp    w2, 10, 4, eq
> +        ble     .L11
> +  The middle cmp is not needed.
> + */
> +
> +/* We should end up with only one cmp and 2 ccmp and 1 cset but currently
> we get 2 cmp
> +   though. */
> +/* { dg-final { scan-assembler-times "\tccmp\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 1 { xfail *-*-* } } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_5.c
> b/gcc/testsuite/gcc.target/aarch64/ccmp_5.c
> new file mode 100644
> index 00000000000..7e52ae4f322
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_5.c
> @@ -0,0 +1,20 @@
> +
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +void f1(int a, int b, _Bool *x)
> +{
> +  x[0] = x[1] = a == 0 || b == 0;
> +}
> +
> +void f2(int a, int b, int *x)
> +{
> +  x[0] = x[1] = a == 0 || b == 0;
> +}
> +
> +
> +/* Both functions should be using ccmp rather than 2 cset/orr.  */
> +/* { dg-final { scan-assembler-times "\tccmp\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 2 } } */
> +/* { dg-final { scan-assembler-not "\torr\t" } } */
> +
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Ping: [PATCHv3] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942]
  2024-01-12 12:26 ` [PATCHv3] " Richard Sandiford
  2024-01-12 21:28   ` Andrew Pinski (QUIC)
@ 2024-01-22 13:24   ` Richard Sandiford
  2024-01-22 14:06     ` Richard Biener
  1 sibling, 1 reply; 5+ messages in thread
From: Richard Sandiford @ 2024-01-22 13:24 UTC (permalink / raw)
  To: Andrew Pinski; +Cc: gcc-patches

Ping for the expr/cfgexpand bits

Richard Sandiford <richard.sandiford@arm.com> writes:
> Andrew Pinski <quic_apinski@quicinc.com> writes:
>> Ccmp is not used if the result of the and/ior is used by both
>> a GIMPLE_COND and a GIMPLE_ASSIGN. This improves the code generation
>> here by using ccmp in this case.
>> Two changes is required, first we need to allow the outer statement's
>> result be used more than once.
>> The second change is that during the expansion of the gimple, we need
>> to try using ccmp. This is needed because we don't use expand the ssa
>> name of the lhs but rather expand directly from the gimple.
>>
>> A small note on the ccmp_4.c testcase, we should be able to get slightly
>> better than with this patch but it is one extra instruction compared to
>> before.
>>
>> Diff from v1:
>> * v2: Split out expand_gimple_assign_ssa so the we only need to handle
>> promotion once. Add ccmp_5.c testcase which was suggested. Change comment
>> on ccmp_candidate_p.
>
> I meant more that we should split out the gassign handling in
> expand_expr_real_1, since we're effectively making cfgexpand follow
> it more closely.  What do you think about the attached version?
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.
>
> OK for the expr/cfgexpand bits?
>
> Thanks,
> Richard
>
> ----
>
> Ccmp is not used if the result of the and/ior is used by both
> a GIMPLE_COND and a GIMPLE_ASSIGN. This improves the code generation
> here by using ccmp in this case.
> Two changes is required, first we need to allow the outer statement's
> result be used more than once.
> The second change is that during the expansion of the gimple, we need
> to try using ccmp. This is needed because we don't use expand the ssa
> name of the lhs but rather expand directly from the gimple.
>
> A small note on the ccmp_4.c testcase, we should be able to get slightly
> better than with this patch but it is one extra instruction compared to
> before.
>
> 	PR target/100942
>
> gcc/ChangeLog:
>
> 	* ccmp.cc (ccmp_candidate_p): Add outer argument.
> 	Allow if the outer is true and the lhs is used more
> 	than once.
> 	(expand_ccmp_expr): Update call to ccmp_candidate_p.
> 	* expr.h (expand_expr_real_gassign): Declare.
> 	* expr.cc (expand_expr_real_gassign): New function, split out from...
> 	(expand_expr_real_1): ...here.
> 	* cfgexpand.cc (expand_gimple_stmt_1): Use expand_expr_real_gassign.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/aarch64/ccmp_3.c: New test.
> 	* gcc.target/aarch64/ccmp_4.c: New test.
> 	* gcc.target/aarch64/ccmp_5.c: New test.
>
> Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
> Co-authored-by: Richard Sandiford <richard.sandiford@arm.com>
> ---
>  gcc/ccmp.cc                               |  12 +--
>  gcc/cfgexpand.cc                          |  31 ++-----
>  gcc/expr.cc                               | 103 ++++++++++++----------
>  gcc/expr.h                                |   3 +
>  gcc/testsuite/gcc.target/aarch64/ccmp_3.c |  20 +++++
>  gcc/testsuite/gcc.target/aarch64/ccmp_4.c |  35 ++++++++
>  gcc/testsuite/gcc.target/aarch64/ccmp_5.c |  20 +++++
>  7 files changed, 149 insertions(+), 75 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_4.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_5.c
>
> diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc
> index 09d6b5595a4..7cb525addf4 100644
> --- a/gcc/ccmp.cc
> +++ b/gcc/ccmp.cc
> @@ -90,9 +90,10 @@ ccmp_tree_comparison_p (tree t, basic_block bb)
>     If all checks OK in expand_ccmp_expr, it emits insns in prep_seq, then
>     insns in gen_seq.  */
>  
> -/* Check whether G is a potential conditional compare candidate.  */
> +/* Check whether G is a potential conditional compare candidate; OUTER is true if
> +   G is the outer most AND/IOR.  */
>  static bool
> -ccmp_candidate_p (gimple *g)
> +ccmp_candidate_p (gimple *g, bool outer = false)
>  {
>    tree lhs, op0, op1;
>    gimple *gs0, *gs1;
> @@ -109,8 +110,9 @@ ccmp_candidate_p (gimple *g)
>    lhs = gimple_assign_lhs (g);
>    op0 = gimple_assign_rhs1 (g);
>    op1 = gimple_assign_rhs2 (g);
> -  if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME)
> -      || !has_single_use (lhs))
> +  if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME))
> +    return false;
> +  if (!outer && !has_single_use (lhs))
>      return false;
>  
>    bb = gimple_bb (g);
> @@ -284,7 +286,7 @@ expand_ccmp_expr (gimple *g, machine_mode mode)
>    rtx_insn *last;
>    rtx tmp;
>  
> -  if (!ccmp_candidate_p (g))
> +  if (!ccmp_candidate_p (g, true))
>      return NULL_RTX;
>  
>    last = get_last_insn ();
> diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
> index 1db22f0a1a3..381ed2c82d7 100644
> --- a/gcc/cfgexpand.cc
> +++ b/gcc/cfgexpand.cc
> @@ -3971,37 +3971,18 @@ expand_gimple_stmt_1 (gimple *stmt)
>  	  {
>  	    rtx target, temp;
>  	    bool nontemporal = gimple_assign_nontemporal_move_p (assign_stmt);
> -	    struct separate_ops ops;
>  	    bool promoted = false;
>  
>  	    target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
>  	    if (GET_CODE (target) == SUBREG && SUBREG_PROMOTED_VAR_P (target))
>  	      promoted = true;
>  
> -	    ops.code = gimple_assign_rhs_code (assign_stmt);
> -	    ops.type = TREE_TYPE (lhs);
> -	    switch (get_gimple_rhs_class (ops.code))
> -	      {
> -		case GIMPLE_TERNARY_RHS:
> -		  ops.op2 = gimple_assign_rhs3 (assign_stmt);
> -		  /* Fallthru */
> -		case GIMPLE_BINARY_RHS:
> -		  ops.op1 = gimple_assign_rhs2 (assign_stmt);
> -		  /* Fallthru */
> -		case GIMPLE_UNARY_RHS:
> -		  ops.op0 = gimple_assign_rhs1 (assign_stmt);
> -		  break;
> -		default:
> -		  gcc_unreachable ();
> -	      }
> -	    ops.location = gimple_location (stmt);
> -
> -	    /* If we want to use a nontemporal store, force the value to
> -	       register first.  If we store into a promoted register,
> -	       don't directly expand to target.  */
> +	   /* If we want to use a nontemporal store, force the value to
> +	      register first.  If we store into a promoted register,
> +	      don't directly expand to target.  */
>  	    temp = nontemporal || promoted ? NULL_RTX : target;
> -	    temp = expand_expr_real_2 (&ops, temp, GET_MODE (target),
> -				       EXPAND_NORMAL);
> +	    temp = expand_expr_real_gassign (assign_stmt, temp,
> +					     GET_MODE (target), EXPAND_NORMAL);
>  
>  	    if (temp == target)
>  	      ;
> @@ -4013,7 +3994,7 @@ expand_gimple_stmt_1 (gimple *stmt)
>  		if (CONSTANT_P (temp) && GET_MODE (temp) == VOIDmode)
>  		  {
>  		    temp = convert_modes (GET_MODE (target),
> -					  TYPE_MODE (ops.type),
> +					  TYPE_MODE (TREE_TYPE (lhs)),
>  					  temp, unsignedp);
>  		    temp = convert_modes (GET_MODE (SUBREG_REG (target)),
>  					  GET_MODE (target), temp, unsignedp);
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index dc816bc20fa..f9052a6ff5f 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -11035,6 +11035,62 @@ stmt_is_replaceable_p (gimple *stmt)
>    return false;
>  }
>  
> +/* A subroutine of expand_expr_real_1.  Expand gimple assignment G,
> +   which is known to set an SSA_NAME result.  The other arguments are
> +   as for expand_expr_real_1.  */
> +
> +rtx
> +expand_expr_real_gassign (gassign *g, rtx target, machine_mode tmode,
> +			  enum expand_modifier modifier, rtx *alt_rtl,
> +			  bool inner_reference_p)
> +{
> +  separate_ops ops;
> +  rtx r;
> +  location_t saved_loc = curr_insn_location ();
> +  auto loc = gimple_location (g);
> +  if (loc != UNKNOWN_LOCATION)
> +    set_curr_insn_location (loc);
> +  tree lhs = gimple_assign_lhs (g);
> +  ops.code = gimple_assign_rhs_code (g);
> +  ops.type = TREE_TYPE (lhs);
> +  switch (get_gimple_rhs_class (ops.code))
> +    {
> +    case GIMPLE_TERNARY_RHS:
> +      ops.op2 = gimple_assign_rhs3 (g);
> +      /* Fallthru */
> +    case GIMPLE_BINARY_RHS:
> +      ops.op1 = gimple_assign_rhs2 (g);
> +
> +      /* Try to expand conditonal compare.  */
> +      if (targetm.gen_ccmp_first)
> +	{
> +	  gcc_checking_assert (targetm.gen_ccmp_next != NULL);
> +	  r = expand_ccmp_expr (g, TYPE_MODE (ops.type));
> +	  if (r)
> +	    break;
> +	}
> +      /* Fallthru */
> +    case GIMPLE_UNARY_RHS:
> +      ops.op0 = gimple_assign_rhs1 (g);
> +      ops.location = loc;
> +      r = expand_expr_real_2 (&ops, target, tmode, modifier);
> +      break;
> +    case GIMPLE_SINGLE_RHS:
> +      {
> +	r = expand_expr_real (gimple_assign_rhs1 (g), target,
> +			      tmode, modifier, alt_rtl,
> +			      inner_reference_p);
> +	break;
> +      }
> +    default:
> +      gcc_unreachable ();
> +    }
> +  set_curr_insn_location (saved_loc);
> +  if (REG_P (r) && !REG_EXPR (r))
> +    set_reg_attrs_for_decl_rtl (lhs, r);
> +  return r;
> +}
> +
>  rtx
>  expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
>  		    enum expand_modifier modifier, rtx *alt_rtl,
> @@ -11198,51 +11254,8 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
>  	  && stmt_is_replaceable_p (SSA_NAME_DEF_STMT (exp)))
>  	g = SSA_NAME_DEF_STMT (exp);
>        if (g)
> -	{
> -	  rtx r;
> -	  location_t saved_loc = curr_insn_location ();
> -	  loc = gimple_location (g);
> -	  if (loc != UNKNOWN_LOCATION)
> -	    set_curr_insn_location (loc);
> -	  ops.code = gimple_assign_rhs_code (g);
> -          switch (get_gimple_rhs_class (ops.code))
> -	    {
> -	    case GIMPLE_TERNARY_RHS:
> -	      ops.op2 = gimple_assign_rhs3 (g);
> -	      /* Fallthru */
> -	    case GIMPLE_BINARY_RHS:
> -	      ops.op1 = gimple_assign_rhs2 (g);
> -
> -	      /* Try to expand conditonal compare.  */
> -	      if (targetm.gen_ccmp_first)
> -		{
> -		  gcc_checking_assert (targetm.gen_ccmp_next != NULL);
> -		  r = expand_ccmp_expr (g, mode);
> -		  if (r)
> -		    break;
> -		}
> -	      /* Fallthru */
> -	    case GIMPLE_UNARY_RHS:
> -	      ops.op0 = gimple_assign_rhs1 (g);
> -	      ops.type = TREE_TYPE (gimple_assign_lhs (g));
> -	      ops.location = loc;
> -	      r = expand_expr_real_2 (&ops, target, tmode, modifier);
> -	      break;
> -	    case GIMPLE_SINGLE_RHS:
> -	      {
> -		r = expand_expr_real (gimple_assign_rhs1 (g), target,
> -				      tmode, modifier, alt_rtl,
> -				      inner_reference_p);
> -		break;
> -	      }
> -	    default:
> -	      gcc_unreachable ();
> -	    }
> -	  set_curr_insn_location (saved_loc);
> -	  if (REG_P (r) && !REG_EXPR (r))
> -	    set_reg_attrs_for_decl_rtl (SSA_NAME_VAR (exp), r);
> -	  return r;
> -	}
> +	return expand_expr_real_gassign (as_a<gassign *> (g), target, tmode,
> +					 modifier, alt_rtl, inner_reference_p);
>  
>        ssa_name = exp;
>        decl_rtl = get_rtx_for_ssa_name (ssa_name);
> diff --git a/gcc/expr.h b/gcc/expr.h
> index f04f40da6ab..64956f63029 100644
> --- a/gcc/expr.h
> +++ b/gcc/expr.h
> @@ -302,6 +302,9 @@ extern rtx expand_expr_real_1 (tree, rtx, machine_mode,
>  			       enum expand_modifier, rtx *, bool);
>  extern rtx expand_expr_real_2 (sepops, rtx, machine_mode,
>  			       enum expand_modifier);
> +extern rtx expand_expr_real_gassign (gassign *, rtx, machine_mode,
> +				     enum expand_modifier modifier,
> +				     rtx * = nullptr, bool = false);
>  
>  /* Generate code for computing expression EXP.
>     An rtx for the computed value is returned.  The value is never null.
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> new file mode 100644
> index 00000000000..a2b47fbee14
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> @@ -0,0 +1,20 @@
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +
> +void foo(void);
> +int f1(int a, int b)
> +{
> +  int c = a == 0 || b == 0;
> +  if (c) foo();
> +  return c;
> +}
> +
> +/* We should get one cmp followed by ccmp and one cset. */
> +/* { dg-final { scan-assembler "\tccmp\t" } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 1 } } */
> +/* And not get 2 cmps and 2 (or more cset) and orr and a cbnz. */
> +/* { dg-final { scan-assembler-not "\torr\t" } } */
> +/* { dg-final { scan-assembler-not "\tcbnz\t" } } */
> +/* { dg-final { scan-assembler-not "\tcbz\t" } } */
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_4.c b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> new file mode 100644
> index 00000000000..bc0f57a7c59
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> @@ -0,0 +1,35 @@
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +
> +void foo(void);
> +int f1(int a, int b, int d)
> +{
> +  int c = a < 8 || b < 9;
> +  int e = d < 11 || c;
> +  if (e) foo();
> +  return c;
> +}
> +
> +/*
> +  We really should get:
> +        cmp     w0, 7
> +        ccmp    w1, 8, 4, gt
> +        cset    w0, le
> +        ccmp    w2, 10, 4, gt
> +        ble     .L11
> +
> +  But we currently get:
> +        cmp     w0, 7
> +        ccmp    w1, 8, 4, gt
> +        cset    w0, le
> +        cmp     w0, 0
> +        ccmp    w2, 10, 4, eq
> +        ble     .L11
> +  The middle cmp is not needed.
> + */
> +
> +/* We should end up with only one cmp and 2 ccmp and 1 cset but currently we get 2 cmp
> +   though. */
> +/* { dg-final { scan-assembler-times "\tccmp\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 1 { xfail *-*-* } } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_5.c b/gcc/testsuite/gcc.target/aarch64/ccmp_5.c
> new file mode 100644
> index 00000000000..7e52ae4f322
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_5.c
> @@ -0,0 +1,20 @@
> +
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +void f1(int a, int b, _Bool *x)
> +{
> +  x[0] = x[1] = a == 0 || b == 0;
> +}
> +
> +void f2(int a, int b, int *x)
> +{
> +  x[0] = x[1] = a == 0 || b == 0;
> +}
> +
> +
> +/* Both functions should be using ccmp rather than 2 cset/orr.  */
> +/* { dg-final { scan-assembler-times "\tccmp\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 2 } } */
> +/* { dg-final { scan-assembler-not "\torr\t" } } */
> +

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: Ping: [PATCHv3] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942]
  2024-01-22 13:24   ` Ping: " Richard Sandiford
@ 2024-01-22 14:06     ` Richard Biener
  0 siblings, 0 replies; 5+ messages in thread
From: Richard Biener @ 2024-01-22 14:06 UTC (permalink / raw)
  To: Andrew Pinski, gcc-patches, richard.sandiford

On Mon, Jan 22, 2024 at 2:24 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Ping for the expr/cfgexpand bits
>
> Richard Sandiford <richard.sandiford@arm.com> writes:
> > Andrew Pinski <quic_apinski@quicinc.com> writes:
> >> Ccmp is not used if the result of the and/ior is used by both
> >> a GIMPLE_COND and a GIMPLE_ASSIGN. This improves the code generation
> >> here by using ccmp in this case.
> >> Two changes is required, first we need to allow the outer statement's
> >> result be used more than once.
> >> The second change is that during the expansion of the gimple, we need
> >> to try using ccmp. This is needed because we don't use expand the ssa
> >> name of the lhs but rather expand directly from the gimple.
> >>
> >> A small note on the ccmp_4.c testcase, we should be able to get slightly
> >> better than with this patch but it is one extra instruction compared to
> >> before.
> >>
> >> Diff from v1:
> >> * v2: Split out expand_gimple_assign_ssa so the we only need to handle
> >> promotion once. Add ccmp_5.c testcase which was suggested. Change comment
> >> on ccmp_candidate_p.
> >
> > I meant more that we should split out the gassign handling in
> > expand_expr_real_1, since we're effectively making cfgexpand follow
> > it more closely.  What do you think about the attached version?
> > Tested on aarch64-linux-gnu and x86_64-linux-gnu.
> >
> > OK for the expr/cfgexpand bits?

OK.

Richard.

> > Thanks,
> > Richard
> >
> > ----
> >
> > Ccmp is not used if the result of the and/ior is used by both
> > a GIMPLE_COND and a GIMPLE_ASSIGN. This improves the code generation
> > here by using ccmp in this case.
> > Two changes is required, first we need to allow the outer statement's
> > result be used more than once.
> > The second change is that during the expansion of the gimple, we need
> > to try using ccmp. This is needed because we don't use expand the ssa
> > name of the lhs but rather expand directly from the gimple.
> >
> > A small note on the ccmp_4.c testcase, we should be able to get slightly
> > better than with this patch but it is one extra instruction compared to
> > before.
> >
> >       PR target/100942
> >
> > gcc/ChangeLog:
> >
> >       * ccmp.cc (ccmp_candidate_p): Add outer argument.
> >       Allow if the outer is true and the lhs is used more
> >       than once.
> >       (expand_ccmp_expr): Update call to ccmp_candidate_p.
> >       * expr.h (expand_expr_real_gassign): Declare.
> >       * expr.cc (expand_expr_real_gassign): New function, split out from...
> >       (expand_expr_real_1): ...here.
> >       * cfgexpand.cc (expand_gimple_stmt_1): Use expand_expr_real_gassign.
> >
> > gcc/testsuite/ChangeLog:
> >
> >       * gcc.target/aarch64/ccmp_3.c: New test.
> >       * gcc.target/aarch64/ccmp_4.c: New test.
> >       * gcc.target/aarch64/ccmp_5.c: New test.
> >
> > Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
> > Co-authored-by: Richard Sandiford <richard.sandiford@arm.com>
> > ---
> >  gcc/ccmp.cc                               |  12 +--
> >  gcc/cfgexpand.cc                          |  31 ++-----
> >  gcc/expr.cc                               | 103 ++++++++++++----------
> >  gcc/expr.h                                |   3 +
> >  gcc/testsuite/gcc.target/aarch64/ccmp_3.c |  20 +++++
> >  gcc/testsuite/gcc.target/aarch64/ccmp_4.c |  35 ++++++++
> >  gcc/testsuite/gcc.target/aarch64/ccmp_5.c |  20 +++++
> >  7 files changed, 149 insertions(+), 75 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_5.c
> >
> > diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc
> > index 09d6b5595a4..7cb525addf4 100644
> > --- a/gcc/ccmp.cc
> > +++ b/gcc/ccmp.cc
> > @@ -90,9 +90,10 @@ ccmp_tree_comparison_p (tree t, basic_block bb)
> >     If all checks OK in expand_ccmp_expr, it emits insns in prep_seq, then
> >     insns in gen_seq.  */
> >
> > -/* Check whether G is a potential conditional compare candidate.  */
> > +/* Check whether G is a potential conditional compare candidate; OUTER is true if
> > +   G is the outer most AND/IOR.  */
> >  static bool
> > -ccmp_candidate_p (gimple *g)
> > +ccmp_candidate_p (gimple *g, bool outer = false)
> >  {
> >    tree lhs, op0, op1;
> >    gimple *gs0, *gs1;
> > @@ -109,8 +110,9 @@ ccmp_candidate_p (gimple *g)
> >    lhs = gimple_assign_lhs (g);
> >    op0 = gimple_assign_rhs1 (g);
> >    op1 = gimple_assign_rhs2 (g);
> > -  if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME)
> > -      || !has_single_use (lhs))
> > +  if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME))
> > +    return false;
> > +  if (!outer && !has_single_use (lhs))
> >      return false;
> >
> >    bb = gimple_bb (g);
> > @@ -284,7 +286,7 @@ expand_ccmp_expr (gimple *g, machine_mode mode)
> >    rtx_insn *last;
> >    rtx tmp;
> >
> > -  if (!ccmp_candidate_p (g))
> > +  if (!ccmp_candidate_p (g, true))
> >      return NULL_RTX;
> >
> >    last = get_last_insn ();
> > diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
> > index 1db22f0a1a3..381ed2c82d7 100644
> > --- a/gcc/cfgexpand.cc
> > +++ b/gcc/cfgexpand.cc
> > @@ -3971,37 +3971,18 @@ expand_gimple_stmt_1 (gimple *stmt)
> >         {
> >           rtx target, temp;
> >           bool nontemporal = gimple_assign_nontemporal_move_p (assign_stmt);
> > -         struct separate_ops ops;
> >           bool promoted = false;
> >
> >           target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
> >           if (GET_CODE (target) == SUBREG && SUBREG_PROMOTED_VAR_P (target))
> >             promoted = true;
> >
> > -         ops.code = gimple_assign_rhs_code (assign_stmt);
> > -         ops.type = TREE_TYPE (lhs);
> > -         switch (get_gimple_rhs_class (ops.code))
> > -           {
> > -             case GIMPLE_TERNARY_RHS:
> > -               ops.op2 = gimple_assign_rhs3 (assign_stmt);
> > -               /* Fallthru */
> > -             case GIMPLE_BINARY_RHS:
> > -               ops.op1 = gimple_assign_rhs2 (assign_stmt);
> > -               /* Fallthru */
> > -             case GIMPLE_UNARY_RHS:
> > -               ops.op0 = gimple_assign_rhs1 (assign_stmt);
> > -               break;
> > -             default:
> > -               gcc_unreachable ();
> > -           }
> > -         ops.location = gimple_location (stmt);
> > -
> > -         /* If we want to use a nontemporal store, force the value to
> > -            register first.  If we store into a promoted register,
> > -            don't directly expand to target.  */
> > +        /* If we want to use a nontemporal store, force the value to
> > +           register first.  If we store into a promoted register,
> > +           don't directly expand to target.  */
> >           temp = nontemporal || promoted ? NULL_RTX : target;
> > -         temp = expand_expr_real_2 (&ops, temp, GET_MODE (target),
> > -                                    EXPAND_NORMAL);
> > +         temp = expand_expr_real_gassign (assign_stmt, temp,
> > +                                          GET_MODE (target), EXPAND_NORMAL);
> >
> >           if (temp == target)
> >             ;
> > @@ -4013,7 +3994,7 @@ expand_gimple_stmt_1 (gimple *stmt)
> >               if (CONSTANT_P (temp) && GET_MODE (temp) == VOIDmode)
> >                 {
> >                   temp = convert_modes (GET_MODE (target),
> > -                                       TYPE_MODE (ops.type),
> > +                                       TYPE_MODE (TREE_TYPE (lhs)),
> >                                         temp, unsignedp);
> >                   temp = convert_modes (GET_MODE (SUBREG_REG (target)),
> >                                         GET_MODE (target), temp, unsignedp);
> > diff --git a/gcc/expr.cc b/gcc/expr.cc
> > index dc816bc20fa..f9052a6ff5f 100644
> > --- a/gcc/expr.cc
> > +++ b/gcc/expr.cc
> > @@ -11035,6 +11035,62 @@ stmt_is_replaceable_p (gimple *stmt)
> >    return false;
> >  }
> >
> > +/* A subroutine of expand_expr_real_1.  Expand gimple assignment G,
> > +   which is known to set an SSA_NAME result.  The other arguments are
> > +   as for expand_expr_real_1.  */
> > +
> > +rtx
> > +expand_expr_real_gassign (gassign *g, rtx target, machine_mode tmode,
> > +                       enum expand_modifier modifier, rtx *alt_rtl,
> > +                       bool inner_reference_p)
> > +{
> > +  separate_ops ops;
> > +  rtx r;
> > +  location_t saved_loc = curr_insn_location ();
> > +  auto loc = gimple_location (g);
> > +  if (loc != UNKNOWN_LOCATION)
> > +    set_curr_insn_location (loc);
> > +  tree lhs = gimple_assign_lhs (g);
> > +  ops.code = gimple_assign_rhs_code (g);
> > +  ops.type = TREE_TYPE (lhs);
> > +  switch (get_gimple_rhs_class (ops.code))
> > +    {
> > +    case GIMPLE_TERNARY_RHS:
> > +      ops.op2 = gimple_assign_rhs3 (g);
> > +      /* Fallthru */
> > +    case GIMPLE_BINARY_RHS:
> > +      ops.op1 = gimple_assign_rhs2 (g);
> > +
> > +      /* Try to expand conditonal compare.  */
> > +      if (targetm.gen_ccmp_first)
> > +     {
> > +       gcc_checking_assert (targetm.gen_ccmp_next != NULL);
> > +       r = expand_ccmp_expr (g, TYPE_MODE (ops.type));
> > +       if (r)
> > +         break;
> > +     }
> > +      /* Fallthru */
> > +    case GIMPLE_UNARY_RHS:
> > +      ops.op0 = gimple_assign_rhs1 (g);
> > +      ops.location = loc;
> > +      r = expand_expr_real_2 (&ops, target, tmode, modifier);
> > +      break;
> > +    case GIMPLE_SINGLE_RHS:
> > +      {
> > +     r = expand_expr_real (gimple_assign_rhs1 (g), target,
> > +                           tmode, modifier, alt_rtl,
> > +                           inner_reference_p);
> > +     break;
> > +      }
> > +    default:
> > +      gcc_unreachable ();
> > +    }
> > +  set_curr_insn_location (saved_loc);
> > +  if (REG_P (r) && !REG_EXPR (r))
> > +    set_reg_attrs_for_decl_rtl (lhs, r);
> > +  return r;
> > +}
> > +
> >  rtx
> >  expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
> >                   enum expand_modifier modifier, rtx *alt_rtl,
> > @@ -11198,51 +11254,8 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
> >         && stmt_is_replaceable_p (SSA_NAME_DEF_STMT (exp)))
> >       g = SSA_NAME_DEF_STMT (exp);
> >        if (g)
> > -     {
> > -       rtx r;
> > -       location_t saved_loc = curr_insn_location ();
> > -       loc = gimple_location (g);
> > -       if (loc != UNKNOWN_LOCATION)
> > -         set_curr_insn_location (loc);
> > -       ops.code = gimple_assign_rhs_code (g);
> > -          switch (get_gimple_rhs_class (ops.code))
> > -         {
> > -         case GIMPLE_TERNARY_RHS:
> > -           ops.op2 = gimple_assign_rhs3 (g);
> > -           /* Fallthru */
> > -         case GIMPLE_BINARY_RHS:
> > -           ops.op1 = gimple_assign_rhs2 (g);
> > -
> > -           /* Try to expand conditonal compare.  */
> > -           if (targetm.gen_ccmp_first)
> > -             {
> > -               gcc_checking_assert (targetm.gen_ccmp_next != NULL);
> > -               r = expand_ccmp_expr (g, mode);
> > -               if (r)
> > -                 break;
> > -             }
> > -           /* Fallthru */
> > -         case GIMPLE_UNARY_RHS:
> > -           ops.op0 = gimple_assign_rhs1 (g);
> > -           ops.type = TREE_TYPE (gimple_assign_lhs (g));
> > -           ops.location = loc;
> > -           r = expand_expr_real_2 (&ops, target, tmode, modifier);
> > -           break;
> > -         case GIMPLE_SINGLE_RHS:
> > -           {
> > -             r = expand_expr_real (gimple_assign_rhs1 (g), target,
> > -                                   tmode, modifier, alt_rtl,
> > -                                   inner_reference_p);
> > -             break;
> > -           }
> > -         default:
> > -           gcc_unreachable ();
> > -         }
> > -       set_curr_insn_location (saved_loc);
> > -       if (REG_P (r) && !REG_EXPR (r))
> > -         set_reg_attrs_for_decl_rtl (SSA_NAME_VAR (exp), r);
> > -       return r;
> > -     }
> > +     return expand_expr_real_gassign (as_a<gassign *> (g), target, tmode,
> > +                                      modifier, alt_rtl, inner_reference_p);
> >
> >        ssa_name = exp;
> >        decl_rtl = get_rtx_for_ssa_name (ssa_name);
> > diff --git a/gcc/expr.h b/gcc/expr.h
> > index f04f40da6ab..64956f63029 100644
> > --- a/gcc/expr.h
> > +++ b/gcc/expr.h
> > @@ -302,6 +302,9 @@ extern rtx expand_expr_real_1 (tree, rtx, machine_mode,
> >                              enum expand_modifier, rtx *, bool);
> >  extern rtx expand_expr_real_2 (sepops, rtx, machine_mode,
> >                              enum expand_modifier);
> > +extern rtx expand_expr_real_gassign (gassign *, rtx, machine_mode,
> > +                                  enum expand_modifier modifier,
> > +                                  rtx * = nullptr, bool = false);
> >
> >  /* Generate code for computing expression EXP.
> >     An rtx for the computed value is returned.  The value is never null.
> > diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> > new file mode 100644
> > index 00000000000..a2b47fbee14
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> > @@ -0,0 +1,20 @@
> > +/* { dg-options "-O2" } */
> > +/* PR target/100942 */
> > +
> > +void foo(void);
> > +int f1(int a, int b)
> > +{
> > +  int c = a == 0 || b == 0;
> > +  if (c) foo();
> > +  return c;
> > +}
> > +
> > +/* We should get one cmp followed by ccmp and one cset. */
> > +/* { dg-final { scan-assembler "\tccmp\t" } } */
> > +/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
> > +/* { dg-final { scan-assembler-times "\tcmp\t" 1 } } */
> > +/* And not get 2 cmps and 2 (or more cset) and orr and a cbnz. */
> > +/* { dg-final { scan-assembler-not "\torr\t" } } */
> > +/* { dg-final { scan-assembler-not "\tcbnz\t" } } */
> > +/* { dg-final { scan-assembler-not "\tcbz\t" } } */
> > +
> > diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_4.c b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> > new file mode 100644
> > index 00000000000..bc0f57a7c59
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> > @@ -0,0 +1,35 @@
> > +/* { dg-options "-O2" } */
> > +/* PR target/100942 */
> > +
> > +void foo(void);
> > +int f1(int a, int b, int d)
> > +{
> > +  int c = a < 8 || b < 9;
> > +  int e = d < 11 || c;
> > +  if (e) foo();
> > +  return c;
> > +}
> > +
> > +/*
> > +  We really should get:
> > +        cmp     w0, 7
> > +        ccmp    w1, 8, 4, gt
> > +        cset    w0, le
> > +        ccmp    w2, 10, 4, gt
> > +        ble     .L11
> > +
> > +  But we currently get:
> > +        cmp     w0, 7
> > +        ccmp    w1, 8, 4, gt
> > +        cset    w0, le
> > +        cmp     w0, 0
> > +        ccmp    w2, 10, 4, eq
> > +        ble     .L11
> > +  The middle cmp is not needed.
> > + */
> > +
> > +/* We should end up with only one cmp and 2 ccmp and 1 cset but currently we get 2 cmp
> > +   though. */
> > +/* { dg-final { scan-assembler-times "\tccmp\t" 2 } } */
> > +/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
> > +/* { dg-final { scan-assembler-times "\tcmp\t" 1 { xfail *-*-* } } } */
> > diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_5.c b/gcc/testsuite/gcc.target/aarch64/ccmp_5.c
> > new file mode 100644
> > index 00000000000..7e52ae4f322
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_5.c
> > @@ -0,0 +1,20 @@
> > +
> > +/* { dg-options "-O2" } */
> > +/* PR target/100942 */
> > +void f1(int a, int b, _Bool *x)
> > +{
> > +  x[0] = x[1] = a == 0 || b == 0;
> > +}
> > +
> > +void f2(int a, int b, int *x)
> > +{
> > +  x[0] = x[1] = a == 0 || b == 0;
> > +}
> > +
> > +
> > +/* Both functions should be using ccmp rather than 2 cset/orr.  */
> > +/* { dg-final { scan-assembler-times "\tccmp\t" 2 } } */
> > +/* { dg-final { scan-assembler-times "\tcset\t" 2 } } */
> > +/* { dg-final { scan-assembler-times "\tcmp\t" 2 } } */
> > +/* { dg-final { scan-assembler-not "\torr\t" } } */
> > +

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-01-22 14:07 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-05  7:43 [PATCHv2] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942] Andrew Pinski
2024-01-12 12:26 ` [PATCHv3] " Richard Sandiford
2024-01-12 21:28   ` Andrew Pinski (QUIC)
2024-01-22 13:24   ` Ping: " Richard Sandiford
2024-01-22 14:06     ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).