public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: gcc-patches@gcc.gnu.org
Cc: richard.earnshaw@arm.com, richard.sandiford@arm.com,
	marcus.shawcroft@arm.com, kyrylo.tkachov@arm.com,
	Wilco.Dijkstra@arm.com
Subject: [PATCH v2 8/9] aarch64: Implement TImode comparisons
Date: Fri, 20 Mar 2020 19:42:30 -0700	[thread overview]
Message-ID: <20200321024231.13778-9-richard.henderson@linaro.org> (raw)
In-Reply-To: <20200321024231.13778-1-richard.henderson@linaro.org>

Use ccmp to perform all TImode comparisons branchlessly.

	* config/aarch64/aarch64.c (aarch64_gen_compare_reg): Expand all of
	the comparisons for TImode, not just NE.
	* config/aarch64/aarch64.md (cbranchti4, cstoreti4): New.
---
 gcc/config/aarch64/aarch64.c  | 130 ++++++++++++++++++++++++++++++----
 gcc/config/aarch64/aarch64.md |  28 ++++++++
 2 files changed, 144 insertions(+), 14 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 9e7c26a8df2..6ae0ea388ce 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2333,32 +2333,134 @@ rtx
 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
 {
   machine_mode cmp_mode = GET_MODE (x);
-  machine_mode cc_mode;
   rtx cc_reg;
 
   if (cmp_mode == TImode)
     {
-      gcc_assert (code == NE);
-
-      cc_mode = CCmode;
-      cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
-
       rtx x_lo = operand_subword (x, 0, 0, TImode);
-      rtx y_lo = operand_subword (y, 0, 0, TImode);
-      emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo));
-
       rtx x_hi = operand_subword (x, 1, 0, TImode);
-      rtx y_hi = operand_subword (y, 1, 0, TImode);
-      emit_insn (gen_ccmpccdi (cc_reg, cc_reg, x_hi, y_hi,
-			       gen_rtx_EQ (cc_mode, cc_reg, const0_rtx),
-			       GEN_INT (AARCH64_EQ)));
+      struct expand_operand ops[2];
+      rtx y_lo, y_hi, tmp;
+
+      if (CONST_INT_P (y))
+	{
+	  HOST_WIDE_INT y_int = INTVAL (y);
+
+	  y_lo = y;
+	  switch (code)
+	    {
+	    case EQ:
+	    case NE:
+	      /* For equality, IOR the two halves together.  If this gets
+		 used for a branch, we expect this to fold to cbz/cbnz;
+		 otherwise it's no larger than cmp+ccmp below.  Beware of
+		 the compare-and-swap post-reload split and use cmp+ccmp.  */
+	      if (y_int == 0 && can_create_pseudo_p ())
+		{
+		  tmp = gen_reg_rtx (DImode);
+		  emit_insn (gen_iordi3 (tmp, x_hi, x_lo));
+		  emit_insn (gen_cmpdi (tmp, const0_rtx));
+		  cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+		  goto done;
+		}
+		break;
+
+	    case LE:
+	    case GT:
+	      /* Add 1 to Y to convert to LT/GE, which avoids the swap and
+		 keeps the constant operand.  The cstoreti and cbranchti
+		 operand predicates require aarch64_plus_operand, which
+		 means this increment cannot overflow.  */
+	      y_lo = gen_int_mode (++y_int, DImode);
+	      code = (code == LE ? LT : GE);
+	      /* fall through */
+
+	    case LT:
+	    case GE:
+	      /* Check only the sign bit using tst, or fold to tbz/tbnz.  */
+	      if (y_int == 0)
+		{
+		  cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+		  tmp = gen_rtx_AND (DImode, x_hi, GEN_INT (INT64_MIN));
+		  tmp = gen_rtx_COMPARE (CC_NZmode, tmp, const0_rtx);
+		  emit_set_insn (cc_reg, tmp);
+		  code = (code == LT ? NE : EQ);
+		  goto done;
+		}
+	      break;
+
+	    default:
+	      break;
+	    }
+	  y_hi = (y_int < 0 ? constm1_rtx : const0_rtx);
+	}
+      else
+	{
+	  y_lo = operand_subword (y, 0, 0, TImode);
+	  y_hi = operand_subword (y, 1, 0, TImode);
+	}
+
+      switch (code)
+	{
+	case LEU:
+	case GTU:
+	case LE:
+	case GT:
+	  std::swap (x_lo, y_lo);
+	  std::swap (x_hi, y_hi);
+	  code = swap_condition (code);
+	  break;
+
+	default:
+	  break;
+	}
+
+      /* Emit cmpdi, forcing operands into registers as required.  */
+      create_input_operand (&ops[0], x_lo, DImode);
+      create_input_operand (&ops[1], y_lo, DImode);
+      expand_insn (CODE_FOR_cmpdi, 2, ops);
+
+      cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+      switch (code)
+	{
+	case EQ:
+	case NE:
+	  /* For EQ, (x_lo == y_lo) && (x_hi == y_hi).  */
+	  emit_insn (gen_ccmpccdi (cc_reg, cc_reg, x_hi, y_hi,
+				   gen_rtx_EQ (VOIDmode, cc_reg, const0_rtx),
+				   GEN_INT (AARCH64_EQ)));
+	  break;
+
+	case LTU:
+	case GEU:
+	  /* For LTU, (x - y), as double-word arithmetic.  */
+	  create_input_operand (&ops[0], x_hi, DImode);
+	  create_input_operand (&ops[1], y_hi, DImode);
+	  expand_insn (CODE_FOR_ucmpdi3_carryinC, 2, ops);
+	  /* The result is entirely within the C bit.  */
+	  break;
+
+	case LT:
+	case GE:
+	  /* For LT, (x - y), as double-word arithmetic.  */
+	  create_input_operand (&ops[0], x_hi, DImode);
+	  create_input_operand (&ops[1], y_hi, DImode);
+	  expand_insn (CODE_FOR_scmpdi3_carryinC, 2, ops);
+	  /* The result is within the N and V bits -- normal LT/GE.  */
+	  break;
+
+	default:
+	  gcc_unreachable ();
+	}
     }
   else
     {
-      cc_mode = SELECT_CC_MODE (code, x, y);
+      machine_mode cc_mode = SELECT_CC_MODE (code, x, y);
       cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
       emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y));
     }
+
+ done:
   return gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx);
 }
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 0b44c814bae..284a8038e28 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -471,6 +471,20 @@
   operands[2] = const0_rtx;
 })
 
+(define_expand "cbranchti4"
+  [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
+			    [(match_operand:TI 1 "register_operand")
+			     (match_operand:TI 2 "aarch64_plus_operand")])
+			   (label_ref (match_operand 3 "" ""))
+			   (pc)))]
+  ""
+{
+  operands[0] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
+					 operands[2]);
+  operands[1] = XEXP (operands[0], 0);
+  operands[2] = const0_rtx;
+})
+
 (define_expand "cbranch<mode>4"
   [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
 			    [(match_operand:GPF 1 "register_operand")
@@ -4055,6 +4069,20 @@
   operands[3] = const0_rtx;
 })
 
+(define_expand "cstoreti4"
+  [(set (match_operand:SI 0 "register_operand")
+	(match_operator:SI 1 "aarch64_comparison_operator"
+	 [(match_operand:TI 2 "register_operand")
+	  (match_operand:TI 3 "aarch64_plus_operand")]))]
+  ""
+{
+  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
+				         operands[3]);
+  PUT_MODE (operands[1], SImode);
+  operands[2] = XEXP (operands[1], 0);
+  operands[3] = const0_rtx;
+})
+
 (define_expand "cstorecc4"
   [(set (match_operand:SI 0 "register_operand")
        (match_operator 1 "aarch64_comparison_operator_mode"
-- 
2.20.1


  parent reply	other threads:[~2020-03-21  2:42 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-21  2:42 [PATCH v2 0/9] " Richard Henderson
2020-03-21  2:42 ` [PATCH v2 1/9] aarch64: Accept 0 as first argument to compares Richard Henderson
2020-03-31 16:55   ` Richard Sandiford
2020-03-31 17:15     ` Richard Henderson
2020-03-21  2:42 ` [PATCH v2 2/9] aarch64: Accept zeros in add<GPI>3_carryin Richard Henderson
2020-03-21  2:42 ` [PATCH v2 3/9] aarch64: Add <su>cmp_*_carryinC patterns Richard Henderson
2020-03-22 19:30   ` Segher Boessenkool
2020-03-22 20:40     ` Richard Henderson
2020-03-31 18:34   ` Richard Sandiford
2020-03-31 22:44     ` Richard Henderson
2020-04-01 12:37       ` Segher Boessenkool
2020-04-01 16:28       ` Richard Sandiford
2020-04-01 17:14         ` Richard Henderson
2020-03-21  2:42 ` [PATCH v2 4/9] aarch64: Add <su>cmp<GPI>_carryinC_m2 Richard Henderson
2020-03-21  2:42 ` [PATCH v2 5/9] aarch64: Provide expander for sub<GPI>3_compare1 Richard Henderson
2020-03-21  2:42 ` [PATCH v2 6/9] aarch64: Introduce aarch64_expand_addsubti Richard Henderson
2020-03-21  2:42 ` [PATCH v2 7/9] aarch64: Adjust result of aarch64_gen_compare_reg Richard Henderson
2020-03-22 21:55   ` Segher Boessenkool
2020-03-22 22:21     ` Richard Henderson
2020-03-21  2:42 ` Richard Henderson [this message]
2020-03-21  2:42 ` [PATCH v2 9/9] aarch64: Implement absti2 Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200321024231.13778-9-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=Wilco.Dijkstra@arm.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=kyrylo.tkachov@arm.com \
    --cc=marcus.shawcroft@arm.com \
    --cc=richard.earnshaw@arm.com \
    --cc=richard.sandiford@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).