public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: gcc-patches@gcc.gnu.org
Cc: ramana.radhakrishnan@arm.com,	agraf@suse.de,
	marcus.shawcroft@arm.com,	james.greenhalgh@arm.com,
	richard.earnshaw@arm.com
Subject: [PATCH, AArch64 v2 02/11] aarch64: Improve cas generation
Date: Tue, 02 Oct 2018 16:19:00 -0000	[thread overview]
Message-ID: <20181002161915.18843-3-richard.henderson@linaro.org> (raw)
In-Reply-To: <20181002161915.18843-1-richard.henderson@linaro.org>

Do not zero-extend the input to the cas for subword operations;
instead, use the appropriate zero-extending compare insns.
Correct the predicates and constraints for immediate expected operand.

	* config/aarch64/aarch64.c (aarch64_gen_compare_reg_maybe_ze): New.
	(aarch64_split_compare_and_swap): Use it.
	(aarch64_expand_compare_and_swap): Likewise.  Remove convert_modes;
	test oldval against the proper predicate.
	* config/aarch64/atomics.md (@atomic_compare_and_swap<ALLI>):
	Use nonmemory_operand for expected.
	(cas_short_expected_pred): New.
	(@aarch64_compare_and_swap<SHORT>): Use it; use "rn" not "rI" to match.
	(@aarch64_compare_and_swap<GPI>): Use "rn" not "rI" for expected.
	* config/aarch64/predicates.md (aarch64_plushi_immediate): New.
	(aarch64_plushi_operand): New.
---
 gcc/config/aarch64/aarch64.c     | 90 +++++++++++++++++++-------------
 gcc/config/aarch64/atomics.md    | 19 ++++---
 gcc/config/aarch64/predicates.md | 12 +++++
 3 files changed, 76 insertions(+), 45 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index fbec54fe5da..0e2b85de1e3 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1613,6 +1613,33 @@ aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
   return cc_reg;
 }
 
+/* Similarly, but maybe zero-extend Y if Y_MODE < SImode.  */
+
+static rtx
+aarch64_gen_compare_reg_maybe_ze(RTX_CODE code, rtx x, rtx y,
+                                 machine_mode y_mode)
+{
+  if (y_mode == E_QImode || y_mode == E_HImode)
+    {
+      if (CONST_INT_P (y))
+	y = GEN_INT (INTVAL (y) & GET_MODE_MASK (y_mode));
+      else
+	{
+	  rtx t, cc_reg;
+	  machine_mode cc_mode;
+
+	  t = gen_rtx_ZERO_EXTEND (SImode, y);
+	  t = gen_rtx_COMPARE (CC_SWPmode, t, x);
+	  cc_mode = CC_SWPmode;
+	  cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
+	  emit_set_insn (cc_reg, t);
+	  return cc_reg;
+	}
+    }
+
+  return aarch64_gen_compare_reg (code, x, y);
+}
+
 /* Build the SYMBOL_REF for __tls_get_addr.  */
 
 static GTY(()) rtx tls_get_addr_libfunc;
@@ -14138,8 +14165,8 @@ aarch64_emit_unlikely_jump (rtx insn)
 void
 aarch64_expand_compare_and_swap (rtx operands[])
 {
-  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
-  machine_mode mode, cmp_mode;
+  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x, cc_reg;
+  machine_mode mode, r_mode;
 
   bval = operands[0];
   rval = operands[1];
@@ -14150,36 +14177,19 @@ aarch64_expand_compare_and_swap (rtx operands[])
   mod_s = operands[6];
   mod_f = operands[7];
   mode = GET_MODE (mem);
-  cmp_mode = mode;
 
   /* Normally the succ memory model must be stronger than fail, but in the
      unlikely event of fail being ACQUIRE and succ being RELEASE we need to
      promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
-
   if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
       && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
     mod_s = GEN_INT (MEMMODEL_ACQ_REL);
 
-  switch (mode)
+  r_mode = mode;
+  if (mode == QImode || mode == HImode)
     {
-    case E_QImode:
-    case E_HImode:
-      /* For short modes, we're going to perform the comparison in SImode,
-	 so do the zero-extension now.  */
-      cmp_mode = SImode;
-      rval = gen_reg_rtx (SImode);
-      oldval = convert_modes (SImode, mode, oldval, true);
-      /* Fall through.  */
-
-    case E_SImode:
-    case E_DImode:
-      /* Force the value into a register if needed.  */
-      if (!aarch64_plus_operand (oldval, mode))
-	oldval = force_reg (cmp_mode, oldval);
-      break;
-
-    default:
-      gcc_unreachable ();
+      r_mode = SImode;
+      rval = gen_reg_rtx (r_mode);
     }
 
   if (TARGET_LSE)
@@ -14187,26 +14197,32 @@ aarch64_expand_compare_and_swap (rtx operands[])
       /* The CAS insn requires oldval and rval overlap, but we need to
 	 have a copy of oldval saved across the operation to tell if
 	 the operation is successful.  */
-      if (mode == QImode || mode == HImode)
-	rval = copy_to_mode_reg (SImode, gen_lowpart (SImode, oldval));
-      else if (reg_overlap_mentioned_p (rval, oldval))
-        rval = copy_to_mode_reg (mode, oldval);
-      else
-	emit_move_insn (rval, oldval);
+      if (reg_overlap_mentioned_p (rval, oldval))
+        rval = copy_to_mode_reg (r_mode, oldval);
+      else 
+	emit_move_insn (rval, gen_lowpart (r_mode, oldval));
+
       emit_insn (gen_aarch64_compare_and_swap_lse (mode, rval, mem,
 						   newval, mod_s));
-      aarch64_gen_compare_reg (EQ, rval, oldval);
+      cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
     }
   else
-    emit_insn (gen_aarch64_compare_and_swap (mode, rval, mem, oldval, newval,
-					     is_weak, mod_s, mod_f));
+    {
+      /* The oldval predicate varies by mode.  Test it and force to reg.  */
+      insn_code code = code_for_aarch64_compare_and_swap (mode);
+      if (!insn_data[code].operand[2].predicate (oldval, mode))
+	oldval = force_reg (mode, oldval);
 
-  if (mode == QImode || mode == HImode)
+      emit_insn (GEN_FCN (code) (rval, mem, oldval, newval,
+				 is_weak, mod_s, mod_f));
+      cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+    }
+
+  if (r_mode != mode)
     rval = gen_lowpart (mode, rval);
   emit_move_insn (operands[1], rval);
 
-  x = gen_rtx_REG (CCmode, CC_REGNUM);
-  x = gen_rtx_EQ (SImode, x, const0_rtx);
+  x = gen_rtx_EQ (SImode, cc_reg, const0_rtx);
   emit_insn (gen_rtx_SET (bval, x));
 }
 
@@ -14321,10 +14337,10 @@ aarch64_split_compare_and_swap (rtx operands[])
     }
   else
     {
-      cond = aarch64_gen_compare_reg (NE, rval, oldval);
+      cond = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
       x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
-				 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
+				gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
       aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
     }
 
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index 22660850af1..e44301b40c7 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -24,8 +24,8 @@
   [(match_operand:SI 0 "register_operand" "")			;; bool out
    (match_operand:ALLI 1 "register_operand" "")			;; val out
    (match_operand:ALLI 2 "aarch64_sync_memory_operand" "")	;; memory
-   (match_operand:ALLI 3 "general_operand" "")			;; expected
-   (match_operand:ALLI 4 "aarch64_reg_or_zero" "")			;; desired
+   (match_operand:ALLI 3 "nonmemory_operand" "")		;; expected
+   (match_operand:ALLI 4 "aarch64_reg_or_zero" "")		;; desired
    (match_operand:SI 5 "const_int_operand")			;; is_weak
    (match_operand:SI 6 "const_int_operand")			;; mod_s
    (match_operand:SI 7 "const_int_operand")]			;; mod_f
@@ -36,19 +36,22 @@
   }
 )
 
+(define_mode_attr cas_short_expected_pred
+  [(QI "aarch64_reg_or_imm") (HI "aarch64_plushi_operand")])
+
 (define_insn_and_split "@aarch64_compare_and_swap<mode>"
   [(set (reg:CC CC_REGNUM)					;; bool out
     (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
-   (set (match_operand:SI 0 "register_operand" "=&r")	   ;; val out
+   (set (match_operand:SI 0 "register_operand" "=&r")		;; val out
     (zero_extend:SI
       (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory
    (set (match_dup 1)
     (unspec_volatile:SHORT
-      [(match_operand:SI 2 "aarch64_plus_operand" "rI")	;; expected
+      [(match_operand:SHORT 2 "<cas_short_expected_pred>" "rn")	;; expected
        (match_operand:SHORT 3 "aarch64_reg_or_zero" "rZ")	;; desired
-       (match_operand:SI 4 "const_int_operand")		;; is_weak
-       (match_operand:SI 5 "const_int_operand")		;; mod_s
-       (match_operand:SI 6 "const_int_operand")]	;; mod_f
+       (match_operand:SI 4 "const_int_operand")			;; is_weak
+       (match_operand:SI 5 "const_int_operand")			;; mod_s
+       (match_operand:SI 6 "const_int_operand")]		;; mod_f
       UNSPECV_ATOMIC_CMPSW))
    (clobber (match_scratch:SI 7 "=&r"))]
   ""
@@ -68,7 +71,7 @@
     (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q"))   ;; memory
    (set (match_dup 1)
     (unspec_volatile:GPI
-      [(match_operand:GPI 2 "aarch64_plus_operand" "rI")	;; expect
+      [(match_operand:GPI 2 "aarch64_plus_operand" "rn")	;; expect
        (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")		;; desired
        (match_operand:SI 4 "const_int_operand")			;; is_weak
        (match_operand:SI 5 "const_int_operand")			;; mod_s
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 5b08b03c586..4c75eff3e5a 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -114,6 +114,18 @@
   (ior (match_operand 0 "register_operand")
        (match_operand 0 "aarch64_plus_immediate")))
 
+(define_predicate "aarch64_plushi_immediate"
+  (match_code "const_int")
+{
+  HOST_WIDE_INT val = INTVAL (op);
+  /* The HImode value must be zero-extendable to an SImode plus_operand.  */
+  return ((val & 0xfff) == val || sext_hwi (val & 0xf000, 16) == val);
+})
+
+(define_predicate "aarch64_plushi_operand"
+  (ior (match_operand 0 "register_operand")
+       (match_operand 0 "aarch64_plushi_immediate")))
+
 (define_predicate "aarch64_pluslong_immediate"
   (and (match_code "const_int")
        (match_test "(INTVAL (op) < 0xffffff && INTVAL (op) > -0xffffff)")))
-- 
2.17.1

  parent reply	other threads:[~2018-10-02 16:19 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-02 16:19 [PATCH, AArch64 v2 00/11] LSE atomics out-of-line Richard Henderson
2018-10-02 16:19 ` [PATCH, AArch64 v2 11/11] Enable -matomic-ool by default Richard Henderson
2018-10-02 16:19 ` Richard Henderson [this message]
2018-10-30 20:37   ` [PATCH, AArch64 v2 02/11] aarch64: Improve cas generation James Greenhalgh
2018-10-02 16:19 ` [PATCH, AArch64 v2 07/11] aarch64: Add out-of-line functions for LSE atomics Richard Henderson
2018-10-02 16:19 ` [PATCH, AArch64 v2 01/11] aarch64: Simplify LSE cas generation Richard Henderson
2018-10-30 20:32   ` James Greenhalgh
2018-10-31 10:35     ` Richard Henderson
2018-10-02 16:19 ` [PATCH, AArch64 v2 04/11] aarch64: Improve atomic-op lse generation Richard Henderson
2018-10-30 21:40   ` James Greenhalgh
2018-10-02 16:19 ` [PATCH, AArch64 v2 09/11] aarch64: Force TImode values into even registers Richard Henderson
2018-10-30 21:47   ` James Greenhalgh
2018-10-02 16:19 ` [PATCH, AArch64 v2 08/11] aarch64: Implement -matomic-ool Richard Henderson
2018-10-02 16:19 ` [PATCH, AArch64 v2 03/11] aarch64: Improve swp generation Richard Henderson
2018-10-30 20:50   ` James Greenhalgh
2018-10-02 16:19 ` [PATCH, AArch64 v2 06/11] Add visibility to libfunc constructors Richard Henderson
2018-10-30 21:46   ` James Greenhalgh
2018-10-31 11:29   ` Richard Henderson
2018-10-31 17:46   ` Eric Botcazou
2018-10-31 19:04     ` Richard Henderson
2018-10-31 19:43       ` Eric Botcazou
2018-10-02 16:36 ` [PATCH, AArch64 v2 10/11] aarch64: Implement TImode compare-and-swap Richard Henderson
2018-10-02 16:37 ` [PATCH, AArch64 v2 05/11] aarch64: Emit LSE st<op> instructions Richard Henderson
2018-10-30 21:42   ` James Greenhalgh
2018-10-31 11:13     ` Richard Henderson
2018-10-31 11:25     ` Richard Henderson
2018-10-31 15:49       ` Will Deacon
2018-10-31 17:35         ` Richard Henderson
2018-10-31 19:42           ` Will Deacon
2018-10-31 22:39             ` Richard Henderson
2018-10-11 17:44 ` [PATCH, AArch64 v2 00/11] LSE atomics out-of-line Richard Henderson
2018-10-22 14:01   ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181002161915.18843-3-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=agraf@suse.de \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=james.greenhalgh@arm.com \
    --cc=marcus.shawcroft@arm.com \
    --cc=ramana.radhakrishnan@arm.com \
    --cc=richard.earnshaw@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).