From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pl1-x62d.google.com (mail-pl1-x62d.google.com [IPv6:2607:f8b0:4864:20::62d]) by sourceware.org (Postfix) with ESMTPS id 2ECAF388B832 for ; Thu, 2 Apr 2020 18:54:10 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.3.2 sourceware.org 2ECAF388B832 Received: by mail-pl1-x62d.google.com with SMTP id v23so1671509ply.10 for ; Thu, 02 Apr 2020 11:54:10 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=dIEE4FAlIJc1e3p/c6zytvIqHvgnvLiGVX6HnHCxg24=; b=c02KsG9OVlOUZsdBdLtJBntTOVU1uvav4U0X5WKCpYKXKeB+B++66BIpWIbMxJOybi B+vvs+eOBYB93tHl2b3NEYn+/ZTeox5S6viqdSqgwBI0gZXkW7tIN3cIMnJib+7Gyu/9 qyjLutY95qlhj2+9JorARgWMxnAvur9M38myKWkzGV9m2TpNpVxEn5exy5DfEYthEfOW X47i7lKyufQriYKPSGOo2vEsPD0wm/nrQTP54mNR1rMsCrBemrxsO6Jdp/PcyHucjCok rJPSSIFvxjw6UBHPccSoHUzoicfGN6/9fwXOrc7Imfv/TeeFR0RPsjyJ430nGEOgmUIY nTWA== X-Gm-Message-State: AGi0PuYokQe04AGpnmN4GBrh0INZwvgJlh7b6owwsPe/6xZPRHFJJyr5 dI+Be067UdZE2Z2gnZayOGrV2TpH20o= X-Google-Smtp-Source: APiQypLI8jPZJwqnhlfyX6dE4Rj+yuOk2PGgXYgaBXfZu5qdcQ9c65qqWuPeuF1vdKcHi3T2YnnsHw== X-Received: by 2002:a17:90a:feb:: with SMTP id 98mr5457439pjz.72.1585853648810; Thu, 02 Apr 2020 11:54:08 -0700 (PDT) Received: from localhost.localdomain (174-21-149-226.tukw.qwest.net. 
[174.21.149.226]) by smtp.gmail.com with ESMTPSA id r64sm4216973pjb.15.2020.04.02.11.54.07 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Thu, 02 Apr 2020 11:54:07 -0700 (PDT) From: Richard Henderson To: gcc-patches@gcc.gnu.org Cc: richard.sandiford@arm.com, segher@kernel.crashing.org, richard.earnshaw@arm.com, Wilco.Dijkstra@arm.com, marcus.shawcroft@arm.com, kyrylo.tkachov@arm.com Subject: [PATCH v2 10/11] aarch64: Implement TImode comparisons Date: Thu, 2 Apr 2020 11:53:52 -0700 Message-Id: <20200402185353.11047-11-richard.henderson@linaro.org> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20200402185353.11047-1-richard.henderson@linaro.org> References: <20200402185353.11047-1-richard.henderson@linaro.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Spam-Status: No, score=-23.8 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no version=3.4.2 X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on server2.sourceware.org X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 02 Apr 2020 18:54:11 -0000 Perform all TImode comparisons without branches: use cmp+ccmp for EQ/NE, and cmp followed by a carry-in compare for the ordered comparisons. * config/aarch64/aarch64.c (aarch64_gen_compare_reg): Expand all of the comparisons for TImode, not just NE. * config/aarch64/aarch64.md (cbranchti4, cstoreti4): New. 
--- gcc/config/aarch64/aarch64.c | 122 ++++++++++++++++++++++++++++++---- gcc/config/aarch64/aarch64.md | 28 ++++++++ 2 files changed, 136 insertions(+), 14 deletions(-) diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 93658338041..89c9192266c 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -2333,32 +2333,126 @@ rtx aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y) { machine_mode cmp_mode = GET_MODE (x); - machine_mode cc_mode; rtx cc_reg; if (cmp_mode == TImode) { - gcc_assert (code == NE); - - cc_mode = CCmode; - cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM); - rtx x_lo = operand_subword (x, 0, 0, TImode); - rtx y_lo = operand_subword (y, 0, 0, TImode); - emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo)); - rtx x_hi = operand_subword (x, 1, 0, TImode); - rtx y_hi = operand_subword (y, 1, 0, TImode); - emit_insn (gen_ccmpccdi (cc_reg, cc_reg, x_hi, y_hi, - gen_rtx_EQ (cc_mode, cc_reg, const0_rtx), - GEN_INT (AARCH64_EQ))); + struct expand_operand ops[2]; + rtx y_lo, y_hi, tmp; + + if (CONST_INT_P (y)) + { + HOST_WIDE_INT y_int = INTVAL (y); + + y_lo = y; + switch (code) + { + case EQ: + case NE: + /* For equality, IOR the two halves together. If this gets + used for a branch, we expect this to fold to cbz/cbnz; + otherwise it's no larger than cmp+ccmp below. Beware of + the compare-and-swap post-reload split and use cmp+ccmp. */ + if (y_int == 0 && can_create_pseudo_p ()) + { + tmp = gen_reg_rtx (DImode); + emit_insn (gen_iordi3 (tmp, x_hi, x_lo)); + emit_insn (gen_cmpdi (tmp, const0_rtx)); + cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); + goto done; + } + break; + + case LE: + case GT: + /* Add 1 to Y to convert to LT/GE, which avoids the swap and + keeps the constant operand. The cstoreti and cbranchti + operand predicates require aarch64_plus_operand, which + means this increment cannot overflow. */ + y_lo = gen_int_mode (++y_int, DImode); + code = (code == LE ? 
LT : GE); + /* fall through */ + + case LT: + case GE: + /* Check only the sign bit using tst, or fold to tbz/tbnz. */ + if (y_int == 0) + { + cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM); + tmp = gen_rtx_AND (DImode, x_hi, GEN_INT (INT64_MIN)); + tmp = gen_rtx_COMPARE (CC_NZmode, tmp, const0_rtx); + emit_set_insn (cc_reg, tmp); + code = (code == LT ? NE : EQ); + goto done; + } + break; + + default: + break; + } + y_hi = (y_int < 0 ? constm1_rtx : const0_rtx); + } + else + { + y_lo = operand_subword (y, 0, 0, TImode); + y_hi = operand_subword (y, 1, 0, TImode); + } + + switch (code) + { + case LEU: + case GTU: + case LE: + case GT: + std::swap (x_lo, y_lo); + std::swap (x_hi, y_hi); + code = swap_condition (code); + break; + + default: + break; + } + + /* Emit cmpdi, forcing operands into registers as required. */ + create_input_operand (&ops[0], x_lo, DImode); + create_input_operand (&ops[1], y_lo, DImode); + expand_insn (CODE_FOR_cmpdi, 2, ops); + + cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); + switch (code) + { + case EQ: + case NE: + /* For EQ, (x_lo == y_lo) && (x_hi == y_hi). */ + emit_insn (gen_ccmpccdi (cc_reg, cc_reg, x_hi, y_hi, + gen_rtx_EQ (VOIDmode, cc_reg, const0_rtx), + GEN_INT (AARCH64_EQ))); + break; + + case LTU: + case GEU: + case LT: + case GE: + /* Compute (x - y), as double-word arithmetic. 
*/ + create_input_operand (&ops[0], x_hi, DImode); + create_input_operand (&ops[1], y_hi, DImode); + expand_insn (CODE_FOR_cmpdi3_carryin, 2, ops); + break; + + default: + gcc_unreachable (); + } } else { - cc_mode = SELECT_CC_MODE (code, x, y); + machine_mode cc_mode = SELECT_CC_MODE (code, x, y); cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM); emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y)); } + + done: return gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx); } diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index dbaeb7c251c..cf716f815a1 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -473,6 +473,20 @@ operands[2] = const0_rtx; }) +(define_expand "cbranchti4" + [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" + [(match_operand:TI 1 "register_operand") + (match_operand:TI 2 "aarch64_plus_operand")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + operands[0] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1], + operands[2]); + operands[1] = XEXP (operands[0], 0); + operands[2] = const0_rtx; +}) + (define_expand "cbranch4" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" [(match_operand:GPF 1 "register_operand") @@ -3923,6 +3937,20 @@ operands[3] = const0_rtx; }) +(define_expand "cstoreti4" + [(set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "aarch64_comparison_operator" + [(match_operand:TI 2 "register_operand") + (match_operand:TI 3 "aarch64_plus_operand")]))] + "" +{ + operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], + operands[3]); + PUT_MODE (operands[1], SImode); + operands[2] = XEXP (operands[1], 0); + operands[3] = const0_rtx; +}) + (define_expand "cstorecc4" [(set (match_operand:SI 0 "register_operand") (match_operator 1 "aarch64_comparison_operator_mode" -- 2.20.1