From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 35526 invoked by alias); 7 Sep 2015 09:27:06 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 35505 invoked by uid 89); 7 Sep 2015 09:27:04 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-0.7 required=5.0 tests=AWL,BAYES_00,KAM_LAZY_DOMAIN_SECURITY,T_RP_MATCHES_RCVD autolearn=no version=3.3.2 X-HELO: foss.arm.com Received: from foss.arm.com (HELO foss.arm.com) (217.140.101.70) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Mon, 07 Sep 2015 09:27:02 +0000 Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.72.51.249]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 646813DA; Mon, 7 Sep 2015 02:27:08 -0700 (PDT) Received: from [10.2.206.27] (e105545-lin.cambridge.arm.com [10.2.206.27]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPSA id 211F13F318; Mon, 7 Sep 2015 02:26:58 -0700 (PDT) Subject: Re: [PATCH][ARM][3/3] Expand mod by power of 2 To: Kyrill Tkachov , GCC Patches References: <55B219AE.6010102@arm.com> Cc: Ramana Radhakrishnan , Richard Earnshaw , Marcus Shawcroft , James Greenhalgh From: Ramana Radhakrishnan Message-ID: <55ED5864.9080607@foss.arm.com> Date: Mon, 07 Sep 2015 09:46:00 -0000 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.2.0 MIME-Version: 1.0 In-Reply-To: <55B219AE.6010102@arm.com> Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 7bit X-IsSubscribed: yes X-SW-Source: 2015-09/txt/msg00413.txt.bz2 On 24/07/15 11:55, Kyrill Tkachov wrote: > > commit d562629e36ba013b8f77956a74139330d191bc30 > Author: Kyrylo Tkachov > Date: Fri Jul 17 16:30:01 2015 +0100 > > [ARM][3/3] Expand mod by power of 2 > > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index e1bc727..6ade07c 100644 > --- 
a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -9556,6 +9556,22 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, > > case MOD: > case UMOD: > + /* MOD by a power of 2 can be expanded as: > + rsbs r1, r0, #0 > + and r0, r0, #(n - 1) > + and r1, r1, #(n - 1) > + rsbpl r0, r1, #0. */ > + if (code == MOD > + && CONST_INT_P (XEXP (x, 1)) > + && exact_log2 (INTVAL (XEXP (x, 1))) > 0 > + && mode == SImode) > + { > + *cost += COSTS_N_INSNS (3) > + + 2 * extra_cost->alu.logical > + + extra_cost->alu.arith; > + return true; > + } > + > *cost = LIBCALL_COST (2); > return false; /* All arguments must be in registers. */ > > diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md > index f341109..8301648 100644 > --- a/gcc/config/arm/arm.md > +++ b/gcc/config/arm/arm.md > @@ -1229,7 +1229,7 @@ (define_peephole2 > "" > ) > > -(define_insn "*subsi3_compare0" > +(define_insn "subsi3_compare0" > [(set (reg:CC_NOOV CC_REGNUM) > (compare:CC_NOOV > (minus:SI (match_operand:SI 1 "arm_rhs_operand" "r,r,I") > @@ -2158,7 +2158,7 @@ (define_expand "andsi3" > ) > > ; ??? Check split length for Thumb-2 > -(define_insn_and_split "*arm_andsi3_insn" > +(define_insn_and_split "arm_andsi3_insn" > [(set (match_operand:SI 0 "s_register_operand" "=r,l,r,r,r") > (and:SI (match_operand:SI 1 "s_register_operand" "%r,0,r,r,r") > (match_operand:SI 2 "reg_or_int_operand" "I,l,K,r,?n")))] > @@ -11105,6 +11105,78 @@ (define_expand "thumb_legacy_rev" > "" > ) This shouldn't be necessary - you are just adding another interface to produce an and insn. > > +;; ARM-specific expansion of signed mod by power of 2 > +;; using conditional negate. 
> +;; For r0 % n where n is a power of 2 produce: > +;; rsbs r1, r0, #0 > +;; and r0, r0, #(n - 1) > +;; and r1, r1, #(n - 1) > +;; rsbpl r0, r1, #0 > + > +(define_expand "modsi3" > + [(match_operand:SI 0 "register_operand" "") > + (match_operand:SI 1 "register_operand" "") > + (match_operand:SI 2 "const_int_operand" "")] > + "TARGET_32BIT" > + { > + HOST_WIDE_INT val = INTVAL (operands[2]); > + > + if (val <= 0 > + || exact_log2 (INTVAL (operands[2])) <= 0 > + || !const_ok_for_arm (INTVAL (operands[2]) - 1)) > + FAIL; > + > + rtx mask = GEN_INT (val - 1); > + > + /* In the special case of x0 % 2 we can do the even shorter: > + cmp r0, #0 > + and r0, r0, #1 > + rsblt r0, r0, #0. */ > + > + if (val == 2) > + { > + rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); > + rtx cond = gen_rtx_LT (SImode, cc_reg, const0_rtx); > + > + emit_insn (gen_rtx_SET (cc_reg, > + gen_rtx_COMPARE (CCmode, operands[1], const0_rtx))); > + > + rtx masked = gen_reg_rtx (SImode); > + emit_insn (gen_arm_andsi3_insn (masked, operands[1], mask)); Use emit_insn (gen_andsi3 (masked, operands[1], mask)) instead, and likewise below. > + emit_move_insn (operands[0], > + gen_rtx_IF_THEN_ELSE (SImode, cond, > + gen_rtx_NEG (SImode, > + masked), > + masked)); > + DONE; > + } > + > + rtx neg_op = gen_reg_rtx (SImode); > + rtx_insn *insn = emit_insn (gen_subsi3_compare0 (neg_op, const0_rtx, > + operands[1])); > + > + /* Extract the condition register and mode. */ > + rtx cmp = XVECEXP (PATTERN (insn), 0, 0); > + rtx cc_reg = SET_DEST (cmp); > + rtx cond = gen_rtx_GE (SImode, cc_reg, const0_rtx); > + > + emit_insn (gen_arm_andsi3_insn (operands[0], operands[1], mask)); > + > + rtx masked_neg = gen_reg_rtx (SImode); > + emit_insn (gen_arm_andsi3_insn (masked_neg, neg_op, mask)); > + > + /* We want a conditional negate here, but emitting COND_EXEC rtxes > + during expand does not always work. Do an IF_THEN_ELSE instead. 
*/ > + emit_move_insn (operands[0], > + gen_rtx_IF_THEN_ELSE (SImode, cond, > + gen_rtx_NEG (SImode, masked_neg), > + operands[0])); > + > + > + DONE; > + } > +) > + > (define_expand "bswapsi2" > [(set (match_operand:SI 0 "s_register_operand" "=r") > (bswap:SI (match_operand:SI 1 "s_register_operand" "r")))] > diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.c b/gcc/testsuite/gcc.target/aarch64/mod_2.c > new file mode 100644 > index 0000000..2645c18 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/mod_2.c > @@ -0,0 +1,7 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */ > + > +#include "mod_2.x" > + > +/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */ > +/* { dg-final { scan-assembler-times "and\t\[wx\]\[0-9\]*" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.x b/gcc/testsuite/gcc.target/aarch64/mod_2.x > new file mode 100644 > index 0000000..2b079a4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/mod_2.x > @@ -0,0 +1,5 @@ > +int > +f (int x) > +{ > + return x % 2; > +} > diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.c b/gcc/testsuite/gcc.target/aarch64/mod_256.c > new file mode 100644 > index 0000000..567332c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/mod_256.c > @@ -0,0 +1,6 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */ > + > +#include "mod_256.x" > + > +/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.x b/gcc/testsuite/gcc.target/aarch64/mod_256.x > new file mode 100644 > index 0000000..c1de42c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/mod_256.x > @@ -0,0 +1,5 @@ > +int > +f (int x) > +{ > + return x % 256; > +} > diff --git a/gcc/testsuite/gcc.target/arm/mod_2.c b/gcc/testsuite/gcc.target/arm/mod_2.c > new file mode 100644 > index 0000000..93017a1 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mod_2.c > @@ -0,0 +1,8 @@ > +/* 
{ dg-do compile } */ > +/* { dg-require-effective-target arm32 } */ > +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */ > + > +#include "../aarch64/mod_2.x" > + > +/* { dg-final { scan-assembler "rsblt\tr\[0-9\]*" } } */ > +/* { dg-final { scan-assembler-times "and\tr\[0-9\].*1" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/arm/mod_256.c b/gcc/testsuite/gcc.target/arm/mod_256.c > new file mode 100644 > index 0000000..92ab05a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mod_256.c > @@ -0,0 +1,8 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target arm32 } */ > +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */ > + > +#include "../aarch64/mod_256.x" > + > +/* { dg-final { scan-assembler "rsbpl\tr\[0-9\]*" } } */ > +/* { dg-final { scan-assembler "and\tr\[0-9\].*255" } } */ OK with those changes if no regressions. Ramana