[PATCH][ARM][3/3] Expand mod by power of 2

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

From: Kyrill Tkachov <kyrylo.tkachov@arm.com>
To: GCC Patches <gcc-patches@gcc.gnu.org>
Cc: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>,
	 Richard Earnshaw <Richard.Earnshaw@arm.com>,
	Marcus Shawcroft <marcus.shawcroft@arm.com>,
	 James Greenhalgh <james.greenhalgh@arm.com>
Subject: [PATCH][ARM][3/3] Expand mod by power of 2
Date: Fri, 24 Jul 2015 11:09:00 -0000	[thread overview]
Message-ID: <55B219AE.6010102@arm.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 1494 bytes --]

Hi all,

This third patch implements the same algorithm as patch 1/3 but for arm.
That is, for signed X % N where N is a power of 2 we do:

  rsbs    r1, r0, #0
  and     r0, r0, #(N - 1)
  and     r1, r1, #(N - 1)
  rsbpl   r0, r1, #0

For the special case where N is 2 we do the shorter:
   cmp     r0, #0
   and     r0, r0, #1
   rsblt   r0, r0, #0

Note that for the final conditional negate we expand to an IF_THEN_ELSE of a NEG
rather than a COND_EXEC rtx because the LRA dataflow analysis doesn't always deal
with COND_EXECs correctly. The splitters fixed in patch 2/3 then break it into a
COND_EXEC after reload, so it all works out.

Bootstrapped and tested on arm, with both ARM and Thumb2 states.

Tests are added and shared with aarch64.

Ok for trunk?

Thanks,
Kyrill

2015-07-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * config/arm/arm.md (*subsi3_compare0): Rename to...
     (subsi3_compare0): ... This.
     (*arm_andsi3_insn): Rename to...
     (arm_andsi3_insn): ... This.
     (modsi3): New define_expand.
     * config/arm/arm.c (arm_new_rtx_costs, MOD case): Handle case
     when operand is power of 2.


2015-07-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * gcc.target/aarch64/mod_2.x: New file.
     * gcc.target/aarch64/mod_256.x: Likewise.
     * gcc.target/arm/mod_2.c: New test.
     * gcc.target/arm/mod_256.c: Likewise.
     * gcc.target/aarch64/mod_2.c: Likewise.
     * gcc.target/aarch64/mod_256.c: Likewise.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: arm-mod-pow2.patch --]
[-- Type: text/x-patch; name=arm-mod-pow2.patch, Size: 6593 bytes --]

commit d562629e36ba013b8f77956a74139330d191bc30
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Fri Jul 17 16:30:01 2015 +0100

    [ARM][3/3] Expand mod by power of 2

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index e1bc727..6ade07c 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -9556,6 +9556,22 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
 
     case MOD:
     case UMOD:
+      /* MOD by a power of 2 can be expanded as:
+	 rsbs    r1, r0, #0
+	 and     r0, r0, #(n - 1)
+	 and     r1, r1, #(n - 1)
+	 rsbpl   r0, r1, #0.  */
+      if (code == MOD
+	  && CONST_INT_P (XEXP (x, 1))
+	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
+	  && mode == SImode)
+	{
+	  *cost += COSTS_N_INSNS (3)
+		   + 2 * extra_cost->alu.logical
+		   + extra_cost->alu.arith;
+	  return true;
+	}
+
       *cost = LIBCALL_COST (2);
       return false;	/* All arguments must be in registers.  */
 
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index f341109..8301648 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1229,7 +1229,7 @@ (define_peephole2
   ""
 )
 
-(define_insn "*subsi3_compare0"
+(define_insn "subsi3_compare0"
   [(set (reg:CC_NOOV CC_REGNUM)
 	(compare:CC_NOOV
 	 (minus:SI (match_operand:SI 1 "arm_rhs_operand" "r,r,I")
@@ -2158,7 +2158,7 @@ (define_expand "andsi3"
 )
 
 ; ??? Check split length for Thumb-2
-(define_insn_and_split "*arm_andsi3_insn"
+(define_insn_and_split "arm_andsi3_insn"
   [(set (match_operand:SI         0 "s_register_operand" "=r,l,r,r,r")
 	(and:SI (match_operand:SI 1 "s_register_operand" "%r,0,r,r,r")
 		(match_operand:SI 2 "reg_or_int_operand" "I,l,K,r,?n")))]
@@ -11105,6 +11105,78 @@ (define_expand "thumb_legacy_rev"
   ""
 )
 
+;; ARM-specific expansion of signed mod by power of 2
+;; using conditional negate.
+;; For r0 % n where n is a power of 2 produce:
+;; rsbs    r1, r0, #0
+;; and     r0, r0, #(n - 1)
+;; and     r1, r1, #(n - 1)
+;; rsbpl   r0, r1, #0
+
+(define_expand "modsi3"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "const_int_operand" "")]
+  "TARGET_32BIT"
+  {
+    HOST_WIDE_INT val = INTVAL (operands[2]);
+
+    if (val <= 0
+       || exact_log2 (INTVAL (operands[2])) <= 0
+       || !const_ok_for_arm (INTVAL (operands[2]) - 1))
+      FAIL;
+
+    rtx mask = GEN_INT (val - 1);
+
+    /* In the special case of r0 % 2 we can do the even shorter:
+	cmp     r0, #0
+	and     r0, r0, #1
+	rsblt   r0, r0, #0.  */
+
+    if (val == 2)
+      {
+	rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+	rtx cond = gen_rtx_LT (SImode, cc_reg, const0_rtx);
+
+	emit_insn (gen_rtx_SET (cc_reg,
+			gen_rtx_COMPARE (CCmode, operands[1], const0_rtx)));
+
+	rtx masked = gen_reg_rtx (SImode);
+	emit_insn (gen_arm_andsi3_insn (masked, operands[1], mask));
+	emit_move_insn (operands[0],
+			gen_rtx_IF_THEN_ELSE (SImode, cond,
+					      gen_rtx_NEG (SImode,
+							   masked),
+					      masked));
+	DONE;
+      }
+
+    rtx neg_op = gen_reg_rtx (SImode);
+    rtx_insn *insn = emit_insn (gen_subsi3_compare0 (neg_op, const0_rtx,
+						      operands[1]));
+
+    /* Extract the condition register and mode.  */
+    rtx cmp = XVECEXP (PATTERN (insn), 0, 0);
+    rtx cc_reg = SET_DEST (cmp);
+    rtx cond = gen_rtx_GE (SImode, cc_reg, const0_rtx);
+
+    emit_insn (gen_arm_andsi3_insn (operands[0], operands[1], mask));
+
+    rtx masked_neg = gen_reg_rtx (SImode);
+    emit_insn (gen_arm_andsi3_insn (masked_neg, neg_op, mask));
+
+    /* We want a conditional negate here, but emitting COND_EXEC rtxes
+       during expand does not always work.  Do an IF_THEN_ELSE instead.  */
+    emit_move_insn (operands[0],
+		    gen_rtx_IF_THEN_ELSE (SImode, cond,
+					  gen_rtx_NEG (SImode, masked_neg),
+					  operands[0]));
+
+
+    DONE;
+  }
+)
+
 (define_expand "bswapsi2"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
   	(bswap:SI (match_operand:SI 1 "s_register_operand" "r")))]
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.c b/gcc/testsuite/gcc.target/aarch64/mod_2.c
new file mode 100644
index 0000000..2645c18
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_2.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "mod_2.x"
+
+/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */
+/* { dg-final { scan-assembler-times "and\t\[wx\]\[0-9\]*" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.x b/gcc/testsuite/gcc.target/aarch64/mod_2.x
new file mode 100644
index 0000000..2b079a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_2.x
@@ -0,0 +1,5 @@
+int
+f (int x)
+{
+  return x % 2;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.c b/gcc/testsuite/gcc.target/aarch64/mod_256.c
new file mode 100644
index 0000000..567332c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_256.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "mod_256.x"
+
+/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.x b/gcc/testsuite/gcc.target/aarch64/mod_256.x
new file mode 100644
index 0000000..c1de42c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_256.x
@@ -0,0 +1,5 @@
+int
+f (int x)
+{
+  return x % 256;
+}
diff --git a/gcc/testsuite/gcc.target/arm/mod_2.c b/gcc/testsuite/gcc.target/arm/mod_2.c
new file mode 100644
index 0000000..93017a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mod_2.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "../aarch64/mod_2.x"
+
+/* { dg-final { scan-assembler "rsblt\tr\[0-9\]*" } } */
+/* { dg-final { scan-assembler-times "and\tr\[0-9\].*1" 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/mod_256.c b/gcc/testsuite/gcc.target/arm/mod_256.c
new file mode 100644
index 0000000..92ab05a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mod_256.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "../aarch64/mod_256.x"
+
+/* { dg-final { scan-assembler "rsbpl\tr\[0-9\]*" } } */
+/* { dg-final { scan-assembler "and\tr\[0-9\].*255" } } */

next             reply	other threads:[~2015-07-24 10:55 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-07-24 11:09 Kyrill Tkachov [this message]
2015-07-31  9:00 ` Kyrill Tkachov
2015-08-10 11:14   ` Kyrill Tkachov
2015-08-19 12:51     ` Kyrill Tkachov
2015-09-01  8:38       ` Kyrill Tkachov
2015-09-07  9:46 ` Ramana Radhakrishnan
2015-09-08  8:35   ` Kyrill Tkachov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=55B219AE.6010102@arm.com \
    --to=kyrylo.tkachov@arm.com \
    --cc=Richard.Earnshaw@arm.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=james.greenhalgh@arm.com \
    --cc=marcus.shawcroft@arm.com \
    --cc=ramana.radhakrishnan@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).