public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][ARM][3/3] Expand mod by power of 2
@ 2015-07-24 11:09 Kyrill Tkachov
  2015-07-31  9:00 ` Kyrill Tkachov
  2015-09-07  9:46 ` Ramana Radhakrishnan
  0 siblings, 2 replies; 7+ messages in thread
From: Kyrill Tkachov @ 2015-07-24 11:09 UTC (permalink / raw)
  To: GCC Patches
  Cc: Ramana Radhakrishnan, Richard Earnshaw, Marcus Shawcroft,
	James Greenhalgh

[-- Attachment #1: Type: text/plain, Size: 1494 bytes --]

Hi all,

This third patch implements the same algorithm as patch 1/3 but for arm.
That is, for X % N where N is a power of 2 we do:

  rsbs    r1, r0, #0
  and     r0, r0, #(N - 1)
  and     r1, r1, #(N - 1)
  rsbpl   r0, r1, #0

For the special case where N is 2 we do the shorter:
   cmp     r0, #0
   and     r0, r0, #1
   rsblt   r0, r0, #0

Note that for the final conditional negate we expand to an IF_THEN_ELSE of a NEG
rather than a cond_exec rtx because the lra dataflow analysis doesn't always deal
with cond_execs correctly. The splitters fixed in patch 2/3 then break it into a
cond_exec after reload, so it all works out.

Bootstrapped and tested on arm, with both ARM and Thumb2 states.

Tests are added and shared with aarch64.

Ok for trunk?

Thanks,
Kyrill

2015-07-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * config/arm/arm.md (*subsi3_compare0): Rename to...
     (subsi3_compare0): ... This.
     (*arm_andsi3_insn): Rename to...
     (arm_andsi3_insn): ... This.
     (modsi3): New define_expand.
     * config/arm/arm.c (arm_new_rtx_costs, MOD case): Handle case
     operand is power of 2.


2015-07-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * gcc.target/aarch64/mod_2.x: New file.
     * gcc.target/aarch64/mod_256.x: Likewise.
     * gcc.target/arm/mod_2.c: New test.
     * gcc.target/arm/mod_256.c: Likewise.
     * gcc.target/aarch64/mod_2.c: Likewise.
     * gcc.target/aarch64/mod_256.c: Likewise.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: arm-mod-pow2.patch --]
[-- Type: text/x-patch; name=arm-mod-pow2.patch, Size: 6593 bytes --]

commit d562629e36ba013b8f77956a74139330d191bc30
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Fri Jul 17 16:30:01 2015 +0100

    [ARM][3/3] Expand mod by power of 2

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index e1bc727..6ade07c 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -9556,6 +9556,22 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
 
     case MOD:
     case UMOD:
+      /* MOD by a power of 2 can be expanded as:
+	 rsbs    r1, r0, #0
+	 and     r0, r0, #(n - 1)
+	 and     r1, r1, #(n - 1)
+	 rsbpl   r0, r1, #0.  */
+      if (code == MOD
+	  && CONST_INT_P (XEXP (x, 1))
+	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
+	  && mode == SImode)
+	{
+	  *cost += COSTS_N_INSNS (3)
+		   + 2 * extra_cost->alu.logical
+		   + extra_cost->alu.arith;
+	  return true;
+	}
+
       *cost = LIBCALL_COST (2);
       return false;	/* All arguments must be in registers.  */
 
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index f341109..8301648 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1229,7 +1229,7 @@ (define_peephole2
   ""
 )
 
-(define_insn "*subsi3_compare0"
+(define_insn "subsi3_compare0"
   [(set (reg:CC_NOOV CC_REGNUM)
 	(compare:CC_NOOV
 	 (minus:SI (match_operand:SI 1 "arm_rhs_operand" "r,r,I")
@@ -2158,7 +2158,7 @@ (define_expand "andsi3"
 )
 
 ; ??? Check split length for Thumb-2
-(define_insn_and_split "*arm_andsi3_insn"
+(define_insn_and_split "arm_andsi3_insn"
   [(set (match_operand:SI         0 "s_register_operand" "=r,l,r,r,r")
 	(and:SI (match_operand:SI 1 "s_register_operand" "%r,0,r,r,r")
 		(match_operand:SI 2 "reg_or_int_operand" "I,l,K,r,?n")))]
@@ -11105,6 +11105,78 @@ (define_expand "thumb_legacy_rev"
   ""
 )
 
+;; ARM-specific expansion of signed mod by power of 2
+;; using conditional negate.
+;; For r0 % n where n is a power of 2 produce:
+;; rsbs    r1, r0, #0
+;; and     r0, r0, #(n - 1)
+;; and     r1, r1, #(n - 1)
+;; rsbpl   r0, r1, #0
+
+(define_expand "modsi3"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "const_int_operand" "")]
+  "TARGET_32BIT"
+  {
+    HOST_WIDE_INT val = INTVAL (operands[2]);
+
+    if (val <= 0
+       || exact_log2 (INTVAL (operands[2])) <= 0
+       || !const_ok_for_arm (INTVAL (operands[2]) - 1))
+      FAIL;
+
+    rtx mask = GEN_INT (val - 1);
+
+    /* In the special case of x0 % 2 we can do the even shorter:
+	cmp     r0, #0
+	and     r0, r0, #1
+	rsblt   r0, r0, #0.  */
+
+    if (val == 2)
+      {
+	rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+	rtx cond = gen_rtx_LT (SImode, cc_reg, const0_rtx);
+
+	emit_insn (gen_rtx_SET (cc_reg,
+			gen_rtx_COMPARE (CCmode, operands[1], const0_rtx)));
+
+	rtx masked = gen_reg_rtx (SImode);
+	emit_insn (gen_arm_andsi3_insn (masked, operands[1], mask));
+	emit_move_insn (operands[0],
+			gen_rtx_IF_THEN_ELSE (SImode, cond,
+					      gen_rtx_NEG (SImode,
+							   masked),
+					      masked));
+	DONE;
+      }
+
+    rtx neg_op = gen_reg_rtx (SImode);
+    rtx_insn *insn = emit_insn (gen_subsi3_compare0 (neg_op, const0_rtx,
+						      operands[1]));
+
+    /* Extract the condition register and mode.  */
+    rtx cmp = XVECEXP (PATTERN (insn), 0, 0);
+    rtx cc_reg = SET_DEST (cmp);
+    rtx cond = gen_rtx_GE (SImode, cc_reg, const0_rtx);
+
+    emit_insn (gen_arm_andsi3_insn (operands[0], operands[1], mask));
+
+    rtx masked_neg = gen_reg_rtx (SImode);
+    emit_insn (gen_arm_andsi3_insn (masked_neg, neg_op, mask));
+
+    /* We want a conditional negate here, but emitting COND_EXEC rtxes
+       during expand does not always work.  Do an IF_THEN_ELSE instead.  */
+    emit_move_insn (operands[0],
+		    gen_rtx_IF_THEN_ELSE (SImode, cond,
+					  gen_rtx_NEG (SImode, masked_neg),
+					  operands[0]));
+
+
+    DONE;
+  }
+)
+
 (define_expand "bswapsi2"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
   	(bswap:SI (match_operand:SI 1 "s_register_operand" "r")))]
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.c b/gcc/testsuite/gcc.target/aarch64/mod_2.c
new file mode 100644
index 0000000..2645c18
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_2.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "mod_2.x"
+
+/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */
+/* { dg-final { scan-assembler-times "and\t\[wx\]\[0-9\]*" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.x b/gcc/testsuite/gcc.target/aarch64/mod_2.x
new file mode 100644
index 0000000..2b079a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_2.x
@@ -0,0 +1,5 @@
+int
+f (int x)
+{
+  return x % 2;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.c b/gcc/testsuite/gcc.target/aarch64/mod_256.c
new file mode 100644
index 0000000..567332c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_256.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "mod_256.x"
+
+/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.x b/gcc/testsuite/gcc.target/aarch64/mod_256.x
new file mode 100644
index 0000000..c1de42c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_256.x
@@ -0,0 +1,5 @@
+int
+f (int x)
+{
+  return x % 256;
+}
diff --git a/gcc/testsuite/gcc.target/arm/mod_2.c b/gcc/testsuite/gcc.target/arm/mod_2.c
new file mode 100644
index 0000000..93017a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mod_2.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "../aarch64/mod_2.x"
+
+/* { dg-final { scan-assembler "rsblt\tr\[0-9\]*" } } */
+/* { dg-final { scan-assembler-times "and\tr\[0-9\].*1" 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/mod_256.c b/gcc/testsuite/gcc.target/arm/mod_256.c
new file mode 100644
index 0000000..92ab05a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mod_256.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "../aarch64/mod_256.x"
+
+/* { dg-final { scan-assembler "rsbpl\tr\[0-9\]*" } } */
+/* { dg-final { scan-assembler "and\tr\[0-9\].*255" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH][ARM][3/3] Expand mod by power of 2
  2015-07-24 11:09 [PATCH][ARM][3/3] Expand mod by power of 2 Kyrill Tkachov
@ 2015-07-31  9:00 ` Kyrill Tkachov
  2015-08-10 11:14   ` Kyrill Tkachov
  2015-09-07  9:46 ` Ramana Radhakrishnan
  1 sibling, 1 reply; 7+ messages in thread
From: Kyrill Tkachov @ 2015-07-31  9:00 UTC (permalink / raw)
  To: GCC Patches
  Cc: Ramana Radhakrishnan, Richard Earnshaw, Marcus Shawcroft,
	James Greenhalgh

Ping.

https://gcc.gnu.org/ml/gcc-patches/2015-07/msg02037.html
Thanks,
Kyrill

On 24/07/15 11:55, Kyrill Tkachov wrote:
> Hi all,
>
> This third patch implements the same algorithm as patch 1/3 but for arm.
> That is, for X % N where N is a power of 2 we do:
>
>    rsbs    r1, r0, #0
>    and     r0, r0, #(N - 1)
>    and     r1, r1, #(N - 1)
>    rsbpl   r0, r1, #0
>
> For the special case where N is 2 we do the shorter:
>     cmp     r0, #0
>     and     r0, r0, #1
>     rsblt   r0, r0, #0
>
> Note that for the final conditional negate we expand to an IF_THEN_ELSE of a NEG
> rather than a cond_exec rtx because the lra dataflow analysis doesn't always deal
> with cond_execs correctly. The splitters fixed in patch 2/3 then break it into a
> cond_exec after reload, so it all works out.
>
> Bootstrapped and tested on arm, with both ARM and Thumb2 states.
>
> Tests are added and shared with aarch64.
>
> Ok for trunk?
>
> Thanks,
> Kyrill
>
> 2015-07-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>
>       * config/arm/arm.md (*subsi3_compare0): Rename to...
>       (subsi3_compare0): ... This.
>       (*arm_andsi3_insn): Rename to...
>       (arm_andsi3_insn): ... This.
>       (modsi3): New define_expand.
>       * config/arm/arm.c (arm_new_rtx_costs, MOD case): Handle case
>       operand is power of 2.
>
>
> 2015-07-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>
>       * gcc.target/aarch64/mod_2.x: New file.
>       * gcc.target/aarch64/mod_256.x: Likewise.
>       * gcc.target/arm/mod_2.c: New test.
>       * gcc.target/arm/mod_256.c: Likewise.
>       * gcc.target/aarch64/mod_2.c: Likewise.
>       * gcc.target/aarch64/mod_256.c: Likewise.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH][ARM][3/3] Expand mod by power of 2
  2015-07-31  9:00 ` Kyrill Tkachov
@ 2015-08-10 11:14   ` Kyrill Tkachov
  2015-08-19 12:51     ` Kyrill Tkachov
  0 siblings, 1 reply; 7+ messages in thread
From: Kyrill Tkachov @ 2015-08-10 11:14 UTC (permalink / raw)
  To: GCC Patches
  Cc: Ramana Radhakrishnan, Richard Earnshaw, Marcus Shawcroft,
	James Greenhalgh

[-- Attachment #1: Type: text/plain, Size: 2921 bytes --]

Here is a slight respin.
The important parts are the same, just the expander now uses the slightly shorter
arm_gen_compare_reg and the rtx costs hunk is moved under an explicit case MOD.

Note, the tests still require patch 1/3 that does this for aarch64 that I hope to post
a respinned version of soon.

Ok after the prerequisite goes in?

Thanks,
Kyrill


2015-08-10  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * config/arm/arm.md (*subsi3_compare0): Rename to...
     (subsi3_compare0): ... This.
     (*arm_andsi3_insn): Rename to...
     (arm_andsi3_insn): ... This.
     (modsi3): New define_expand.
     * config/arm/arm.c (arm_new_rtx_costs, MOD case): Handle case
     when operand is power of 2.


2015-08-10  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * gcc.target/aarch64/mod_2.x: New file.
     * gcc.target/aarch64/mod_256.x: Likewise.
     * gcc.target/arm/mod_2.c: New test.
     * gcc.target/arm/mod_256.c: Likewise.
     * gcc.target/aarch64/mod_2.c: Likewise.
     * gcc.target/aarch64/mod_256.c: Likewise.



On 31/07/15 09:20, Kyrill Tkachov wrote:
> Ping.
>
> https://gcc.gnu.org/ml/gcc-patches/2015-07/msg02037.html
> Thanks,
> Kyrill
>
> On 24/07/15 11:55, Kyrill Tkachov wrote:
>> Hi all,
>>
>> This third patch implements the same algorithm as patch 1/3 but for arm.
>> That is, for X % N where N is a power of 2 we do:
>>
>>     rsbs    r1, r0, #0
>>     and     r0, r0, #(N - 1)
>>     and     r1, r1, #(N - 1)
>>     rsbpl   r0, r1, #0
>>
>> For the special case where N is 2 we do the shorter:
>>      cmp     r0, #0
>>      and     r0, r0, #1
>>      rsblt   r0, r0, #0
>>
>> Note that for the final conditional negate we expand to an IF_THEN_ELSE of a NEG
>> rather than a cond_exec rtx because the lra dataflow analysis doesn't always deal
>> with cond_execs correctly. The splitters fixed in patch 2/3 then break it into a
>> cond_exec after reload, so it all works out.
>>
>> Bootstrapped and tested on arm, with both ARM and Thumb2 states.
>>
>> Tests are added and shared with aarch64.
>>
>> Ok for trunk?
>>
>> Thanks,
>> Kyrill
>>
>> 2015-07-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>>
>>        * config/arm/arm.md (*subsi3_compare0): Rename to...
>>        (subsi3_compare0): ... This.
>>        (*arm_andsi3_insn): Rename to...
>>        (arm_andsi3_insn): ... This.
>>        (modsi3): New define_expand.
>>        * config/arm/arm.c (arm_new_rtx_costs, MOD case): Handle case
>>        operand is power of 2.
>>
>>
>> 2015-07-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>>
>>        * gcc.target/aarch64/mod_2.x: New file.
>>        * gcc.target/aarch64/mod_256.x: Likewise.
>>        * gcc.target/arm/mod_2.c: New test.
>>        * gcc.target/arm/mod_256.c: Likewise.
>>        * gcc.target/aarch64/mod_2.c: Likewise.
>>        * gcc.target/aarch64/mod_256.c: Likewise.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: arm-mod-2.patch --]
[-- Type: text/x-patch; name=arm-mod-2.patch, Size: 6634 bytes --]

commit 7d0da77d73552d8e683525f4e6fb8bc660ed1c56
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Fri Jul 17 16:30:01 2015 +0100

    [ARM][3/3] Expand mod by power of 2

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 1ea9e27..a607a5c 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -9559,6 +9559,24 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
       return false;	/* All arguments must be in registers.  */
 
     case MOD:
+      /* MOD by a power of 2 can be expanded as:
+	 rsbs    r1, r0, #0
+	 and     r0, r0, #(n - 1)
+	 and     r1, r1, #(n - 1)
+	 rsbpl   r0, r1, #0.  */
+      if (CONST_INT_P (XEXP (x, 1))
+	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
+	  && mode == SImode)
+	{
+	  *cost += COSTS_N_INSNS (3);
+
+	  if (speed_p)
+	    *cost += 2 * extra_cost->alu.logical
+		     + extra_cost->alu.arith;
+	  return true;
+	}
+
+    /* Fall-through.  */
     case UMOD:
       *cost = LIBCALL_COST (2);
       return false;	/* All arguments must be in registers.  */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 817860d..652ec51 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1229,7 +1229,7 @@ (define_peephole2
   ""
 )
 
-(define_insn "*subsi3_compare0"
+(define_insn "subsi3_compare0"
   [(set (reg:CC_NOOV CC_REGNUM)
 	(compare:CC_NOOV
 	 (minus:SI (match_operand:SI 1 "arm_rhs_operand" "r,r,I")
@@ -2158,7 +2158,7 @@ (define_expand "andsi3"
 )
 
 ; ??? Check split length for Thumb-2
-(define_insn_and_split "*arm_andsi3_insn"
+(define_insn_and_split "arm_andsi3_insn"
   [(set (match_operand:SI         0 "s_register_operand" "=r,l,r,r,r")
 	(and:SI (match_operand:SI 1 "s_register_operand" "%r,0,r,r,r")
 		(match_operand:SI 2 "reg_or_int_operand" "I,l,K,r,?n")))]
@@ -11143,6 +11143,76 @@ (define_expand "thumb_legacy_rev"
   ""
 )
 
+;; ARM-specific expansion of signed mod by power of 2
+;; using conditional negate.
+;; For r0 % n where n is a power of 2 produce:
+;; rsbs    r1, r0, #0
+;; and     r0, r0, #(n - 1)
+;; and     r1, r1, #(n - 1)
+;; rsbpl   r0, r1, #0
+
+(define_expand "modsi3"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "const_int_operand" "")]
+  "TARGET_32BIT"
+  {
+    HOST_WIDE_INT val = INTVAL (operands[2]);
+
+    if (val <= 0
+       || exact_log2 (INTVAL (operands[2])) <= 0
+       || !const_ok_for_arm (INTVAL (operands[2]) - 1))
+      FAIL;
+
+    rtx mask = GEN_INT (val - 1);
+
+    /* In the special case of x0 % 2 we can do the even shorter:
+	cmp     r0, #0
+	and     r0, r0, #1
+	rsblt   r0, r0, #0.  */
+
+    if (val == 2)
+      {
+	rtx cc_reg = arm_gen_compare_reg (LT,
+					  operands[1], const0_rtx, NULL_RTX);
+	rtx cond = gen_rtx_LT (SImode, cc_reg, const0_rtx);
+	rtx masked = gen_reg_rtx (SImode);
+
+	emit_insn (gen_arm_andsi3_insn (masked, operands[1], mask));
+	emit_move_insn (operands[0],
+			gen_rtx_IF_THEN_ELSE (SImode, cond,
+					      gen_rtx_NEG (SImode,
+							   masked),
+					      masked));
+	DONE;
+      }
+
+    rtx neg_op = gen_reg_rtx (SImode);
+    rtx_insn *insn = emit_insn (gen_subsi3_compare0 (neg_op, const0_rtx,
+						      operands[1]));
+
+    /* Extract the condition register and mode.  */
+    rtx cmp = XVECEXP (PATTERN (insn), 0, 0);
+    rtx cc_reg = SET_DEST (cmp);
+    rtx cond = gen_rtx_GE (SImode, cc_reg, const0_rtx);
+
+    emit_insn (gen_arm_andsi3_insn (operands[0], operands[1], mask));
+
+    rtx masked_neg = gen_reg_rtx (SImode);
+    emit_insn (gen_arm_andsi3_insn (masked_neg, neg_op, mask));
+
+    /* We want a conditional negate here, but emitting COND_EXEC rtxes
+       during expand does not always work.  Do an IF_THEN_ELSE instead.  */
+    emit_move_insn (operands[0],
+		    gen_rtx_IF_THEN_ELSE (SImode, cond,
+					  gen_rtx_NEG (SImode, masked_neg),
+					  operands[0]));
+
+
+    DONE;
+  }
+)
+
 (define_expand "bswapsi2"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
   	(bswap:SI (match_operand:SI 1 "s_register_operand" "r")))]
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.c b/gcc/testsuite/gcc.target/aarch64/mod_2.c
new file mode 100644
index 0000000..2645c18
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_2.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "mod_2.x"
+
+/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */
+/* { dg-final { scan-assembler-times "and\t\[wx\]\[0-9\]*" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.x b/gcc/testsuite/gcc.target/aarch64/mod_2.x
new file mode 100644
index 0000000..2b079a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_2.x
@@ -0,0 +1,5 @@
+int
+f (int x)
+{
+  return x % 2;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.c b/gcc/testsuite/gcc.target/aarch64/mod_256.c
new file mode 100644
index 0000000..567332c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_256.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "mod_256.x"
+
+/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.x b/gcc/testsuite/gcc.target/aarch64/mod_256.x
new file mode 100644
index 0000000..c1de42c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_256.x
@@ -0,0 +1,5 @@
+int
+f (int x)
+{
+  return x % 256;
+}
diff --git a/gcc/testsuite/gcc.target/arm/mod_2.c b/gcc/testsuite/gcc.target/arm/mod_2.c
new file mode 100644
index 0000000..93017a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mod_2.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "../aarch64/mod_2.x"
+
+/* { dg-final { scan-assembler "rsblt\tr\[0-9\]*" } } */
+/* { dg-final { scan-assembler-times "and\tr\[0-9\].*1" 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/mod_256.c b/gcc/testsuite/gcc.target/arm/mod_256.c
new file mode 100644
index 0000000..92ab05a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mod_256.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "../aarch64/mod_256.x"
+
+/* { dg-final { scan-assembler "rsbpl\tr\[0-9\]*" } } */
+/* { dg-final { scan-assembler "and\tr\[0-9\].*255" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH][ARM][3/3] Expand mod by power of 2
  2015-08-10 11:14   ` Kyrill Tkachov
@ 2015-08-19 12:51     ` Kyrill Tkachov
  2015-09-01  8:38       ` Kyrill Tkachov
  0 siblings, 1 reply; 7+ messages in thread
From: Kyrill Tkachov @ 2015-08-19 12:51 UTC (permalink / raw)
  To: GCC Patches
  Cc: Ramana Radhakrishnan, Richard Earnshaw, Marcus Shawcroft,
	James Greenhalgh

Ping.
https://gcc.gnu.org/ml/gcc-patches/2015-08/msg00448.html

Thanks,
Kyrill

> -----Original Message-----
> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
> owner@gcc.gnu.org] On Behalf Of Kyrill Tkachov
> Sent: 10 August 2015 12:14
> To: GCC Patches
> Cc: Ramana Radhakrishnan; Richard Earnshaw; Marcus Shawcroft; James
> Greenhalgh
> Subject: Re: [PATCH][ARM][3/3] Expand mod by power of 2
> 
> Here is a slight respin.
> The important parts are the same, just the expander now uses the slightly
> shorter arm_gen_compare_reg and the rtx costs hunk is moved under an
> explicit case MOD.
> 
> Note, the tests still require patch 1/3 that does this for aarch64 that I hope to
> post a respinned version of soon.
> 
> Ok after the prerequisite goes in?
> 
> Thanks,
> Kyrill
> 
> 
> 2015-08-10  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
> 
>      * config/arm/arm.md (*subsi3_compare0): Rename to...
>      (subsi3_compare0): ... This.
>      (*arm_andsi3_insn): Rename to...
>      (arm_andsi3_insn): ... This.
>      (modsi3): New define_expand.
>      * config/arm/arm.c (arm_new_rtx_costs, MOD case): Handle case
>      when operand is power of 2.
> 
> 
> 2015-08-10  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
> 
>      * gcc.target/aarch64/mod_2.x: New file.
>      * gcc.target/aarch64/mod_256.x: Likewise.
>      * gcc.target/arm/mod_2.c: New test.
>      * gcc.target/arm/mod_256.c: Likewise.
>      * gcc.target/aarch64/mod_2.c: Likewise.
>      * gcc.target/aarch64/mod_256.c: Likewise.
> 
> 
> 
> On 31/07/15 09:20, Kyrill Tkachov wrote:
> > Ping.
> >
> > https://gcc.gnu.org/ml/gcc-patches/2015-07/msg02037.html
> > Thanks,
> > Kyrill
> >
> > On 24/07/15 11:55, Kyrill Tkachov wrote:
> >> Hi all,
> >>
> >> This third patch implements the same algorithm as patch 1/3 but for arm.
> >> That is, for X % N where N is a power of 2 we do:
> >>
> >>     rsbs    r1, r0, #0
> >>     and     r0, r0, #(N - 1)
> >>     and     r1, r1, #(N - 1)
> >>     rsbpl   r0, r1, #0
> >>
> >> For the special case where N is 2 we do the shorter:
> >>      cmp     r0, #0
> >>      and     r0, r0, #1
> >>      rsblt   r0, r0, #0
> >>
> >> Note that for the final conditional negate we expand to an
> >> IF_THEN_ELSE of a NEG rather than a cond_exec rtx because the lra
> >> dataflow analysis doesn't always deal with cond_execs correctly. The
> >> splitters fixed in patch 2/3 then break it into a cond_exec after reload, so
> it all works out.
> >>
> >> Bootstrapped and tested on arm, with both ARM and Thumb2 states.
> >>
> >> Tests are added and shared with aarch64.
> >>
> >> Ok for trunk?
> >>
> >> Thanks,
> >> Kyrill
> >>
> >> 2015-07-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
> >>
> >>        * config/arm/arm.md (*subsi3_compare0): Rename to...
> >>        (subsi3_compare0): ... This.
> >>        (*arm_andsi3_insn): Rename to...
> >>        (arm_andsi3_insn): ... This.
> >>        (modsi3): New define_expand.
> >>        * config/arm/arm.c (arm_new_rtx_costs, MOD case): Handle case
> >>        operand is power of 2.
> >>
> >>
> >> 2015-07-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
> >>
> >>        * gcc.target/aarch64/mod_2.x: New file.
> >>        * gcc.target/aarch64/mod_256.x: Likewise.
> >>        * gcc.target/arm/mod_2.c: New test.
> >>        * gcc.target/arm/mod_256.c: Likewise.
> >>        * gcc.target/aarch64/mod_2.c: Likewise.
> >>        * gcc.target/aarch64/mod_256.c: Likewise.



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH][ARM][3/3] Expand mod by power of 2
  2015-08-19 12:51     ` Kyrill Tkachov
@ 2015-09-01  8:38       ` Kyrill Tkachov
  0 siblings, 0 replies; 7+ messages in thread
From: Kyrill Tkachov @ 2015-09-01  8:38 UTC (permalink / raw)
  To: GCC Patches
  Cc: Ramana Radhakrishnan, Richard Earnshaw, Marcus Shawcroft,
	James Greenhalgh

Ping.

Thanks,
Kyrill
On 19/08/15 13:50, Kyrill Tkachov wrote:
> Ping.
> https://gcc.gnu.org/ml/gcc-patches/2015-08/msg00448.html
>
> Thanks,
> Kyrill
>
>> -----Original Message-----
>> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
>> owner@gcc.gnu.org] On Behalf Of Kyrill Tkachov
>> Sent: 10 August 2015 12:14
>> To: GCC Patches
>> Cc: Ramana Radhakrishnan; Richard Earnshaw; Marcus Shawcroft; James
>> Greenhalgh
>> Subject: Re: [PATCH][ARM][3/3] Expand mod by power of 2
>>
>> Here is a slight respin.
>> The important parts are the same, just the expander now uses the slightly
>> shorter arm_gen_compare_reg and the rtx costs hunk is moved under an
>> explicit case MOD.
>>
>> Note, the tests still require patch 1/3 that does this for aarch64 that I hope to
>> post a respinned version of soon.
>>
>> Ok after the prerequisite goes in?
>>
>> Thanks,
>> Kyrill
>>
>>
>> 2015-08-10  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>>
>>       * config/arm/arm.md (*subsi3_compare0): Rename to...
>>       (subsi3_compare0): ... This.
>>       (*arm_andsi3_insn): Rename to...
>>       (arm_andsi3_insn): ... This.
>>       (modsi3): New define_expand.
>>       * config/arm/arm.c (arm_new_rtx_costs, MOD case): Handle case
>>       when operand is power of 2.
>>
>>
>> 2015-08-10  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>>
>>       * gcc.target/aarch64/mod_2.x: New file.
>>       * gcc.target/aarch64/mod_256.x: Likewise.
>>       * gcc.target/arm/mod_2.c: New test.
>>       * gcc.target/arm/mod_256.c: Likewise.
>>       * gcc.target/aarch64/mod_2.c: Likewise.
>>       * gcc.target/aarch64/mod_256.c: Likewise.
>>
>>
>>
>> On 31/07/15 09:20, Kyrill Tkachov wrote:
>>> Ping.
>>>
>>> https://gcc.gnu.org/ml/gcc-patches/2015-07/msg02037.html
>>> Thanks,
>>> Kyrill
>>>
>>> On 24/07/15 11:55, Kyrill Tkachov wrote:
>>>> Hi all,
>>>>
>>>> This third patch implements the same algorithm as patch 1/3 but for arm.
>>>> That is, for X % N where N is a power of 2 we do:
>>>>
>>>>      rsbs    r1, r0, #0
>>>>      and     r0, r0, #(N - 1)
>>>>      and     r1, r1, #(N - 1)
>>>>      rsbpl   r0, r1, #0
>>>>
>>>> For the special case where N is 2 we do the shorter:
>>>>       cmp     r0, #0
>>>>       and     r0, r0, #1
>>>>       rsblt   r0, r0, #0
>>>>
>>>> Note that for the final conditional negate we expand to an
>>>> IF_THEN_ELSE of a NEG rather than a cond_exec rtx because the lra
>>>> dataflow analysis doesn't always deal with cond_execs correctly. The
>>>> splitters fixed in patch 2/3 then break it into a cond_exec after reload, so
>> it all works out.
>>>> Bootstrapped and tested on arm, with both ARM and Thumb2 states.
>>>>
>>>> Tests are added and shared with aarch64.
>>>>
>>>> Ok for trunk?
>>>>
>>>> Thanks,
>>>> Kyrill
>>>>
>>>> 2015-07-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>>>>
>>>>         * config/arm/arm.md (*subsi3_compare0): Rename to...
>>>>         (subsi3_compare0): ... This.
>>>>         (*arm_andsi3_insn): Rename to...
>>>>         (arm_andsi3_insn): ... This.
>>>>         (modsi3): New define_expand.
>>>>         * config/arm/arm.c (arm_new_rtx_costs, MOD case): Handle case
>>>>         operand is power of 2.
>>>>
>>>>
>>>> 2015-07-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>>>>
>>>>         * gcc.target/aarch64/mod_2.x: New file.
>>>>         * gcc.target/aarch64/mod_256.x: Likewise.
>>>>         * gcc.target/arm/mod_2.c: New test.
>>>>         * gcc.target/arm/mod_256.c: Likewise.
>>>>         * gcc.target/aarch64/mod_2.c: Likewise.
>>>>         * gcc.target/aarch64/mod_256.c: Likewise.
>
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH][ARM][3/3] Expand mod by power of 2
  2015-07-24 11:09 [PATCH][ARM][3/3] Expand mod by power of 2 Kyrill Tkachov
  2015-07-31  9:00 ` Kyrill Tkachov
@ 2015-09-07  9:46 ` Ramana Radhakrishnan
  2015-09-08  8:35   ` Kyrill Tkachov
  1 sibling, 1 reply; 7+ messages in thread
From: Ramana Radhakrishnan @ 2015-09-07  9:46 UTC (permalink / raw)
  To: Kyrill Tkachov, GCC Patches
  Cc: Ramana Radhakrishnan, Richard Earnshaw, Marcus Shawcroft,
	James Greenhalgh



On 24/07/15 11:55, Kyrill Tkachov wrote:
> 
> commit d562629e36ba013b8f77956a74139330d191bc30
> Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
> Date:   Fri Jul 17 16:30:01 2015 +0100
> 
>     [ARM][3/3] Expand mod by power of 2
> 
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index e1bc727..6ade07c 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -9556,6 +9556,22 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
>  
>      case MOD:
>      case UMOD:
> +      /* MOD by a power of 2 can be expanded as:
> +	 rsbs    r1, r0, #0
> +	 and     r0, r0, #(n - 1)
> +	 and     r1, r1, #(n - 1)
> +	 rsbpl   r0, r1, #0.  */
> +      if (code == MOD
> +	  && CONST_INT_P (XEXP (x, 1))
> +	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
> +	  && mode == SImode)
> +	{
> +	  *cost += COSTS_N_INSNS (3)
> +		   + 2 * extra_cost->alu.logical
> +		   + extra_cost->alu.arith;
> +	  return true;
> +	}
> +
>        *cost = LIBCALL_COST (2);
>        return false;	/* All arguments must be in registers.  */
>  
> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
> index f341109..8301648 100644
> --- a/gcc/config/arm/arm.md
> +++ b/gcc/config/arm/arm.md
> @@ -1229,7 +1229,7 @@ (define_peephole2
>    ""
>  )
>  
> -(define_insn "*subsi3_compare0"
> +(define_insn "subsi3_compare0"
>    [(set (reg:CC_NOOV CC_REGNUM)
>  	(compare:CC_NOOV
>  	 (minus:SI (match_operand:SI 1 "arm_rhs_operand" "r,r,I")
> @@ -2158,7 +2158,7 @@ (define_expand "andsi3"
>  )
>  
>  ; ??? Check split length for Thumb-2
> -(define_insn_and_split "*arm_andsi3_insn"
> +(define_insn_and_split "arm_andsi3_insn"
>    [(set (match_operand:SI         0 "s_register_operand" "=r,l,r,r,r")
>  	(and:SI (match_operand:SI 1 "s_register_operand" "%r,0,r,r,r")
>  		(match_operand:SI 2 "reg_or_int_operand" "I,l,K,r,?n")))]
> @@ -11105,6 +11105,78 @@ (define_expand "thumb_legacy_rev"
>    ""
>  )

This shouldn't be necessary - you are just adding another interface to produce an and insn.

>  
> +;; ARM-specific expansion of signed mod by power of 2
> +;; using conditional negate.
> +;; For r0 % n where n is a power of 2 produce:
> +;; rsbs    r1, r0, #0
> +;; and     r0, r0, #(n - 1)
> +;; and     r1, r1, #(n - 1)
> +;; rsbpl   r0, r1, #0
> +
> +(define_expand "modsi3"
> +  [(match_operand:SI 0 "register_operand" "")
> +   (match_operand:SI 1 "register_operand" "")
> +   (match_operand:SI 2 "const_int_operand" "")]
> +  "TARGET_32BIT"
> +  {
> +    HOST_WIDE_INT val = INTVAL (operands[2]);
> +
> +    if (val <= 0
> +       || exact_log2 (INTVAL (operands[2])) <= 0
> +       || !const_ok_for_arm (INTVAL (operands[2]) - 1))
> +      FAIL;
> +
> +    rtx mask = GEN_INT (val - 1);
> +
> +    /* In the special case of x0 % 2 we can do the even shorter:
> +	cmp     r0, #0
> +	and     r0, r0, #1
> +	rsblt   r0, r0, #0.  */
> +
> +    if (val == 2)
> +      {
> +	rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
> +	rtx cond = gen_rtx_LT (SImode, cc_reg, const0_rtx);
> +
> +	emit_insn (gen_rtx_SET (cc_reg,
> +			gen_rtx_COMPARE (CCmode, operands[1], const0_rtx)));
> +
> +	rtx masked = gen_reg_rtx (SImode);
> +	emit_insn (gen_arm_andsi3_insn (masked, operands[1], mask));

Use emit_insn (gen_andsi3 (masked, operands[1], mask) instead and likewise below.


> +	emit_move_insn (operands[0],
> +			gen_rtx_IF_THEN_ELSE (SImode, cond,
> +					      gen_rtx_NEG (SImode,
> +							   masked),
> +					      masked));
> +	DONE;
> +      }
> +
> +    rtx neg_op = gen_reg_rtx (SImode);
> +    rtx_insn *insn = emit_insn (gen_subsi3_compare0 (neg_op, const0_rtx,
> +						      operands[1]));
> +
> +    /* Extract the condition register and mode.  */
> +    rtx cmp = XVECEXP (PATTERN (insn), 0, 0);
> +    rtx cc_reg = SET_DEST (cmp);
> +    rtx cond = gen_rtx_GE (SImode, cc_reg, const0_rtx);
> +
> +    emit_insn (gen_arm_andsi3_insn (operands[0], operands[1], mask));
> +
> +    rtx masked_neg = gen_reg_rtx (SImode);
> +    emit_insn (gen_arm_andsi3_insn (masked_neg, neg_op, mask));
> +
> +    /* We want a conditional negate here, but emitting COND_EXEC rtxes
> +       during expand does not always work.  Do an IF_THEN_ELSE instead.  */
> +    emit_move_insn (operands[0],
> +		    gen_rtx_IF_THEN_ELSE (SImode, cond,
> +					  gen_rtx_NEG (SImode, masked_neg),
> +					  operands[0]));
> +
> +
> +    DONE;
> +  }
> +)
> +
>  (define_expand "bswapsi2"
>    [(set (match_operand:SI 0 "s_register_operand" "=r")
>    	(bswap:SI (match_operand:SI 1 "s_register_operand" "r")))]
> diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.c b/gcc/testsuite/gcc.target/aarch64/mod_2.c
> new file mode 100644
> index 0000000..2645c18
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/mod_2.c
> @@ -0,0 +1,7 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
> +
> +#include "mod_2.x"
> +
> +/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */
> +/* { dg-final { scan-assembler-times "and\t\[wx\]\[0-9\]*" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.x b/gcc/testsuite/gcc.target/aarch64/mod_2.x
> new file mode 100644
> index 0000000..2b079a4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/mod_2.x
> @@ -0,0 +1,5 @@
> +int
> +f (int x)
> +{
> +  return x % 2;
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.c b/gcc/testsuite/gcc.target/aarch64/mod_256.c
> new file mode 100644
> index 0000000..567332c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/mod_256.c
> @@ -0,0 +1,6 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
> +
> +#include "mod_256.x"
> +
> +/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.x b/gcc/testsuite/gcc.target/aarch64/mod_256.x
> new file mode 100644
> index 0000000..c1de42c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/mod_256.x
> @@ -0,0 +1,5 @@
> +int
> +f (int x)
> +{
> +  return x % 256;
> +}
> diff --git a/gcc/testsuite/gcc.target/arm/mod_2.c b/gcc/testsuite/gcc.target/arm/mod_2.c
> new file mode 100644
> index 0000000..93017a1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mod_2.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm32 } */
> +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
> +
> +#include "../aarch64/mod_2.x"
> +
> +/* { dg-final { scan-assembler "rsblt\tr\[0-9\]*" } } */
> +/* { dg-final { scan-assembler-times "and\tr\[0-9\].*1" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/arm/mod_256.c b/gcc/testsuite/gcc.target/arm/mod_256.c
> new file mode 100644
> index 0000000..92ab05a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mod_256.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm32 } */
> +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
> +
> +#include "../aarch64/mod_256.x"
> +
> +/* { dg-final { scan-assembler "rsbpl\tr\[0-9\]*" } } */
> +/* { dg-final { scan-assembler "and\tr\[0-9\].*255" } } */


OK with those changes if no regressions.

Ramana

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH][ARM][3/3] Expand mod by power of 2
  2015-09-07  9:46 ` Ramana Radhakrishnan
@ 2015-09-08  8:35   ` Kyrill Tkachov
  0 siblings, 0 replies; 7+ messages in thread
From: Kyrill Tkachov @ 2015-09-08  8:35 UTC (permalink / raw)
  To: Ramana Radhakrishnan, GCC Patches
  Cc: Richard Earnshaw, Marcus Shawcroft, James Greenhalgh

[-- Attachment #1: Type: text/plain, Size: 8827 bytes --]


On 07/09/15 10:27, Ramana Radhakrishnan wrote:
>
> On 24/07/15 11:55, Kyrill Tkachov wrote:
>> commit d562629e36ba013b8f77956a74139330d191bc30
>> Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
>> Date:   Fri Jul 17 16:30:01 2015 +0100
>>
>>      [ARM][3/3] Expand mod by power of 2
>>
>> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
>> index e1bc727..6ade07c 100644
>> --- a/gcc/config/arm/arm.c
>> +++ b/gcc/config/arm/arm.c
>> @@ -9556,6 +9556,22 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
>>   
>>       case MOD:
>>       case UMOD:
>> +      /* MOD by a power of 2 can be expanded as:
>> +	 rsbs    r1, r0, #0
>> +	 and     r0, r0, #(n - 1)
>> +	 and     r1, r1, #(n - 1)
>> +	 rsbpl   r0, r1, #0.  */
>> +      if (code == MOD
>> +	  && CONST_INT_P (XEXP (x, 1))
>> +	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
>> +	  && mode == SImode)
>> +	{
>> +	  *cost += COSTS_N_INSNS (3)
>> +		   + 2 * extra_cost->alu.logical
>> +		   + extra_cost->alu.arith;
>> +	  return true;
>> +	}
>> +
>>         *cost = LIBCALL_COST (2);
>>         return false;	/* All arguments must be in registers.  */
>>   
>> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
>> index f341109..8301648 100644
>> --- a/gcc/config/arm/arm.md
>> +++ b/gcc/config/arm/arm.md
>> @@ -1229,7 +1229,7 @@ (define_peephole2
>>     ""
>>   )
>>   
>> -(define_insn "*subsi3_compare0"
>> +(define_insn "subsi3_compare0"
>>     [(set (reg:CC_NOOV CC_REGNUM)
>>   	(compare:CC_NOOV
>>   	 (minus:SI (match_operand:SI 1 "arm_rhs_operand" "r,r,I")
>> @@ -2158,7 +2158,7 @@ (define_expand "andsi3"
>>   )
>>   
>>   ; ??? Check split length for Thumb-2
>> -(define_insn_and_split "*arm_andsi3_insn"
>> +(define_insn_and_split "arm_andsi3_insn"
>>     [(set (match_operand:SI         0 "s_register_operand" "=r,l,r,r,r")
>>   	(and:SI (match_operand:SI 1 "s_register_operand" "%r,0,r,r,r")
>>   		(match_operand:SI 2 "reg_or_int_operand" "I,l,K,r,?n")))]
>> @@ -11105,6 +11105,78 @@ (define_expand "thumb_legacy_rev"
>>     ""
>>   )
> This shouldn't be necessary - you are just adding another interface to produce an and insn.

I see what you mean. Ok, I'll drop this.

>
>>   
>> +;; ARM-specific expansion of signed mod by power of 2
>> +;; using conditional negate.
>> +;; For r0 % n where n is a power of 2 produce:
>> +;; rsbs    r1, r0, #0
>> +;; and     r0, r0, #(n - 1)
>> +;; and     r1, r1, #(n - 1)
>> +;; rsbpl   r0, r1, #0
>> +
>> +(define_expand "modsi3"
>> +  [(match_operand:SI 0 "register_operand" "")
>> +   (match_operand:SI 1 "register_operand" "")
>> +   (match_operand:SI 2 "const_int_operand" "")]
>> +  "TARGET_32BIT"
>> +  {
>> +    HOST_WIDE_INT val = INTVAL (operands[2]);
>> +
>> +    if (val <= 0
>> +       || exact_log2 (INTVAL (operands[2])) <= 0
>> +       || !const_ok_for_arm (INTVAL (operands[2]) - 1))
>> +      FAIL;
>> +
>> +    rtx mask = GEN_INT (val - 1);
>> +
>> +    /* In the special case of x0 % 2 we can do the even shorter:
>> +	cmp     r0, #0
>> +	and     r0, r0, #1
>> +	rsblt   r0, r0, #0.  */
>> +
>> +    if (val == 2)
>> +      {
>> +	rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
>> +	rtx cond = gen_rtx_LT (SImode, cc_reg, const0_rtx);
>> +
>> +	emit_insn (gen_rtx_SET (cc_reg,
>> +			gen_rtx_COMPARE (CCmode, operands[1], const0_rtx)));
>> +
>> +	rtx masked = gen_reg_rtx (SImode);
>> +	emit_insn (gen_arm_andsi3_insn (masked, operands[1], mask));
> Use emit_insn (gen_andsi3 (masked, operands[1], mask) instead and likewise below.

Ok, done that. A side effect of this is that since the andsi3 expander handles
any reg_or_int we can catch more masks this way. Also, due to the way the andsi3 expander
is written, for the mask 255 it will generate a zero_extend instead of an and.
This may or may not be optimal on some cores but perhaps we should look at the andsi3 expander
to make it more robust as a separate task.

>
>
>> +	emit_move_insn (operands[0],
>> +			gen_rtx_IF_THEN_ELSE (SImode, cond,
>> +					      gen_rtx_NEG (SImode,
>> +							   masked),
>> +					      masked));
>> +	DONE;
>> +      }
>> +
>> +    rtx neg_op = gen_reg_rtx (SImode);
>> +    rtx_insn *insn = emit_insn (gen_subsi3_compare0 (neg_op, const0_rtx,
>> +						      operands[1]));
>> +
>> +    /* Extract the condition register and mode.  */
>> +    rtx cmp = XVECEXP (PATTERN (insn), 0, 0);
>> +    rtx cc_reg = SET_DEST (cmp);
>> +    rtx cond = gen_rtx_GE (SImode, cc_reg, const0_rtx);
>> +
>> +    emit_insn (gen_arm_andsi3_insn (operands[0], operands[1], mask));
>> +
>> +    rtx masked_neg = gen_reg_rtx (SImode);
>> +    emit_insn (gen_arm_andsi3_insn (masked_neg, neg_op, mask));
>> +
>> +    /* We want a conditional negate here, but emitting COND_EXEC rtxes
>> +       during expand does not always work.  Do an IF_THEN_ELSE instead.  */
>> +    emit_move_insn (operands[0],
>> +		    gen_rtx_IF_THEN_ELSE (SImode, cond,
>> +					  gen_rtx_NEG (SImode, masked_neg),
>> +					  operands[0]));
>> +
>> +
>> +    DONE;
>> +  }
>> +)
>> +
>>   (define_expand "bswapsi2"
>>     [(set (match_operand:SI 0 "s_register_operand" "=r")
>>     	(bswap:SI (match_operand:SI 1 "s_register_operand" "r")))]
>> diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.c b/gcc/testsuite/gcc.target/aarch64/mod_2.c
>> new file mode 100644
>> index 0000000..2645c18
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/mod_2.c
>> @@ -0,0 +1,7 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
>> +
>> +#include "mod_2.x"
>> +
>> +/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */
>> +/* { dg-final { scan-assembler-times "and\t\[wx\]\[0-9\]*" 1 } } */
>> diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.x b/gcc/testsuite/gcc.target/aarch64/mod_2.x
>> new file mode 100644
>> index 0000000..2b079a4
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/mod_2.x
>> @@ -0,0 +1,5 @@
>> +int
>> +f (int x)
>> +{
>> +  return x % 2;
>> +}
>> diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.c b/gcc/testsuite/gcc.target/aarch64/mod_256.c
>> new file mode 100644
>> index 0000000..567332c
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/mod_256.c
>> @@ -0,0 +1,6 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
>> +
>> +#include "mod_256.x"
>> +
>> +/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */
>> diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.x b/gcc/testsuite/gcc.target/aarch64/mod_256.x
>> new file mode 100644
>> index 0000000..c1de42c
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/mod_256.x
>> @@ -0,0 +1,5 @@
>> +int
>> +f (int x)
>> +{
>> +  return x % 256;
>> +}
>> diff --git a/gcc/testsuite/gcc.target/arm/mod_2.c b/gcc/testsuite/gcc.target/arm/mod_2.c
>> new file mode 100644
>> index 0000000..93017a1
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/arm/mod_2.c
>> @@ -0,0 +1,8 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target arm32 } */
>> +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
>> +
>> +#include "../aarch64/mod_2.x"
>> +
>> +/* { dg-final { scan-assembler "rsblt\tr\[0-9\]*" } } */
>> +/* { dg-final { scan-assembler-times "and\tr\[0-9\].*1" 1 } } */
>> diff --git a/gcc/testsuite/gcc.target/arm/mod_256.c b/gcc/testsuite/gcc.target/arm/mod_256.c
>> new file mode 100644
>> index 0000000..92ab05a
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/arm/mod_256.c
>> @@ -0,0 +1,8 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target arm32 } */
>> +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
>> +
>> +#include "../aarch64/mod_256.x"
>> +
>> +/* { dg-final { scan-assembler "rsbpl\tr\[0-9\]*" } } */
>> +/* { dg-final { scan-assembler "and\tr\[0-9\].*255" } } */
>
> OK with those changes if no regressions.

Thanks,
I'm attaching the updated patch.
I'll commit it when the aarch64 one is ok'd as the tests are partly shared with this patch.

Kyrill

2015-09-08  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

       * config/arm/arm.md (*subsi3_compare0): Rename to...
       (subsi3_compare0): ... This.
       (modsi3): New define_expand.
       * config/arm/arm.c (arm_new_rtx_costs, MOD case): Handle case
       when operand is power of 2.


2015-09-08  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

       * gcc.target/aarch64/mod_2.x: New file.
       * gcc.target/aarch64/mod_256.x: Likewise.
       * gcc.target/arm/mod_2.c: New test.
       * gcc.target/arm/mod_256.c: Likewise.
       * gcc.target/aarch64/mod_2.c: Likewise.
       * gcc.target/aarch64/mod_256.c: Likewise.

> Ramana
>


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: arm-mod-2.patch --]
[-- Type: text/x-patch; name=arm-mod-2.patch, Size: 6094 bytes --]

commit 9a205ad46fe96e35b0dcbdbfcd89e2a2933a7cbd
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Fri Jul 17 16:30:01 2015 +0100

    [ARM][3/3] Expand mod by power of 2

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index fa4e083..a81114d 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -9580,6 +9580,24 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
       return false;	/* All arguments must be in registers.  */
 
     case MOD:
+      /* MOD by a power of 2 can be expanded as:
+	 rsbs    r1, r0, #0
+	 and     r0, r0, #(n - 1)
+	 and     r1, r1, #(n - 1)
+	 rsbpl   r0, r1, #0.  */
+      if (CONST_INT_P (XEXP (x, 1))
+	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
+	  && mode == SImode)
+	{
+	  *cost += COSTS_N_INSNS (3);
+
+	  if (speed_p)
+	    *cost += 2 * extra_cost->alu.logical
+		     + extra_cost->alu.arith;
+	  return true;
+	}
+
+    /* Fall-through.  */
     case UMOD:
       *cost = LIBCALL_COST (2);
       return false;	/* All arguments must be in registers.  */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index b6c2047..775ca25 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1229,7 +1229,7 @@ (define_peephole2
   ""
 )
 
-(define_insn "*subsi3_compare0"
+(define_insn "subsi3_compare0"
   [(set (reg:CC_NOOV CC_REGNUM)
 	(compare:CC_NOOV
 	 (minus:SI (match_operand:SI 1 "arm_rhs_operand" "r,r,I")
@@ -11142,6 +11142,75 @@ (define_expand "thumb_legacy_rev"
   ""
 )
 
+;; ARM-specific expansion of signed mod by power of 2
+;; using conditional negate.
+;; For r0 % n where n is a power of 2 produce:
+;; rsbs    r1, r0, #0
+;; and     r0, r0, #(n - 1)
+;; and     r1, r1, #(n - 1)
+;; rsbpl   r0, r1, #0
+
+(define_expand "modsi3"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "const_int_operand" "")]
+  "TARGET_32BIT"
+  {
+    HOST_WIDE_INT val = INTVAL (operands[2]);
+
+    if (val <= 0
+       || exact_log2 (val) <= 0)
+      FAIL;
+
+    rtx mask = GEN_INT (val - 1);
+
+    /* In the special case of x0 % 2 we can do the even shorter:
+	cmp     r0, #0
+	and     r0, r0, #1
+	rsblt   r0, r0, #0.  */
+
+    if (val == 2)
+      {
+	rtx cc_reg = arm_gen_compare_reg (LT,
+					  operands[1], const0_rtx, NULL_RTX);
+	rtx cond = gen_rtx_LT (SImode, cc_reg, const0_rtx);
+	rtx masked = gen_reg_rtx (SImode);
+
+	emit_insn (gen_andsi3 (masked, operands[1], mask));
+	emit_move_insn (operands[0],
+			gen_rtx_IF_THEN_ELSE (SImode, cond,
+					      gen_rtx_NEG (SImode,
+							   masked),
+					      masked));
+	DONE;
+      }
+
+    rtx neg_op = gen_reg_rtx (SImode);
+    rtx_insn *insn = emit_insn (gen_subsi3_compare0 (neg_op, const0_rtx,
+						      operands[1]));
+
+    /* Extract the condition register and mode.  */
+    rtx cmp = XVECEXP (PATTERN (insn), 0, 0);
+    rtx cc_reg = SET_DEST (cmp);
+    rtx cond = gen_rtx_GE (SImode, cc_reg, const0_rtx);
+
+    emit_insn (gen_andsi3 (operands[0], operands[1], mask));
+
+    rtx masked_neg = gen_reg_rtx (SImode);
+    emit_insn (gen_andsi3 (masked_neg, neg_op, mask));
+
+    /* We want a conditional negate here, but emitting COND_EXEC rtxes
+       during expand does not always work.  Do an IF_THEN_ELSE instead.  */
+    emit_move_insn (operands[0],
+		    gen_rtx_IF_THEN_ELSE (SImode, cond,
+					  gen_rtx_NEG (SImode, masked_neg),
+					  operands[0]));
+
+
+    DONE;
+  }
+)
+
 (define_expand "bswapsi2"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
   	(bswap:SI (match_operand:SI 1 "s_register_operand" "r")))]
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.c b/gcc/testsuite/gcc.target/aarch64/mod_2.c
new file mode 100644
index 0000000..2645c18
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_2.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "mod_2.x"
+
+/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */
+/* { dg-final { scan-assembler-times "and\t\[wx\]\[0-9\]*" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.x b/gcc/testsuite/gcc.target/aarch64/mod_2.x
new file mode 100644
index 0000000..2b079a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_2.x
@@ -0,0 +1,5 @@
+int
+f (int x)
+{
+  return x % 2;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.c b/gcc/testsuite/gcc.target/aarch64/mod_256.c
new file mode 100644
index 0000000..567332c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_256.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "mod_256.x"
+
+/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.x b/gcc/testsuite/gcc.target/aarch64/mod_256.x
new file mode 100644
index 0000000..c1de42c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mod_256.x
@@ -0,0 +1,5 @@
+int
+f (int x)
+{
+  return x % 256;
+}
diff --git a/gcc/testsuite/gcc.target/arm/mod_2.c b/gcc/testsuite/gcc.target/arm/mod_2.c
new file mode 100644
index 0000000..93017a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mod_2.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "../aarch64/mod_2.x"
+
+/* { dg-final { scan-assembler "rsblt\tr\[0-9\]*" } } */
+/* { dg-final { scan-assembler-times "and\tr\[0-9\].*1" 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/mod_256.c b/gcc/testsuite/gcc.target/arm/mod_256.c
new file mode 100644
index 0000000..ccb7f3c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mod_256.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+
+#include "../aarch64/mod_256.x"
+
+/* { dg-final { scan-assembler "rsbpl\tr\[0-9\]*" } } */
+

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2015-09-08  8:33 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-24 11:09 [PATCH][ARM][3/3] Expand mod by power of 2 Kyrill Tkachov
2015-07-31  9:00 ` Kyrill Tkachov
2015-08-10 11:14   ` Kyrill Tkachov
2015-08-19 12:51     ` Kyrill Tkachov
2015-09-01  8:38       ` Kyrill Tkachov
2015-09-07  9:46 ` Ramana Radhakrishnan
2015-09-08  8:35   ` Kyrill Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).