public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH: PR target/44072] Replace "cmp r0, -1" by "adds r0, 1" in  thumb2
@ 2010-05-12  8:43 Carrot Wei
  2010-05-13  7:29 ` Richard Earnshaw
  0 siblings, 1 reply; 8+ messages in thread
From: Carrot Wei @ 2010-05-12  8:43 UTC (permalink / raw)
  To: gcc-patches

Hi

Both "cmp r0, -1" and "adds r0, 1" have the same effect when used to set
conditions for following branch. But adds with small positive immediate is a
16 bit instruction and cmp with negative immediate is a 32 bit instruction. So
we prefer adds in thumb2.

Thumb1 already has an insn pattern "cbranchsi4_scratch" to do this, this patch
adds the corresponding insn pattern for thumb2.

Test:
Regression tests on arm qemu.

thanks
Guozhi


ChangeLog:
2010-05-12  Wei Guozhi  <carrot@google.com>

       PR target/44072
       * config/arm/thumb2.md (thumb2_cbranchsi4_scratch): New insn pattern.

ChangeLog:
2010-05-12  Wei Guozhi  <carrot@google.com>

       PR target/44072
       * gcc.target/arm/pr44072.c: New testcase.
	
	
Index: config/arm/thumb2.md
===================================================================
--- config/arm/thumb2.md        (revision 159251)
+++ config/arm/thumb2.md        (working copy)
@@ -1516,3 +1516,74 @@
                (const_int 8)
                (const_int 10)))))]
 )
+
+(define_insn "thumb2_cbranchsi4_scratch"
+  [(set (pc) (if_then_else
+             (match_operator 4 "arm_comparison_operator"
+              [(match_operand:SI 1 "s_register_operand" "l,?h,0")
+               (match_operand:SI 2 "thumb1_cmpneg_operand" "Pt,Ps,Ps")])
+             (label_ref (match_operand 3 "" ""))
+             (pc)))
+   (clobber (match_scratch:SI 0 "=l,X,l"))]
+  "TARGET_THUMB2"
+  "*
+  if (which_alternative == 1)
+    {
+      output_asm_insn (\"cmp\\t%1, #%n2\", operands);
+      switch (get_attr_length (insn))
+       {
+         case 6:  return \"b%d4\\t%l3\";
+         case 8:  return \"b%D4\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\";
+         default: return \"b%D4\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\";
+       }
+    }
+  else
+    {
+      if (which_alternative == 0)
+       output_asm_insn (\"adds\\t%0, %1, #%n2\", operands);
+      else
+       output_asm_insn (\"adds\\t%0, #%n2\", operands);
+
+      switch (get_attr_length (insn))
+       {
+         case 4:  return \"b%d4\\t%l3\";
+         case 6:  return \"b%D4\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\";
+         default: return \"b%D4\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\";
+       }
+    }
+  "
+  [(set (attr "far_jump")
+       (if_then_else
+         (eq (symbol_ref ("which_alternative"))
+                         (const_int 1))
+         (if_then_else
+           (eq_attr "length" "10")
+           (const_string "yes")
+           (const_string "no"))
+         (if_then_else
+           (eq_attr "length" "8")
+           (const_string "yes")
+           (const_string "no"))))
+   (set (attr "length")
+       (if_then_else
+         (eq (symbol_ref ("which_alternative"))
+                         (const_int 1))
+         (if_then_else
+           (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+                (le (minus (match_dup 3) (pc)) (const_int 256)))
+           (const_int 6)
+           (if_then_else
+               (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
+                    (le (minus (match_dup 3) (pc)) (const_int 2048)))
+               (const_int 8)
+               (const_int 10)))
+         (if_then_else
+           (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+                (le (minus (match_dup 3) (pc)) (const_int 256)))
+           (const_int 4)
+           (if_then_else
+               (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
+                    (le (minus (match_dup 3) (pc)) (const_int 2048)))
+               (const_int 6)
+               (const_int 8)))))]
+)

Index: pr44072.c
===================================================================
--- pr44072.c   (revision 0)
+++ pr44072.c   (revision 0)
@@ -0,0 +1,9 @@
+/* { dg-options "-march=armv7-a -mthumb -Os" }  */
+/* { dg-final { scan-assembler "adds" } } */
+
+void foo1();
+void bar5(int x)
+{
+  if (x == -1)
+    foo1();
+}

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH: PR target/44072] Replace "cmp r0, -1" by "adds r0, 1" in  thumb2
  2010-05-12  8:43 [PATCH: PR target/44072] Replace "cmp r0, -1" by "adds r0, 1" in thumb2 Carrot Wei
@ 2010-05-13  7:29 ` Richard Earnshaw
  2010-05-14  7:05   ` Carrot Wei
  2010-06-07  8:48   ` Carrot Wei
  0 siblings, 2 replies; 8+ messages in thread
From: Richard Earnshaw @ 2010-05-13  7:29 UTC (permalink / raw)
  To: Carrot Wei; +Cc: gcc-patches

On Wed, 2010-05-12 at 16:43 +0800, Carrot Wei wrote:
> Hi
> 
> Both "cmp r0, -1" and "adds r0, 1" have the same effect when used to set
> conditions for following branch. But adds with small positive immediate is a
> 16 bit instruction and cmp with negative immediate is a 32 bit instruction. So
> we prefer adds in thumb2.
> 
> Thumb1 already has an insn pattern "cbranchsi4_scratch" to do this, this patch
> adds the corresponding insn pattern for thumb2.
> 
> +
> +(define_insn "thumb2_cbranchsi4_scratch"
> +  [(set (pc) (if_then_else
> +             (match_operator 4 "arm_comparison_operator"
> +              [(match_operand:SI 1 "s_register_operand" "l,?h,0")
> +               (match_operand:SI 2 "thumb1_cmpneg_operand" "Pt,Ps,Ps")])

You should put the constraints in the order that they are preferred,
rather than trying to force the behaviour with '?'.  So the constraints
for op1 should be "l,0,h" and the rest of the pattern adjusted to match.

OK with that change.

R.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH: PR target/44072] Replace "cmp r0, -1" by "adds r0, 1" in  thumb2
  2010-05-13  7:29 ` Richard Earnshaw
@ 2010-05-14  7:05   ` Carrot Wei
  2010-06-07  8:48   ` Carrot Wei
  1 sibling, 0 replies; 8+ messages in thread
From: Carrot Wei @ 2010-05-14  7:05 UTC (permalink / raw)
  To: Richard Earnshaw; +Cc: gcc-patches

Hi Richard

I tried to remove the '?' sign and depend on the order of 'l,0,h'
only. But sometimes it doesn't work as expected.

For example in the case of (x == -20),  gcc generates

        mov     ip, r0
        cmp     ip, #-20
        bne     .L1
        ...

It is even worse than the default behavior because it introduces an
additional mov instruction.

My guess is because of the following insn, cmp instruction uses one
less register. It automatically boosts the priority of this choice. If
I replace 'X' with 'l' (for experiment purpose only), gcc can
correctly choose an adds instruction.

            (clobber (match_scratch:SI 0 "=l,l,X"))

So a '?' sign is still required. What's your opinion?

thanks
Guozhi

On Thu, May 13, 2010 at 3:28 PM, Richard Earnshaw <rearnsha@arm.com> wrote:
> On Wed, 2010-05-12 at 16:43 +0800, Carrot Wei wrote:
>> Hi
>>
>> Both "cmp r0, -1" and "adds r0, 1" have the same effect when used to set
>> conditions for following branch. But adds with small positive immediate is a
>> 16 bit instruction and cmp with negative immediate is a 32 bit instruction. So
>> we prefer adds in thumb2.
>>
>> Thumb1 already has an insn pattern "cbranchsi4_scratch" to do this, this patch
>> adds the corresponding insn pattern for thumb2.
>>
>> +
>> +(define_insn "thumb2_cbranchsi4_scratch"
>> +  [(set (pc) (if_then_else
>> +             (match_operator 4 "arm_comparison_operator"
>> +              [(match_operand:SI 1 "s_register_operand" "l,?h,0")
>> +               (match_operand:SI 2 "thumb1_cmpneg_operand" "Pt,Ps,Ps")])
>
> You should put the constrains in the order that they are preferred,
> rather than trying to force the behaviour with '?'.  So the constraints
> for op1 should be "l,0,h" and the rest of the pattern adjusted to match.
>
> OK with that change.
>
> R.
>
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH: PR target/44072] Replace "cmp r0, -1" by "adds r0, 1" in  thumb2
  2010-05-13  7:29 ` Richard Earnshaw
  2010-05-14  7:05   ` Carrot Wei
@ 2010-06-07  8:48   ` Carrot Wei
  2010-06-10  7:04     ` Carrot Wei
  2010-06-20 11:08     ` Richard Earnshaw
  1 sibling, 2 replies; 8+ messages in thread
From: Carrot Wei @ 2010-06-07  8:48 UTC (permalink / raw)
  To: Richard Earnshaw; +Cc: gcc-patches

Hi

I've modified the patch a little. Please take another look.

1. Add code to clobber cc register.
2. Change the constraint "l,?h,0" to "l,0,h".

It should be noted that the change of constraint doesn't bring the expected
result. For the following simple code:

if (x == -8)
    foo1();
	
GCC generates:

       mov     ip, r0
       cmp     ip, #-8
       bne     .L1
       ...
	
It is even worse than current default behavior. Ian explained as:

"At first glance, I would say that the third alternative does not
require a scratch register whereas the second alternative does require
one.  Therefore, the second alternative costs more.  Scratch registers
are, rightly, expensive, because in the general case (though not in
this tiny case) they require spilling values onto the stack."

So gcc can't correctly model the costs of the second and third alternatives
in a situation without high register pressure. I will open a bug entry after
checking in this patch.

In practice it is not a big problem, because -1 is the most frequently compared
negative constant, since it is usually used to indicate an error return value.
Testing with CSiBE confirms this: fewer than 10 functions get larger, while
more than 10 times that number of functions get smaller. This patch also
passed regression testing on qemu.

thanks
Guozhi


ChangeLog:
2010-06-07  Wei Guozhi  <carrot@google.com>

        PR target/44072
        * config/arm/thumb2.md (thumb2_cbranchsi4_scratch): New insn pattern.

ChangeLog:
2010-06-07  Wei Guozhi  <carrot@google.com>

        PR target/44072
        * gcc.target/arm/pr44072.c: New testcase.

	
Index: thumb2.md
===================================================================
--- thumb2.md   (revision 160356)
+++ thumb2.md   (working copy)
@@ -1591,3 +1591,75 @@
                (const_int 8)
                (const_int 10)))))]
 )
+
+(define_insn "thumb2_cbranchsi4_scratch"
+  [(set (pc) (if_then_else
+             (match_operator 4 "arm_comparison_operator"
+              [(match_operand:SI 1 "s_register_operand" "l,0,h")
+               (match_operand:SI 2 "thumb1_cmpneg_operand" "Pt,Ps,Ps")])
+             (label_ref (match_operand 3 "" ""))
+             (pc)))
+   (clobber (match_scratch:SI 0 "=l,l,X"))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_THUMB2"
+  "*
+  if (which_alternative == 2)
+    {
+      output_asm_insn (\"cmp\\t%1, %2\", operands);
+      switch (get_attr_length (insn))
+       {
+         case 6:  return \"b%d4\\t%l3\";
+         case 8:  return \"b%D4\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\";
+         default: return \"b%D4\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\";
+       }
+    }
+  else
+    {
+      if (which_alternative == 0)
+       output_asm_insn (\"adds\\t%0, %1, #%n2\", operands);
+      else
+       output_asm_insn (\"adds\\t%0, #%n2\", operands);
+
+      switch (get_attr_length (insn))
+       {
+         case 4:  return \"b%d4\\t%l3\";
+         case 6:  return \"b%D4\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\";
+         default: return \"b%D4\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\";
+       }
+    }
+  "
+  [(set (attr "far_jump")
+       (if_then_else
+         (eq (symbol_ref ("which_alternative"))
+                         (const_int 2))
+         (if_then_else
+           (eq_attr "length" "10")
+           (const_string "yes")
+           (const_string "no"))
+         (if_then_else
+           (eq_attr "length" "8")
+           (const_string "yes")
+           (const_string "no"))))
+   (set (attr "length")
+       (if_then_else
+         (eq (symbol_ref ("which_alternative"))
+                         (const_int 2))
+         (if_then_else
+           (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+                (le (minus (match_dup 3) (pc)) (const_int 256)))
+           (const_int 6)
+           (if_then_else
+               (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
+                    (le (minus (match_dup 3) (pc)) (const_int 2048)))
+               (const_int 8)
+               (const_int 10)))
+         (if_then_else
+           (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+                (le (minus (match_dup 3) (pc)) (const_int 256)))
+           (const_int 4)
+           (if_then_else
+               (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
+                    (le (minus (match_dup 3) (pc)) (const_int 2048)))
+               (const_int 6)
+               (const_int 8)))))]
+)

Index: pr44072.c
===================================================================
--- pr44072.c   (revision 0)
+++ pr44072.c   (revision 0)
@@ -0,0 +1,9 @@
+/* { dg-options "-march=armv7-a -mthumb -Os" }  */
+/* { dg-final { scan-assembler "adds" } } */
+
+void foo1();
+void bar5(int x)
+{
+  if (x == -1)
+    foo1();
+}


On Thu, May 13, 2010 at 3:28 PM, Richard Earnshaw <rearnsha@arm.com> wrote:
> On Wed, 2010-05-12 at 16:43 +0800, Carrot Wei wrote:
>> Hi
>>
>> Both "cmp r0, -1" and "adds r0, 1" have the same effect when used to set
>> conditions for following branch. But adds with small positive immediate is a
>> 16 bit instruction and cmp with negative immediate is a 32 bit instruction. So
>> we prefer adds in thumb2.
>>
>> Thumb1 already has an insn pattern "cbranchsi4_scratch" to do this, this patch
>> adds the corresponding insn pattern for thumb2.
>>
>> +
>> +(define_insn "thumb2_cbranchsi4_scratch"
>> +  [(set (pc) (if_then_else
>> +             (match_operator 4 "arm_comparison_operator"
>> +              [(match_operand:SI 1 "s_register_operand" "l,?h,0")
>> +               (match_operand:SI 2 "thumb1_cmpneg_operand" "Pt,Ps,Ps")])
>
> You should put the constrains in the order that they are preferred,
> rather than trying to force the behaviour with '?'.  So the constraints
> for op1 should be "l,0,h" and the rest of the pattern adjusted to match.
>
> OK with that change.
>
> R.
>
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH: PR target/44072] Replace "cmp r0, -1" by "adds r0, 1" in  thumb2
  2010-06-07  8:48   ` Carrot Wei
@ 2010-06-10  7:04     ` Carrot Wei
  2010-06-12  7:54       ` [PING] " Carrot Wei
  2010-06-20 11:08     ` Richard Earnshaw
  1 sibling, 1 reply; 8+ messages in thread
From: Carrot Wei @ 2010-06-10  7:04 UTC (permalink / raw)
  To: Richard Earnshaw; +Cc: gcc-patches

ping

On Mon, Jun 7, 2010 at 4:48 PM, Carrot Wei <carrot@google.com> wrote:
> Hi
>
> I've modified the patch a little. Please take another look.
>
> 1. Add code to clobber cc register.
> 2. Change the constraint "l,?h,0" to "l,0,h".
>
> It should be noted that the change of constraint doesn't bring the expected
> result. For the following simple code:
>
> if (x == -8)
>    foo1();
>
> GCC generates:
>
>       mov     ip, r0
>       cmp     ip, #-8
>       bne     .L1
>       ...
>
> It is even worse than current default behavior. Ian explained as:
>
> "At first glance, I would say that the third alternative does not
> require a scratch register whereas the second alternative does require
> one.  Therefore, the second alternative costs more.  Scratch registers
> are, rightly, expensive, because in the general case (though not in
> this tiny case) they require spilling values onto the stack."
>
> So gcc can't correctly model the costs of the second and third alternatives
> in a situation without high register pressure. I will open a bug entry after
> check in this patch.
>
> In practice it is not a big problem. Because -1 is the most frequently compared
> negative constant since it is usually used to indicate an error return value.
> Testing with CSiBE confirms this. There are only less than 10 functions get
> larger but more than 10 times number of functions get smaller. This patch also
> passed regression testing on qemu.
>
> thanks
> Guozhi
>
>
> ChangeLog:
> 2010-06-07  Wei Guozhi  <carrot@google.com>
>
>        PR target/44072
>        * config/arm/thumb2.md (thumb2_cbranchsi4_scratch): New insn pattern.
>
> ChangeLog:
> 2010-06-07  Wei Guozhi  <carrot@google.com>
>
>        PR target/44072
>        * gcc.target/arm/pr44072.c: New testcase.
>
>
> Index: thumb2.md
> ===================================================================
> --- thumb2.md   (revision 160356)
> +++ thumb2.md   (working copy)
> @@ -1591,3 +1591,75 @@
>                (const_int 8)
>                (const_int 10)))))]
>  )
> +
> +(define_insn "thumb2_cbranchsi4_scratch"
> +  [(set (pc) (if_then_else
> +             (match_operator 4 "arm_comparison_operator"
> +              [(match_operand:SI 1 "s_register_operand" "l,0,h")
> +               (match_operand:SI 2 "thumb1_cmpneg_operand" "Pt,Ps,Ps")])
> +             (label_ref (match_operand 3 "" ""))
> +             (pc)))
> +   (clobber (match_scratch:SI 0 "=l,l,X"))
> +   (clobber (reg:CC CC_REGNUM))]
> +  "TARGET_THUMB2"
> +  "*
> +  if (which_alternative == 2)
> +    {
> +      output_asm_insn (\"cmp\\t%1, %2\", operands);
> +      switch (get_attr_length (insn))
> +       {
> +         case 6:  return \"b%d4\\t%l3\";
> +         case 8:  return \"b%D4\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\";
> +         default: return \"b%D4\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\";
> +       }
> +    }
> +  else
> +    {
> +      if (which_alternative == 0)
> +       output_asm_insn (\"adds\\t%0, %1, #%n2\", operands);
> +      else
> +       output_asm_insn (\"adds\\t%0, #%n2\", operands);
> +
> +      switch (get_attr_length (insn))
> +       {
> +         case 4:  return \"b%d4\\t%l3\";
> +         case 6:  return \"b%D4\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\";
> +         default: return \"b%D4\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\";
> +       }
> +    }
> +  "
> +  [(set (attr "far_jump")
> +       (if_then_else
> +         (eq (symbol_ref ("which_alternative"))
> +                         (const_int 2))
> +         (if_then_else
> +           (eq_attr "length" "10")
> +           (const_string "yes")
> +           (const_string "no"))
> +         (if_then_else
> +           (eq_attr "length" "8")
> +           (const_string "yes")
> +           (const_string "no"))))
> +   (set (attr "length")
> +       (if_then_else
> +         (eq (symbol_ref ("which_alternative"))
> +                         (const_int 2))
> +         (if_then_else
> +           (and (ge (minus (match_dup 3) (pc)) (const_int -250))
> +                (le (minus (match_dup 3) (pc)) (const_int 256)))
> +           (const_int 6)
> +           (if_then_else
> +               (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
> +                    (le (minus (match_dup 3) (pc)) (const_int 2048)))
> +               (const_int 8)
> +               (const_int 10)))
> +         (if_then_else
> +           (and (ge (minus (match_dup 3) (pc)) (const_int -250))
> +                (le (minus (match_dup 3) (pc)) (const_int 256)))
> +           (const_int 4)
> +           (if_then_else
> +               (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
> +                    (le (minus (match_dup 3) (pc)) (const_int 2048)))
> +               (const_int 6)
> +               (const_int 8)))))]
> +)
>
> Index: pr44072.c
> ===================================================================
> --- pr44072.c   (revision 0)
> +++ pr44072.c   (revision 0)
> @@ -0,0 +1,9 @@
> +/* { dg-options "-march=armv7-a -mthumb -Os" }  */
> +/* { dg-final { scan-assembler "adds" } } */
> +
> +void foo1();
> +void bar5(int x)
> +{
> +  if (x == -1)
> +    foo1();
> +}
>
>
> On Thu, May 13, 2010 at 3:28 PM, Richard Earnshaw <rearnsha@arm.com> wrote:
>> On Wed, 2010-05-12 at 16:43 +0800, Carrot Wei wrote:
>>> Hi
>>>
>>> Both "cmp r0, -1" and "adds r0, 1" have the same effect when used to set
>>> conditions for following branch. But adds with small positive immediate is a
>>> 16 bit instruction and cmp with negative immediate is a 32 bit instruction. So
>>> we prefer adds in thumb2.
>>>
>>> Thumb1 already has an insn pattern "cbranchsi4_scratch" to do this, this patch
>>> adds the corresponding insn pattern for thumb2.
>>>
>>> +
>>> +(define_insn "thumb2_cbranchsi4_scratch"
>>> +  [(set (pc) (if_then_else
>>> +             (match_operator 4 "arm_comparison_operator"
>>> +              [(match_operand:SI 1 "s_register_operand" "l,?h,0")
>>> +               (match_operand:SI 2 "thumb1_cmpneg_operand" "Pt,Ps,Ps")])
>>
>> You should put the constrains in the order that they are preferred,
>> rather than trying to force the behaviour with '?'.  So the constraints
>> for op1 should be "l,0,h" and the rest of the pattern adjusted to match.
>>
>> OK with that change.
>>
>> R.
>>
>>
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PING] [PATCH: PR target/44072] Replace "cmp r0, -1" by "adds r0, 1"  in thumb2
  2010-06-10  7:04     ` Carrot Wei
@ 2010-06-12  7:54       ` Carrot Wei
  2010-06-18 20:12         ` Carrot Wei
  0 siblings, 1 reply; 8+ messages in thread
From: Carrot Wei @ 2010-06-12  7:54 UTC (permalink / raw)
  To: Richard Earnshaw; +Cc: gcc-patches

On Thu, Jun 10, 2010 at 2:30 PM, Carrot Wei <carrot@google.com> wrote:
> ping
>
> On Mon, Jun 7, 2010 at 4:48 PM, Carrot Wei <carrot@google.com> wrote:
>> Hi
>>
>> I've modified the patch a little. Please take another look.
>>
>> 1. Add code to clobber cc register.
>> 2. Change the constraint "l,?h,0" to "l,0,h".
>>
>> It should be noted that the change of constraint doesn't bring the expected
>> result. For the following simple code:
>>
>> if (x == -8)
>>    foo1();
>>
>> GCC generates:
>>
>>       mov     ip, r0
>>       cmp     ip, #-8
>>       bne     .L1
>>       ...
>>
>> It is even worse than current default behavior. Ian explained as:
>>
>> "At first glance, I would say that the third alternative does not
>> require a scratch register whereas the second alternative does require
>> one.  Therefore, the second alternative costs more.  Scratch registers
>> are, rightly, expensive, because in the general case (though not in
>> this tiny case) they require spilling values onto the stack."
>>
>> So gcc can't correctly model the costs of the second and third alternatives
>> in a situation without high register pressure. I will open a bug entry after
>> check in this patch.
>>
>> In practice it is not a big problem. Because -1 is the most frequently compared
>> negative constant since it is usually used to indicate an error return value.
>> Testing with CSiBE confirms this. There are only less than 10 functions get
>> larger but more than 10 times number of functions get smaller. This patch also
>> passed regression testing on qemu.
>>
>> thanks
>> Guozhi
>>
>>
>> ChangeLog:
>> 2010-06-07  Wei Guozhi  <carrot@google.com>
>>
>>        PR target/44072
>>        * config/arm/thumb2.md (thumb2_cbranchsi4_scratch): New insn pattern.
>>
>> ChangeLog:
>> 2010-06-07  Wei Guozhi  <carrot@google.com>
>>
>>        PR target/44072
>>        * gcc.target/arm/pr44072.c: New testcase.
>>
>>
>> Index: thumb2.md
>> ===================================================================
>> --- thumb2.md   (revision 160356)
>> +++ thumb2.md   (working copy)
>> @@ -1591,3 +1591,75 @@
>>                (const_int 8)
>>                (const_int 10)))))]
>>  )
>> +
>> +(define_insn "thumb2_cbranchsi4_scratch"
>> +  [(set (pc) (if_then_else
>> +             (match_operator 4 "arm_comparison_operator"
>> +              [(match_operand:SI 1 "s_register_operand" "l,0,h")
>> +               (match_operand:SI 2 "thumb1_cmpneg_operand" "Pt,Ps,Ps")])
>> +             (label_ref (match_operand 3 "" ""))
>> +             (pc)))
>> +   (clobber (match_scratch:SI 0 "=l,l,X"))
>> +   (clobber (reg:CC CC_REGNUM))]
>> +  "TARGET_THUMB2"
>> +  "*
>> +  if (which_alternative == 2)
>> +    {
>> +      output_asm_insn (\"cmp\\t%1, %2\", operands);
>> +      switch (get_attr_length (insn))
>> +       {
>> +         case 6:  return \"b%d4\\t%l3\";
>> +         case 8:  return \"b%D4\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\";
>> +         default: return \"b%D4\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\";
>> +       }
>> +    }
>> +  else
>> +    {
>> +      if (which_alternative == 0)
>> +       output_asm_insn (\"adds\\t%0, %1, #%n2\", operands);
>> +      else
>> +       output_asm_insn (\"adds\\t%0, #%n2\", operands);
>> +
>> +      switch (get_attr_length (insn))
>> +       {
>> +         case 4:  return \"b%d4\\t%l3\";
>> +         case 6:  return \"b%D4\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\";
>> +         default: return \"b%D4\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\";
>> +       }
>> +    }
>> +  "
>> +  [(set (attr "far_jump")
>> +       (if_then_else
>> +         (eq (symbol_ref ("which_alternative"))
>> +                         (const_int 2))
>> +         (if_then_else
>> +           (eq_attr "length" "10")
>> +           (const_string "yes")
>> +           (const_string "no"))
>> +         (if_then_else
>> +           (eq_attr "length" "8")
>> +           (const_string "yes")
>> +           (const_string "no"))))
>> +   (set (attr "length")
>> +       (if_then_else
>> +         (eq (symbol_ref ("which_alternative"))
>> +                         (const_int 2))
>> +         (if_then_else
>> +           (and (ge (minus (match_dup 3) (pc)) (const_int -250))
>> +                (le (minus (match_dup 3) (pc)) (const_int 256)))
>> +           (const_int 6)
>> +           (if_then_else
>> +               (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
>> +                    (le (minus (match_dup 3) (pc)) (const_int 2048)))
>> +               (const_int 8)
>> +               (const_int 10)))
>> +         (if_then_else
>> +           (and (ge (minus (match_dup 3) (pc)) (const_int -250))
>> +                (le (minus (match_dup 3) (pc)) (const_int 256)))
>> +           (const_int 4)
>> +           (if_then_else
>> +               (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
>> +                    (le (minus (match_dup 3) (pc)) (const_int 2048)))
>> +               (const_int 6)
>> +               (const_int 8)))))]
>> +)
>>
>> Index: pr44072.c
>> ===================================================================
>> --- pr44072.c   (revision 0)
>> +++ pr44072.c   (revision 0)
>> @@ -0,0 +1,9 @@
>> +/* { dg-options "-march=armv7-a -mthumb -Os" }  */
>> +/* { dg-final { scan-assembler "adds" } } */
>> +
>> +void foo1();
>> +void bar5(int x)
>> +{
>> +  if (x == -1)
>> +    foo1();
>> +}
>>
>>
>> On Thu, May 13, 2010 at 3:28 PM, Richard Earnshaw <rearnsha@arm.com> wrote:
>>> On Wed, 2010-05-12 at 16:43 +0800, Carrot Wei wrote:
>>>> Hi
>>>>
>>>> Both "cmp r0, -1" and "adds r0, 1" have the same effect when used to set
>>>> conditions for following branch. But adds with small positive immediate is a
>>>> 16 bit instruction and cmp with negative immediate is a 32 bit instruction. So
>>>> we prefer adds in thumb2.
>>>>
>>>> Thumb1 already has an insn pattern "cbranchsi4_scratch" to do this, this patch
>>>> adds the corresponding insn pattern for thumb2.
>>>>
>>>> +
>>>> +(define_insn "thumb2_cbranchsi4_scratch"
>>>> +  [(set (pc) (if_then_else
>>>> +             (match_operator 4 "arm_comparison_operator"
>>>> +              [(match_operand:SI 1 "s_register_operand" "l,?h,0")
>>>> +               (match_operand:SI 2 "thumb1_cmpneg_operand" "Pt,Ps,Ps")])
>>>
>>> You should put the constrains in the order that they are preferred,
>>> rather than trying to force the behaviour with '?'.  So the constraints
>>> for op1 should be "l,0,h" and the rest of the pattern adjusted to match.
>>>
>>> OK with that change.
>>>
>>> R.
>>>
>>>
>>
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PING] [PATCH: PR target/44072] Replace "cmp r0, -1" by "adds r0,  1" in thumb2
  2010-06-12  7:54       ` [PING] " Carrot Wei
@ 2010-06-18 20:12         ` Carrot Wei
  0 siblings, 0 replies; 8+ messages in thread
From: Carrot Wei @ 2010-06-18 20:12 UTC (permalink / raw)
  To: Richard Earnshaw; +Cc: gcc-patches

ping

On Sat, Jun 12, 2010 at 2:00 PM, Carrot Wei <carrot@google.com> wrote:
> On Thu, Jun 10, 2010 at 2:30 PM, Carrot Wei <carrot@google.com> wrote:
>> ping
>>
>> On Mon, Jun 7, 2010 at 4:48 PM, Carrot Wei <carrot@google.com> wrote:
>>> Hi
>>>
>>> I've modified the patch a little. Please take another look.
>>>
>>> 1. Add code to clobber cc register.
>>> 2. Change the constraint "l,?h,0" to "l,0,h".
>>>
>>> It should be noted that the change of constraint doesn't bring the expected
>>> result. For the following simple code:
>>>
>>> if (x == -8)
>>>    foo1();
>>>
>>> GCC generates:
>>>
>>>       mov     ip, r0
>>>       cmp     ip, #-8
>>>       bne     .L1
>>>       ...
>>>
>>> It is even worse than current default behavior. Ian explained as:
>>>
>>> "At first glance, I would say that the third alternative does not
>>> require a scratch register whereas the second alternative does require
>>> one.  Therefore, the second alternative costs more.  Scratch registers
>>> are, rightly, expensive, because in the general case (though not in
>>> this tiny case) they require spilling values onto the stack."
>>>
>>> So gcc can't correctly model the costs of the second and third alternatives
>>> in a situation without high register pressure. I will open a bug entry after
>>> check in this patch.
>>>
>>> In practice it is not a big problem. Because -1 is the most frequently compared
>>> negative constant since it is usually used to indicate an error return value.
>>> Testing with CSiBE confirms this. There are only less than 10 functions get
>>> larger but more than 10 times number of functions get smaller. This patch also
>>> passed regression testing on qemu.
>>>
>>> thanks
>>> Guozhi
>>>
>>>
>>> ChangeLog:
>>> 2010-06-07  Wei Guozhi  <carrot@google.com>
>>>
>>>        PR target/44072
>>>        * config/arm/thumb2.md (thumb2_cbranchsi4_scratch): New insn pattern.
>>>
>>> ChangeLog:
>>> 2010-06-07  Wei Guozhi  <carrot@google.com>
>>>
>>>        PR target/44072
>>>        * gcc.target/arm/pr44072.c: New testcase.
>>>
>>>
>>> Index: thumb2.md
>>> ===================================================================
>>> --- thumb2.md   (revision 160356)
>>> +++ thumb2.md   (working copy)
>>> @@ -1591,3 +1591,75 @@
>>>                (const_int 8)
>>>                (const_int 10)))))]
>>>  )
>>> +
>>> +(define_insn "thumb2_cbranchsi4_scratch"
>>> +  [(set (pc) (if_then_else
>>> +             (match_operator 4 "arm_comparison_operator"
>>> +              [(match_operand:SI 1 "s_register_operand" "l,0,h")
>>> +               (match_operand:SI 2 "thumb1_cmpneg_operand" "Pt,Ps,Ps")])
>>> +             (label_ref (match_operand 3 "" ""))
>>> +             (pc)))
>>> +   (clobber (match_scratch:SI 0 "=l,l,X"))
>>> +   (clobber (reg:CC CC_REGNUM))]
>>> +  "TARGET_THUMB2"
>>> +  "*
>>> +  if (which_alternative == 2)
>>> +    {
>>> +      output_asm_insn (\"cmp\\t%1, %2\", operands);
>>> +      switch (get_attr_length (insn))
>>> +       {
>>> +         case 6:  return \"b%d4\\t%l3\";
>>> +         case 8:  return \"b%D4\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\";
>>> +         default: return \"b%D4\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\";
>>> +       }
>>> +    }
>>> +  else
>>> +    {
>>> +      if (which_alternative == 0)
>>> +       output_asm_insn (\"adds\\t%0, %1, #%n2\", operands);
>>> +      else
>>> +       output_asm_insn (\"adds\\t%0, #%n2\", operands);
>>> +
>>> +      switch (get_attr_length (insn))
>>> +       {
>>> +         case 4:  return \"b%d4\\t%l3\";
>>> +         case 6:  return \"b%D4\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\";
>>> +         default: return \"b%D4\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\";
>>> +       }
>>> +    }
>>> +  "
>>> +  [(set (attr "far_jump")
>>> +       (if_then_else
>>> +         (eq (symbol_ref ("which_alternative"))
>>> +                         (const_int 2))
>>> +         (if_then_else
>>> +           (eq_attr "length" "10")
>>> +           (const_string "yes")
>>> +           (const_string "no"))
>>> +         (if_then_else
>>> +           (eq_attr "length" "8")
>>> +           (const_string "yes")
>>> +           (const_string "no"))))
>>> +   (set (attr "length")
>>> +       (if_then_else
>>> +         (eq (symbol_ref ("which_alternative"))
>>> +                         (const_int 2))
>>> +         (if_then_else
>>> +           (and (ge (minus (match_dup 3) (pc)) (const_int -250))
>>> +                (le (minus (match_dup 3) (pc)) (const_int 256)))
>>> +           (const_int 6)
>>> +           (if_then_else
>>> +               (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
>>> +                    (le (minus (match_dup 3) (pc)) (const_int 2048)))
>>> +               (const_int 8)
>>> +               (const_int 10)))
>>> +         (if_then_else
>>> +           (and (ge (minus (match_dup 3) (pc)) (const_int -250))
>>> +                (le (minus (match_dup 3) (pc)) (const_int 256)))
>>> +           (const_int 4)
>>> +           (if_then_else
>>> +               (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
>>> +                    (le (minus (match_dup 3) (pc)) (const_int 2048)))
>>> +               (const_int 6)
>>> +               (const_int 8)))))]
>>> +)
>>>
>>> Index: pr44072.c
>>> ===================================================================
>>> --- pr44072.c   (revision 0)
>>> +++ pr44072.c   (revision 0)
>>> @@ -0,0 +1,9 @@
>>> +/* { dg-options "-march=armv7-a -mthumb -Os" }  */
>>> +/* { dg-final { scan-assembler "adds" } } */
>>> +
>>> +void foo1();
>>> +void bar5(int x)
>>> +{
>>> +  if (x == -1)
>>> +    foo1();
>>> +}
>>>
>>>
>>> On Thu, May 13, 2010 at 3:28 PM, Richard Earnshaw <rearnsha@arm.com> wrote:
>>>> On Wed, 2010-05-12 at 16:43 +0800, Carrot Wei wrote:
>>>>> Hi
>>>>>
>>>>> Both "cmp r0, -1" and "adds r0, 1" have the same effect when used to set
>>>>> conditions for following branch. But adds with small positive immediate is a
>>>>> 16 bit instruction and cmp with negative immediate is a 32 bit instruction. So
>>>>> we prefer adds in thumb2.
>>>>>
>>>>> Thumb1 already has an insn pattern "cbranchsi4_scratch" to do this, this patch
>>>>> adds the corresponding insn pattern for thumb2.
>>>>>
>>>>> +
>>>>> +(define_insn "thumb2_cbranchsi4_scratch"
>>>>> +  [(set (pc) (if_then_else
>>>>> +             (match_operator 4 "arm_comparison_operator"
>>>>> +              [(match_operand:SI 1 "s_register_operand" "l,?h,0")
>>>>> +               (match_operand:SI 2 "thumb1_cmpneg_operand" "Pt,Ps,Ps")])
>>>>
>>>> You should put the constrains in the order that they are preferred,
>>>> rather than trying to force the behaviour with '?'.  So the constraints
>>>> for op1 should be "l,0,h" and the rest of the pattern adjusted to match.
>>>>
>>>> OK with that change.
>>>>
>>>> R.
>>>>
>>>>
>>>
>>
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH: PR target/44072] Replace "cmp r0, -1" by "adds r0, 1" in  thumb2
  2010-06-07  8:48   ` Carrot Wei
  2010-06-10  7:04     ` Carrot Wei
@ 2010-06-20 11:08     ` Richard Earnshaw
  1 sibling, 0 replies; 8+ messages in thread
From: Richard Earnshaw @ 2010-06-20 11:08 UTC (permalink / raw)
  To: Carrot Wei; +Cc: Richard Earnshaw, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 7233 bytes --]

On 07/06/10 09:48, Carrot Wei wrote:
> Hi
>
> I've modified the patch a little. Please take another look.
>
> 1. Add code to clobber cc register.
> 2. Change the constraint "l,?h,0" to "l,0,h".
>
> It should be noted that the change of constraint doesn't bring the expected
> result. For the following simple code:
>
> if (x == -8)
>      foo1();
> 	
> GCC generates:
>
>         mov     ip, r0
>         cmp     ip, #-8
>         bne     .L1
>         ...
> 	
> It is even worse than current default behavior. Ian explained as:
>
> "At first glance, I would say that the third alternative does not
> require a scratch register whereas the second alternative does require
> one.  Therefore, the second alternative costs more.  Scratch registers
> are, rightly, expensive, because in the general case (though not in
> this tiny case) they require spilling values onto the stack."
>
> So gcc can't correctly model the costs of the second and third alternatives
> in a situation without high register pressure. I will open a bug entry after
> checking in this patch.
>
> In practice it is not a big problem. Because -1 is the most frequently compared
> negative constant since it is usually used to indicate an error return value.
> Testing with CSiBE confirms this. Fewer than 10 functions get larger, but
> more than 10 times as many functions get smaller. This patch also
> passed regression testing on qemu.
>
> thanks
> Guozhi
>
>
> ChangeLog:
> 2010-06-07  Wei Guozhi<carrot@google.com>
>
>          PR target/44072
>          * config/arm/thumb2.md (thumb2_cbranchsi4_scratch): New insn pattern.
>
> ChangeLog:
> 2010-06-07  Wei Guozhi<carrot@google.com>
>
>          PR target/44072
>          * gcc.target/arm/pr44072.c: New testcase.
>
> 	
> Index: thumb2.md
> ===================================================================
> --- thumb2.md   (revision 160356)
> +++ thumb2.md   (working copy)
> @@ -1591,3 +1591,75 @@
>                  (const_int 8)
>                  (const_int 10)))))]
>   )
> +
> +(define_insn "thumb2_cbranchsi4_scratch"
> +  [(set (pc) (if_then_else
> +             (match_operator 4 "arm_comparison_operator"
> +              [(match_operand:SI 1 "s_register_operand" "l,0,h")
> +               (match_operand:SI 2 "thumb1_cmpneg_operand" "Pt,Ps,Ps")])
> +             (label_ref (match_operand 3 "" ""))
> +             (pc)))
> +   (clobber (match_scratch:SI 0 "=l,l,X"))
> +   (clobber (reg:CC CC_REGNUM))]
> +  "TARGET_THUMB2"
> +  "*
> +  if (which_alternative == 2)
> +    {
> +      output_asm_insn (\"cmp\\t%1, %2\", operands);
> +      switch (get_attr_length (insn))
> +       {
> +         case 6:  return \"b%d4\\t%l3\";
> +         case 8:  return \"b%D4\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\";
> +         default: return \"b%D4\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\";
> +       }
> +    }
> +  else
> +    {
> +      if (which_alternative == 0)
> +       output_asm_insn (\"adds\\t%0, %1, #%n2\", operands);
> +      else
> +       output_asm_insn (\"adds\\t%0, #%n2\", operands);
> +
> +      switch (get_attr_length (insn))
> +       {
> +         case 4:  return \"b%d4\\t%l3\";
> +         case 6:  return \"b%D4\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\";
> +         default: return \"b%D4\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\";
> +       }
> +    }
> +  "
> +  [(set (attr "far_jump")
> +       (if_then_else
> +         (eq (symbol_ref ("which_alternative"))
> +                         (const_int 2))
> +         (if_then_else
> +           (eq_attr "length" "10")
> +           (const_string "yes")
> +           (const_string "no"))
> +         (if_then_else
> +           (eq_attr "length" "8")
> +           (const_string "yes")
> +           (const_string "no"))))
> +   (set (attr "length")
> +       (if_then_else
> +         (eq (symbol_ref ("which_alternative"))
> +                         (const_int 2))
> +         (if_then_else
> +           (and (ge (minus (match_dup 3) (pc)) (const_int -250))
> +                (le (minus (match_dup 3) (pc)) (const_int 256)))
> +           (const_int 6)
> +           (if_then_else
> +               (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
> +                    (le (minus (match_dup 3) (pc)) (const_int 2048)))
> +               (const_int 8)
> +               (const_int 10)))
> +         (if_then_else
> +           (and (ge (minus (match_dup 3) (pc)) (const_int -250))
> +                (le (minus (match_dup 3) (pc)) (const_int 256)))
> +           (const_int 4)
> +           (if_then_else
> +               (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
> +                    (le (minus (match_dup 3) (pc)) (const_int 2048)))
> +               (const_int 6)
> +               (const_int 8)))))]
> +)
>
> Index: pr44072.c
> ===================================================================
> --- pr44072.c   (revision 0)
> +++ pr44072.c   (revision 0)
> @@ -0,0 +1,9 @@
> +/* { dg-options "-march=armv7-a -mthumb -Os" }  */
> +/* { dg-final { scan-assembler "adds" } } */
> +
> +void foo1();
> +void bar5(int x)
> +{
> +  if (x == -1)
> +    foo1();
> +}
>
>
> On Thu, May 13, 2010 at 3:28 PM, Richard Earnshaw<rearnsha@arm.com>  wrote:
>> On Wed, 2010-05-12 at 16:43 +0800, Carrot Wei wrote:
>>> Hi
>>>
>>> Both "cmp r0, -1" and "adds r0, 1" have the same effect when used to set
>>> conditions for following branch. But adds with small positive immediate is a
>>> 16 bit instruction and cmp with negative immediate is a 32 bit instruction. So
>>> we prefer adds in thumb2.
>>>
>>> Thumb1 already has an insn pattern "cbranchsi4_scratch" to do this, this patch
>>> adds the corresponding insn pattern for thumb2.
>>>
>>> +
>>> +(define_insn "thumb2_cbranchsi4_scratch"
>>> +  [(set (pc) (if_then_else
>>> +             (match_operator 4 "arm_comparison_operator"
>>> +              [(match_operand:SI 1 "s_register_operand" "l,?h,0")
>>> +               (match_operand:SI 2 "thumb1_cmpneg_operand" "Pt,Ps,Ps")])
>>
>> You should put the constraints in the order that they are preferred,
>> rather than trying to force the behaviour with '?'.  So the constraints
>> for op1 should be "l,0,h" and the rest of the pattern adjusted to match.
>>
>> OK with that change.

My apologies for the delay responding to this, I've been pondering Ian's 
comments about the cost of adding an extra scratch register.  We really 
don't want to do that if there isn't one available for free (using CMN 
will be much cheaper).

I think the best way to handle that is the way we handle similar 
conversions to use 16-bit thumb instructions, namely to use peephole2. 
That runs after register allocation and will tell us quickly if we can 
either clobber the input operand, or if there's a suitable scratch 
register elsewhere that's available.

To that effect, I've just committed the attached patch.

R.

2010-06-19  Richard Earnshaw  <rearnsha@arm.com>

	PR target/44072
	* arm.md (cmpsi2_addneg): Prefer emitting adds to subs with a negative
	immediate.
	* constraints.md (Pw, Px): New constraints.
	* thumb2.md (cmpsi2_addneg peephole2): New peepholes.

	PR target/44072
	* gcc.target/arm/thumb2-cmpneg2add-1.c: New test.
	* gcc.target/arm/thumb2-cmpneg2add-2.c: New test.


[-- Attachment #2: cmn.patch --]
[-- Type: text/x-patch, Size: 4327 bytes --]

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 628bd62..1608929 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -737,14 +737,14 @@
   [(set (reg:CC CC_REGNUM)
 	(compare:CC
 	 (match_operand:SI 1 "s_register_operand" "r,r")
-	 (match_operand:SI 2 "arm_addimm_operand" "I,L")))
+	 (match_operand:SI 2 "arm_addimm_operand" "L,I")))
    (set (match_operand:SI 0 "s_register_operand" "=r,r")
 	(plus:SI (match_dup 1)
-		 (match_operand:SI 3 "arm_addimm_operand" "L,I")))]
+		 (match_operand:SI 3 "arm_addimm_operand" "I,L")))]
   "TARGET_32BIT && INTVAL (operands[2]) == -INTVAL (operands[3])"
   "@
-   sub%.\\t%0, %1, %2
-   add%.\\t%0, %1, #%n2"
+   add%.\\t%0, %1, %3
+   sub%.\\t%0, %1, #%n3"
   [(set_attr "conds" "set")]
 )
 
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 575d0ac..6d6c77d 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -31,7 +31,7 @@
 ;; The following multi-letter normal constraints have been used:
 ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy
 ;; in Thumb-1 state: Pa, Pb
-;; in Thumb-2 state: Ps, Pt, Pu, Pv
+;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px
 
 ;; The following memory constraints have been used:
 ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
@@ -168,6 +168,16 @@
   (and (match_code "const_int")
        (match_test "TARGET_THUMB2 && ival >= -255 && ival <= 0")))
 
+(define_constraint "Pw"
+  "@internal In Thumb-2 state a constant in the range -255 to -1"
+  (and (match_code "const_int")
+       (match_test "TARGET_THUMB2 && ival >= -255 && ival <= -1")))
+
+(define_constraint "Px"
+  "@internal In Thumb-2 state a constant in the range -7 to -1"
+  (and (match_code "const_int")
+       (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1")))
+
 (define_constraint "G"
  "In ARM/Thumb-2 state a valid FPA immediate constant."
  (and (match_code "const_double")
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 76a3b98..7045d14 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -1231,6 +1231,32 @@
    (set_attr "length" "2")]
 )
 
+(define_peephole2
+  [(set (match_operand:CC 0 "cc_register" "")
+	(compare:CC (match_operand:SI 1 "low_register_operand" "")
+		    (match_operand:SI 2 "const_int_operand" "")))]
+  "TARGET_THUMB2
+   && peep2_reg_dead_p (1, operands[1])
+   && satisfies_constraint_Pw (operands[2])"
+  [(parallel
+    [(set (match_dup 0) (compare:CC (match_dup 1) (match_dup 2)))
+     (set (match_dup 1) (plus:SI (match_dup 1) (match_dup 3)))])]
+  "operands[3] = GEN_INT (- INTVAL (operands[2]));"
+)
+
+(define_peephole2
+  [(match_scratch:SI 3 "l")
+   (set (match_operand:CC 0 "cc_register" "")
+	(compare:CC (match_operand:SI 1 "low_register_operand" "")
+		    (match_operand:SI 2 "const_int_operand" "")))]
+  "TARGET_THUMB2
+   && satisfies_constraint_Px (operands[2])"
+  [(parallel
+    [(set (match_dup 0) (compare:CC (match_dup 1) (match_dup 2)))
+     (set (match_dup 3) (plus:SI (match_dup 1) (match_dup 4)))])]
+  "operands[4] = GEN_INT (- INTVAL (operands[2]));"
+)
+
 (define_insn "*thumb2_addsi3_compare0"
   [(set (reg:CC_NOOV CC_REGNUM)
 	(compare:CC_NOOV
diff --git a/gcc/testsuite/gcc.target/arm/thumb2-cmpneg2add-1.c b/gcc/testsuite/gcc.target/arm/thumb2-cmpneg2add-1.c
new file mode 100644
index 0000000..d75f13a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/thumb2-cmpneg2add-1.c
@@ -0,0 +1,12 @@
+/* Use ADDS clobbering source operand, rather than CMN */
+/* { dg-options "-mthumb -Os" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-final { scan-assembler "adds" } } */
+/* { dg-final { scan-assembler-not "cmn" } } */
+
+void foo1(void);
+void bar5(int x)
+{
+  if (x == -15)
+    foo1();
+}
diff --git a/gcc/testsuite/gcc.target/arm/thumb2-cmpneg2add-2.c b/gcc/testsuite/gcc.target/arm/thumb2-cmpneg2add-2.c
new file mode 100644
index 0000000..358bc6e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/thumb2-cmpneg2add-2.c
@@ -0,0 +1,12 @@
+/* Use ADDS with a scratch, rather than CMN */
+/* { dg-options "-mthumb -Os" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-final { scan-assembler "adds" } } */
+/* { dg-final { scan-assembler-not "cmn" } } */
+
+void foo1(int);
+void bar5(int x)
+{
+  if (x == -1)
+    foo1(x);
+}

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2010-06-19 23:11 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-05-12  8:43 [PATCH: PR target/44072] Replace "cmp r0, -1" by "adds r0, 1" in thumb2 Carrot Wei
2010-05-13  7:29 ` Richard Earnshaw
2010-05-14  7:05   ` Carrot Wei
2010-06-07  8:48   ` Carrot Wei
2010-06-10  7:04     ` Carrot Wei
2010-06-12  7:54       ` [PING] " Carrot Wei
2010-06-18 20:12         ` Carrot Wei
2010-06-20 11:08     ` Richard Earnshaw

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).