* [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words @ 2010-08-22 6:49 Carrot Wei 2010-08-24 13:55 ` Carrot Wei 0 siblings, 1 reply; 46+ messages in thread From: Carrot Wei @ 2010-08-22 6:49 UTC (permalink / raw) To: gcc-patches Hi The current ARM compiler can't merge two consecutive loads or stores into a single ldrd or strd. This patch adds new ldrd and strd patterns, and new peephole2 rules to perform the optimization. This patch handles Thumb-2 instructions only. For ARM instructions there are more constraints on register usage, so that case is better handled before register allocation. This patch has been tested on qemu with Thumb-2 instructions. ChangeLog: 2010-08-22 Wei Guozhi <carrot@google.com> PR target/45335 * gcc/config/arm/thumb2.md (*thumb2_ldrd and peephole2): New insn pattern and related peephole2. (*thumb2_strd and peephole2): New insn pattern and related peephole2. * gcc/config/arm/arm.c (thumb2_ldrd_addr): New function. * gcc/config/arm/arm-protos.h (thumb2_ldrd_addr): New prototype. 2010-08-22 Wei Guozhi <carrot@google.com> PR target/45335 * gcc.target/arm/pr45335.c: New test.
thanks Wei Guozhi Index: thumb2.md =================================================================== --- thumb2.md (revision 163363) +++ thumb2.md (working copy) @@ -1257,3 +1257,69 @@ " operands[2] = GEN_INT (32 - INTVAL (operands[2])); ") + +(define_insn "*thumb2_ldrd" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (mem:SI (match_operand:SI 2 "" ""))) + (set (match_operand:SI 1 "s_register_operand" "") + (mem:SI (match_operand:SI 3 "" "")))])] + "TARGET_THUMB2 && + thumb2_ldrd_addr (operands[0], operands[1], operands[2], operands[3], 1)" + "* + { + rtx ldrd_addr = thumb2_ldrd_addr (operands[0], operands[1], + operands[2], operands[3], 1); + operands[4] = gen_rtx_MEM (SImode, ldrd_addr); + if (ldrd_addr == operands[3]) + return \"ldrd\\t%1, %0, %4\"; + else + return \"ldrd\\t%0, %1, %4\"; + }" +) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (mem:SI (match_operand:SI 2 "" ""))) + (set (match_operand:SI 1 "s_register_operand" "") + (mem:SI (match_operand:SI 3 "" "")))] + "TARGET_THUMB2 && + thumb2_ldrd_addr (operands[0], operands[1], operands[2], operands[3], 1)" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (mem:SI (match_operand:SI 2 "" ""))) + (set (match_operand:SI 1 "s_register_operand" "") + (mem:SI (match_operand:SI 3 "" "")))])] + "" +) + +(define_insn "*thumb2_strd" + [(parallel [(set (mem:SI (match_operand:SI 2 "" "")) + (match_operand:SI 0 "s_register_operand" "")) + (set (mem:SI (match_operand:SI 3 "" "")) + (match_operand:SI 1 "s_register_operand" ""))])] + "TARGET_THUMB2 && + thumb2_ldrd_addr (operands[0], operands[1], operands[2], operands[3], 0)" + "* + { + rtx strd_addr = thumb2_ldrd_addr (operands[0], operands[1], + operands[2], operands[3], 0); + operands[4] = gen_rtx_MEM (SImode, strd_addr); + if (strd_addr == operands[3]) + return \"strd\\t%1, %0, %4\"; + else + return \"strd\\t%0, %1, %4\"; + }" +) + +(define_peephole2 + [(set (mem:SI (match_operand:SI 2 "" "")) + 
(match_operand:SI 0 "s_register_operand" "")) + (set (mem:SI (match_operand:SI 3 "" "")) + (match_operand:SI 1 "s_register_operand" ""))] + "TARGET_THUMB2 && + thumb2_ldrd_addr (operands[0], operands[1], operands[2], operands[3], 0)" + [(parallel [(set (mem:SI (match_operand:SI 2 "" "")) + (match_operand:SI 0 "s_register_operand" "")) + (set (mem:SI (match_operand:SI 3 "" "")) + (match_operand:SI 1 "s_register_operand" ""))])] + "" +) Index: arm.c =================================================================== --- arm.c (revision 163363) +++ arm.c (working copy) @@ -22959,4 +22959,76 @@ arm_expand_sync (enum machine_mode mode, } } +/* Check if the two memory addresses can be accessed by an ldrd instruction. + That is they use the same base register, and the gap between constant + offsets should be 4. It can also be used for strd instruction. + If so return the lower address, otherwise return NULL. */ +rtx +thumb2_ldrd_addr (rtx dest1, rtx dest2, rtx addr1, rtx addr2, bool ldrd) +{ + rtx reg1, reg2, op0, op1; + rtx addr = NULL; + HOST_WIDE_INT offset1 = 0; + HOST_WIDE_INT offset2 = 0; + + switch (GET_CODE (addr1)) + { + case REG: + reg1 = addr1; + break; + + case PLUS: + op0 = XEXP (addr1, 0); + op1 = XEXP (addr1, 1); + if ((GET_CODE (op0) != REG) || (GET_CODE (op1) != CONST_INT)) + return NULL; + reg1 = op0; + offset1 = INTVAL (op1); + break; + + default: + return NULL; + } + + switch (GET_CODE (addr2)) + { + case REG: + reg2 = addr2; + break; + + case PLUS: + op0 = XEXP (addr2, 0); + op1 = XEXP (addr2, 1); + if ((GET_CODE (op0) != REG) || (GET_CODE (op1) != CONST_INT)) + return NULL; + reg2 = op0; + offset2 = INTVAL (op1); + break; + + default: + return NULL; + } + + if (reg1 != reg2) + return NULL; + + if (ldrd && ((dest1 == dest2) || (dest1 == reg1))) + return NULL; + + if ((offset1 + 4) == offset2) + addr = addr1; + else if ((offset2 + 4) == offset1) + { + addr = addr2; + offset1 = offset2; + } + else + return NULL; + + if (((offset1 % 4) != 0) || (offset1 
> 1020) || (offset1 < -1020)) + return NULL; + + return addr; +} + #include "gt-arm.h" Index: arm-protos.h =================================================================== --- arm-protos.h (revision 163363) +++ arm-protos.h (working copy) @@ -149,7 +149,7 @@ extern void arm_expand_sync (enum machin extern const char *arm_output_memory_barrier (rtx *); extern const char *arm_output_sync_insn (rtx, rtx *); extern unsigned int arm_sync_loop_insns (rtx , rtx *); - +extern rtx thumb2_ldrd_addr (rtx, rtx, rtx, rtx, bool); extern bool arm_output_addr_const_extra (FILE *, rtx); #if defined TREE_CODE Index: pr45335.c =================================================================== --- pr45335.c (revision 0) +++ pr45335.c (revision 0) @@ -0,0 +1,20 @@ +/* { dg-options "-mthumb -O2" } */ +/* { dg-require-effective-target arm_thumb2_ok } */ +/* { dg-final { scan-assembler "ldrd" } } */ +/* { dg-final { scan-assembler "strd" } } */ + +struct S +{ + void* p1; + void* p2; + void* p3; + void* p4; +}; + +void foo1(struct S* fp, struct S* otherSaveArea) +{ + struct S* saveA = fp - 1; + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); +} ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2010-08-22 6:49 [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words Carrot Wei @ 2010-08-24 13:55 ` Carrot Wei 2010-08-24 14:14 ` Ramana Radhakrishnan 0 siblings, 1 reply; 46+ messages in thread From: Carrot Wei @ 2010-08-24 13:55 UTC (permalink / raw) To: gcc-patches The patterns in the original patch conflict with the ldm2/stm2 patterns. In Thumb-2, ldrd/strd instructions are more flexible than ldm2/stm2, so there is no reason to keep using ldm2/stm2. In this new patch I removed the Thumb-2 support for ldm2/stm2. The ldm2/stm2 patterns with update are not affected. It passed testing on ARM qemu. thanks Wei Guozhi ChangeLog: 2010-08-24 Wei Guozhi <carrot@google.com> PR target/45335 * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, thumb2_ldrd_reg2 and peephole2): New insn pattern and related peephole2. (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): New insn pattern and related peephole2. * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. (thumb2_check_ldrd_operands): New function. * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype. (thumb2_check_ldrd_operands): New prototype. * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): Change the ldm/stm patterns with 2 words to ARM only. * gcc/config/arm/constraints.md (Py): New thumb2 constant constraint suitable to ldrd/strd instructions. 2010-08-24 Wei Guozhi <carrot@google.com> PR target/45335 * gcc.target/arm/pr45335.c: New test. * gcc.target/arm/pr40457-1.c: Changed to load 3 words. * gcc.target/arm/pr40457-2.c: Changed to store 3 words.
Index: thumb2.md =================================================================== --- thumb2.md (revision 163363) +++ thumb2.md (working copy) @@ -1257,3 +1257,147 @@ " operands[2] = GEN_INT (32 - INTVAL (operands[2])); ") + +(define_insn "*thumb2_ldrd" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (mem:SI (plus:SI + (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" "Py")))) + (set (match_operand:SI 1 "s_register_operand" "") + (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 4 "const_int_operand" "Py"))))])] + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], + operands[2], operands[3], operands[4], 1)" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + HOST_WIDE_INT offset2 = INTVAL (operands[4]); + if (offset1 < offset2 ) + return \"ldrd\\t%0, %1, [%2, %3]\"; + else + return \"ldrd\\t%1, %0, [%2, %4]\"; + }" +) + +(define_insn "*thumb2_ldrd_reg1" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))) + (set (match_operand:SI 1 "s_register_operand" "") + (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 3 "const_int_operand" "Py"))))])] + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], + operands[2], 0, operands[3], 1)" + "* + { + HOST_WIDE_INT offset2 = INTVAL (operands[3]); + if (offset2 == 4) + return \"ldrd\\t%0, %1, [%2]\"; + else + return \"ldrd\\t%1, %0, [%2, %3]\"; + }" +) + +(define_insn "*thumb2_ldrd_reg2" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (mem:SI (plus:SI + (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" "Py")))) + (set (match_operand:SI 1 "s_register_operand" "") + (mem:SI (match_dup 2)))])] + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], + operands[2], operands[3], 0, 1)" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + if (offset1 == -4) + return \"ldrd\\t%0, 
%1, [%2, %3]\"; + else + return \"ldrd\\t%1, %0, [%2]\"; + }" +) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" ""))] + "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1], + operands[2], operands[3], 1)" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" ""))])] + "" +) + +(define_insn "*thumb2_strd" + [(parallel [(set (mem:SI + (plus:SI (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" "Py"))) + (match_operand:SI 0 "s_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 4 "const_int_operand" "Py"))) + (match_operand:SI 1 "s_register_operand" ""))])] + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], + operands[2], operands[3], operands[4], 0)" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + HOST_WIDE_INT offset2 = INTVAL (operands[4]); + if (offset1 < offset2 ) + return \"strd\\t%0, %1, [%2, %3]\"; + else + return \"strd\\t%1, %0, [%2, %4]\"; + }" +) + +(define_insn "*thumb2_strd_reg1" + [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk")) + (match_operand:SI 0 "s_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 3 "const_int_operand" "Py"))) + (match_operand:SI 1 "s_register_operand" ""))])] + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], + operands[2], 0, operands[3], 0)" + "* + { + HOST_WIDE_INT offset2 = INTVAL (operands[3]); + if (offset2 == 4) + return \"strd\\t%0, %1, [%2]\"; + else + return \"strd\\t%1, %0, [%2, %3]\"; + }" +) + +(define_insn "*thumb2_strd_reg2" + [(parallel [(set (mem:SI (plus:SI + (match_operand:SI 2 "s_register_operand" "rk") + 
(match_operand:SI 3 "const_int_operand" "Py"))) + (match_operand:SI 0 "s_register_operand" "")) + (set (mem:SI (match_dup 2)) + (match_operand:SI 1 "s_register_operand" ""))])] + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], + operands[2], operands[3], 0, 0)" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + if (offset1 == -4) + return \"strd\\t%0, %1, [%2, %3]\"; + else + return \"strd\\t%1, %0, [%2]\"; + }" +) + +(define_peephole2 + [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" ""))] + "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1], + operands[2], operands[3], 0)" + [(parallel [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" ""))])] + "" +) Index: arm.c =================================================================== --- arm.c (revision 163363) +++ arm.c (working copy) @@ -22959,4 +22959,85 @@ arm_expand_sync (enum machine_mode mode, } } +/* Check the legality of operands in an ldrd/strd instruction. */ +bool +thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base, + rtx off1, rtx off2, bool ldrd) +{ + HOST_WIDE_INT offset1 = 0; + HOST_WIDE_INT offset2 = 0; + + if (off1 != NULL) + offset1 = INTVAL (off1); + if (off2 != NULL) + offset2 = INTVAL (off2); + + if (ldrd && (reg1 == reg2)) + return false; + + if ((offset1 + 4) == offset2) + return true; + if ((offset2 + 4) == offset1) + return true; + + return false; +} + +/* Check if the two memory accesses can be merged to an ldrd/strd instruction. + That is they use the same base register, and the gap between constant + offsets should be 4. 
*/ +bool +thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd) +{ + rtx base1, base2, op1; + rtx addr1 = XEXP (mem1, 0); + rtx addr2 = XEXP (mem2, 0); + HOST_WIDE_INT offset1 = 0; + HOST_WIDE_INT offset2 = 0; + + if (REG_P (addr1)) + base1 = addr1; + else if (GET_CODE (addr1) == PLUS) + { + base1 = XEXP (addr1, 0); + op1 = XEXP (addr1, 1); + if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT)) + return false; + offset1 = INTVAL (op1); + } + else + return false; + + if (REG_P (addr2)) + base2 = addr2; + else if (GET_CODE (addr2) == PLUS) + { + base2 = XEXP (addr2, 0); + op1 = XEXP (addr2, 1); + if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT)) + return false; + offset2 = INTVAL (op1); + } + else + return false; + + if (base1 != base2) + return false; + + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) + return false; + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) + return false; + + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) + return false; + + if ((offset1 + 4) == offset2) + return true; + if ((offset2 + 4) == offset1) + return true; + + return false; +} + #include "gt-arm.h" Index: arm-protos.h =================================================================== --- arm-protos.h (revision 163363) +++ arm-protos.h (working copy) @@ -149,7 +149,8 @@ extern void arm_expand_sync (enum machin extern const char *arm_output_memory_barrier (rtx *); extern const char *arm_output_sync_insn (rtx, rtx *); extern unsigned int arm_sync_loop_insns (rtx , rtx *); - +extern bool thumb2_check_ldrd_operands (rtx, rtx, rtx, rtx, rtx, bool); +extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool); extern bool arm_output_addr_const_extra (FILE *, rtx); #if defined TREE_CODE Index: ldmstm.md =================================================================== --- ldmstm.md (revision 163363) +++ ldmstm.md (working copy) @@ -852,7 +852,7 @@ (set (match_operand:SI 2 "arm_hard_register_operand" "") (mem:SI 
(plus:SI (match_dup 3) (const_int 4))))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" "ldm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "load2") (set_attr "predicable" "yes")]) @@ -901,7 +901,7 @@ (match_operand:SI 1 "arm_hard_register_operand" "")) (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) (match_operand:SI 2 "arm_hard_register_operand" ""))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" "stm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "store2") (set_attr "predicable" "yes")]) @@ -1041,7 +1041,7 @@ (set (match_operand:SI 2 "arm_hard_register_operand" "") (mem:SI (plus:SI (match_dup 3) (const_int -4))))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" "ldm%(db%)\t%3, {%1, %2}" [(set_attr "type" "load2") (set_attr "predicable" "yes")]) @@ -1067,7 +1067,7 @@ (match_operand:SI 1 "arm_hard_register_operand" "")) (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) (match_operand:SI 2 "arm_hard_register_operand" ""))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" "stm%(db%)\t%3, {%1, %2}" [(set_attr "type" "store2") (set_attr "predicable" "yes")]) Index: constraints.md =================================================================== --- constraints.md (revision 163363) +++ constraints.md (working copy) @@ -31,7 +31,7 @@ ;; The following multi-letter normal constraints have been used: ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di ;; in Thumb-1 state: Pa, Pb, Pc, Pd -;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px +;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px, Py ;; The following memory constraints have been used: ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us @@ -189,6 +189,13 @@ (and (match_code "const_int") (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1"))) +(define_constraint "Py" + "@internal In Thumb-2 state a constant that is a 
multiple of 4 in the + range -1020 to 1024" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= -1020 && ival <= 1024 + && (ival & 3) == 0"))) + (define_constraint "G" "In ARM/Thumb-2 state a valid FPA immediate constant." (and (match_code "const_double") Index: pr40457-1.c =================================================================== --- pr40457-1.c (revision 163363) +++ pr40457-1.c (working copy) @@ -1,9 +1,9 @@ -/* { dg-options "-Os" } */ +/* { dg-options "-O2" } */ /* { dg-do compile } */ int bar(int* p) { - int x = p[0] + p[1]; + int x = p[0] + p[1] + p[2]; return x; } Index: pr40457-2.c =================================================================== --- pr40457-2.c (revision 163363) +++ pr40457-2.c (working copy) @@ -5,6 +5,7 @@ void foo(int* p) { p[0] = 1; p[1] = 0; + p[2] = 2; } /* { dg-final { scan-assembler "stm" } } */ Index: pr45335.c =================================================================== --- pr45335.c (revision 0) +++ pr45335.c (revision 0) @@ -0,0 +1,20 @@ +/* { dg-options "-mthumb -O2" } */ +/* { dg-require-effective-target arm_thumb2_ok } */ +/* { dg-final { scan-assembler "ldrd" } } */ +/* { dg-final { scan-assembler "strd" } } */ + +struct S +{ + void* p1; + void* p2; + void* p3; + void* p4; +}; + +void foo1(struct S* fp, struct S* otherSaveArea) +{ + struct S* saveA = fp - 1; + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); +} ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2010-08-24 13:55 ` Carrot Wei @ 2010-08-24 14:14 ` Ramana Radhakrishnan 2010-08-25 10:02 ` Carrot Wei 0 siblings, 1 reply; 46+ messages in thread From: Ramana Radhakrishnan @ 2010-08-24 14:14 UTC (permalink / raw) To: Carrot Wei; +Cc: gcc-patches On Tue, 2010-08-24 at 21:09 +0800, Carrot Wei wrote: > The patterns in original patch conflict with ldm2/stm2 patterns. In thumb2 > ldrd/strd instructions are more flexible than ldm2/stm2, we don't have any > reason to continue to use ldm2/stm2. In this new patch I removed the thumb2 > support of ldm2/stm2. The ldm2/stm2 with update patterns are not affected. I can't approve / reject your patch but ... You need to consider the case for the Cortex M3 where we don't want to use ldrd's with overlapping address and destination registers because it may trigger a hardware erratum, thus you need to allow ldm2's on the M3 and *not* generate any ldrd's in such situations. Look at the implementation of -mfix-cortexm3-ldrd Also it's not clear which patterns in ldmstm.md you are modifying without applying the patch , could you regenerate your patch with svn diff --diff-cmd "diff" -x "-aup -F^(define" so that we know easily which patterns you are modifying. Thanks, Ramana > > It passed testing on arm qemu. > > thanks > Wei Guozhi > > > ChangeLog: > 2010-08-24 Wei Guozhi <carrot@google.com> > > PR target/45335 > * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, > thumb2_ldrd_reg2 and peephole2): New insn pattern and related > peephole2. > (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): > New insn pattern and related peephole2. > * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. > (thumb2_check_ldrd_operands): New function. > * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype. > (thumb2_check_ldrd_operands): New prototype. 
> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): > Change the ldm/stm patterns with 2 words to ARM only. > * gcc/config/arm/constraints.md (Py): New thumb2 constant constraint > suitable to ldrd/strd instructions. > > > 2010-08-24 Wei Guozhi <carrot@google.com> > > PR target/45335 > * gcc.target/arm/pr45335.c: New test. > * gcc.target/arm/pr40457-1.c: Changed to load 3 words. > * gcc.target/arm/pr40457-2.c: Changed to store 3 words. > > > Index: thumb2.md > =================================================================== > --- thumb2.md (revision 163363) > +++ thumb2.md (working copy) > @@ -1257,3 +1257,147 @@ > " > operands[2] = GEN_INT (32 - INTVAL (operands[2])); > ") > + > +(define_insn "*thumb2_ldrd" > + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") > + (mem:SI (plus:SI > + (match_operand:SI 2 "s_register_operand" "rk") > + (match_operand:SI 3 "const_int_operand" "Py")))) > + (set (match_operand:SI 1 "s_register_operand" "") > + (mem:SI (plus:SI (match_dup 2) > + (match_operand:SI 4 "const_int_operand" "Py"))))])] > + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], > + operands[2], operands[3], operands[4], 1)" > + "* > + { > + HOST_WIDE_INT offset1 = INTVAL (operands[3]); > + HOST_WIDE_INT offset2 = INTVAL (operands[4]); > + if (offset1 < offset2 ) > + return \"ldrd\\t%0, %1, [%2, %3]\"; > + else > + return \"ldrd\\t%1, %0, [%2, %4]\"; > + }" > +) > + > +(define_insn "*thumb2_ldrd_reg1" > + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") > + (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))) > + (set (match_operand:SI 1 "s_register_operand" "") > + (mem:SI (plus:SI (match_dup 2) > + (match_operand:SI 3 "const_int_operand" "Py"))))])] > + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], > + operands[2], 0, operands[3], 1)" > + "* > + { > + HOST_WIDE_INT offset2 = INTVAL (operands[3]); > + if (offset2 == 4) > + return \"ldrd\\t%0, %1, [%2]\"; > + else > + 
return \"ldrd\\t%1, %0, [%2, %3]\"; > + }" > +) > + > +(define_insn "*thumb2_ldrd_reg2" > + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") > + (mem:SI (plus:SI > + (match_operand:SI 2 "s_register_operand" "rk") > + (match_operand:SI 3 "const_int_operand" "Py")))) > + (set (match_operand:SI 1 "s_register_operand" "") > + (mem:SI (match_dup 2)))])] > + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], > + operands[2], operands[3], 0, 1)" > + "* > + { > + HOST_WIDE_INT offset1 = INTVAL (operands[3]); > + if (offset1 == -4) > + return \"ldrd\\t%0, %1, [%2, %3]\"; > + else > + return \"ldrd\\t%1, %0, [%2]\"; > + }" > +) > + > +(define_peephole2 > + [(set (match_operand:SI 0 "s_register_operand" "") > + (match_operand:SI 2 "memory_operand" "")) > + (set (match_operand:SI 1 "s_register_operand" "") > + (match_operand:SI 3 "memory_operand" ""))] > + "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1], > + operands[2], operands[3], 1)" > + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") > + (match_operand:SI 2 "memory_operand" "")) > + (set (match_operand:SI 1 "s_register_operand" "") > + (match_operand:SI 3 "memory_operand" ""))])] > + "" > +) > + > +(define_insn "*thumb2_strd" > + [(parallel [(set (mem:SI > + (plus:SI (match_operand:SI 2 "s_register_operand" "rk") > + (match_operand:SI 3 "const_int_operand" "Py"))) > + (match_operand:SI 0 "s_register_operand" "")) > + (set (mem:SI (plus:SI (match_dup 2) > + (match_operand:SI 4 "const_int_operand" "Py"))) > + (match_operand:SI 1 "s_register_operand" ""))])] > + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], > + operands[2], operands[3], operands[4], 0)" > + "* > + { > + HOST_WIDE_INT offset1 = INTVAL (operands[3]); > + HOST_WIDE_INT offset2 = INTVAL (operands[4]); > + if (offset1 < offset2 ) > + return \"strd\\t%0, %1, [%2, %3]\"; > + else > + return \"strd\\t%1, %0, [%2, %4]\"; > + }" > +) > + > +(define_insn "*thumb2_strd_reg1" > 
+ [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk")) > + (match_operand:SI 0 "s_register_operand" "")) > + (set (mem:SI (plus:SI (match_dup 2) > + (match_operand:SI 3 "const_int_operand" "Py"))) > + (match_operand:SI 1 "s_register_operand" ""))])] > + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], > + operands[2], 0, operands[3], 0)" > + "* > + { > + HOST_WIDE_INT offset2 = INTVAL (operands[3]); > + if (offset2 == 4) > + return \"strd\\t%0, %1, [%2]\"; > + else > + return \"strd\\t%1, %0, [%2, %3]\"; > + }" > +) > + > +(define_insn "*thumb2_strd_reg2" > + [(parallel [(set (mem:SI (plus:SI > + (match_operand:SI 2 "s_register_operand" "rk") > + (match_operand:SI 3 "const_int_operand" "Py"))) > + (match_operand:SI 0 "s_register_operand" "")) > + (set (mem:SI (match_dup 2)) > + (match_operand:SI 1 "s_register_operand" ""))])] > + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], > + operands[2], operands[3], 0, 0)" > + "* > + { > + HOST_WIDE_INT offset1 = INTVAL (operands[3]); > + if (offset1 == -4) > + return \"strd\\t%0, %1, [%2, %3]\"; > + else > + return \"strd\\t%1, %0, [%2]\"; > + }" > +) > + > +(define_peephole2 > + [(set (match_operand:SI 2 "memory_operand" "") > + (match_operand:SI 0 "s_register_operand" "")) > + (set (match_operand:SI 3 "memory_operand" "") > + (match_operand:SI 1 "s_register_operand" ""))] > + "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1], > + operands[2], operands[3], 0)" > + [(parallel [(set (match_operand:SI 2 "memory_operand" "") > + (match_operand:SI 0 "s_register_operand" "")) > + (set (match_operand:SI 3 "memory_operand" "") > + (match_operand:SI 1 "s_register_operand" ""))])] > + "" > +) > Index: arm.c > =================================================================== > --- arm.c (revision 163363) > +++ arm.c (working copy) > @@ -22959,4 +22959,85 @@ arm_expand_sync (enum machine_mode mode, > } > } > > +/* Check the legality of operands in 
an ldrd/strd instruction. */ > +bool > +thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base, > + rtx off1, rtx off2, bool ldrd) > +{ > + HOST_WIDE_INT offset1 = 0; > + HOST_WIDE_INT offset2 = 0; > + > + if (off1 != NULL) > + offset1 = INTVAL (off1); > + if (off2 != NULL) > + offset2 = INTVAL (off2); > + > + if (ldrd && (reg1 == reg2)) > + return false; > + > + if ((offset1 + 4) == offset2) > + return true; > + if ((offset2 + 4) == offset1) > + return true; > + > + return false; > +} > + > +/* Check if the two memory accesses can be merged to an ldrd/strd instruction. > + That is they use the same base register, and the gap between constant > + offsets should be 4. */ > +bool > +thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd) > +{ > + rtx base1, base2, op1; > + rtx addr1 = XEXP (mem1, 0); > + rtx addr2 = XEXP (mem2, 0); > + HOST_WIDE_INT offset1 = 0; > + HOST_WIDE_INT offset2 = 0; > + > + if (REG_P (addr1)) > + base1 = addr1; > + else if (GET_CODE (addr1) == PLUS) > + { > + base1 = XEXP (addr1, 0); > + op1 = XEXP (addr1, 1); > + if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT)) > + return false; > + offset1 = INTVAL (op1); > + } > + else > + return false; > + > + if (REG_P (addr2)) > + base2 = addr2; > + else if (GET_CODE (addr2) == PLUS) > + { > + base2 = XEXP (addr2, 0); > + op1 = XEXP (addr2, 1); > + if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT)) > + return false; > + offset2 = INTVAL (op1); > + } > + else > + return false; > + > + if (base1 != base2) > + return false; > + > + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) > + return false; > + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) > + return false; > + > + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) > + return false; > + > + if ((offset1 + 4) == offset2) > + return true; > + if ((offset2 + 4) == offset1) > + return true; > + > + return false; > +} > + > #include "gt-arm.h" > Index: arm-protos.h > 
=================================================================== > --- arm-protos.h (revision 163363) > +++ arm-protos.h (working copy) > @@ -149,7 +149,8 @@ extern void arm_expand_sync (enum machin > extern const char *arm_output_memory_barrier (rtx *); > extern const char *arm_output_sync_insn (rtx, rtx *); > extern unsigned int arm_sync_loop_insns (rtx , rtx *); > - > +extern bool thumb2_check_ldrd_operands (rtx, rtx, rtx, rtx, rtx, bool); > +extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool); > extern bool arm_output_addr_const_extra (FILE *, rtx); > > #if defined TREE_CODE > Index: ldmstm.md > =================================================================== > --- ldmstm.md (revision 163363) > +++ ldmstm.md (working copy) > @@ -852,7 +852,7 @@ > (set (match_operand:SI 2 "arm_hard_register_operand" "") > (mem:SI (plus:SI (match_dup 3) > (const_int 4))))])] > - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" > + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" > "ldm%(ia%)\t%3, {%1, %2}" > [(set_attr "type" "load2") > (set_attr "predicable" "yes")]) > @@ -901,7 +901,7 @@ > (match_operand:SI 1 "arm_hard_register_operand" "")) > (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) > (match_operand:SI 2 "arm_hard_register_operand" ""))])] > - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" > + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" > "stm%(ia%)\t%3, {%1, %2}" > [(set_attr "type" "store2") > (set_attr "predicable" "yes")]) > @@ -1041,7 +1041,7 @@ > (set (match_operand:SI 2 "arm_hard_register_operand" "") > (mem:SI (plus:SI (match_dup 3) > (const_int -4))))])] > - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" > + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" > "ldm%(db%)\t%3, {%1, %2}" > [(set_attr "type" "load2") > (set_attr "predicable" "yes")]) > @@ -1067,7 +1067,7 @@ > (match_operand:SI 1 "arm_hard_register_operand" "")) > (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) > (match_operand:SI 2 "arm_hard_register_operand" ""))])] > - 
"TARGET_32BIT && XVECLEN (operands[0], 0) == 2" > + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" > "stm%(db%)\t%3, {%1, %2}" > [(set_attr "type" "store2") > (set_attr "predicable" "yes")]) > Index: constraints.md > =================================================================== > --- constraints.md (revision 163363) > +++ constraints.md (working copy) > @@ -31,7 +31,7 @@ > ;; The following multi-letter normal constraints have been used: > ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di > ;; in Thumb-1 state: Pa, Pb, Pc, Pd > -;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px > +;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px, Py > > ;; The following memory constraints have been used: > ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us > @@ -189,6 +189,13 @@ > (and (match_code "const_int") > (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1"))) > > +(define_constraint "Py" > + "@internal In Thumb-2 state a constant that is a multiple of 4 in the > + range -1020 to 1024" > + (and (match_code "const_int") > + (match_test "TARGET_THUMB2 && ival >= -1020 && ival <= 1024 > + && (ival & 3) == 0"))) > + > (define_constraint "G" > "In ARM/Thumb-2 state a valid FPA immediate constant." 
> (and (match_code "const_double") > > > > Index: pr40457-1.c > =================================================================== > --- pr40457-1.c (revision 163363) > +++ pr40457-1.c (working copy) > @@ -1,9 +1,9 @@ > -/* { dg-options "-Os" } */ > +/* { dg-options "-O2" } */ > /* { dg-do compile } */ > > int bar(int* p) > { > - int x = p[0] + p[1]; > + int x = p[0] + p[1] + p[2]; > return x; > } > > Index: pr40457-2.c > =================================================================== > --- pr40457-2.c (revision 163363) > +++ pr40457-2.c (working copy) > @@ -5,6 +5,7 @@ void foo(int* p) > { > p[0] = 1; > p[1] = 0; > + p[2] = 2; > } > > /* { dg-final { scan-assembler "stm" } } */ > Index: pr45335.c > =================================================================== > --- pr45335.c (revision 0) > +++ pr45335.c (revision 0) > @@ -0,0 +1,20 @@ > +/* { dg-options "-mthumb -O2" } */ > +/* { dg-require-effective-target arm_thumb2_ok } */ > +/* { dg-final { scan-assembler "ldrd" } } */ > +/* { dg-final { scan-assembler "strd" } } */ > + > +struct S > +{ > + void* p1; > + void* p2; > + void* p3; > + void* p4; > +}; > + > +void foo1(struct S* fp, struct S* otherSaveArea) > +{ > + struct S* saveA = fp - 1; > + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); > + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", > + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); > +} ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2010-08-24 14:14 ` Ramana Radhakrishnan @ 2010-08-25 10:02 ` Carrot Wei 2010-09-01 15:25 ` Richard Earnshaw 0 siblings, 1 reply; 46+ messages in thread From: Carrot Wei @ 2010-08-25 10:02 UTC (permalink / raw) To: ramana.radhakrishnan; +Cc: gcc-patches Thank you, following is the revised patch that addresses cortex M3 erratum and new diff for ldmstm.md. Index: thumb2.md =================================================================== --- thumb2.md (revision 163363) +++ thumb2.md (working copy) @@ -1257,3 +1257,205 @@ (define_peephole2 " operands[2] = GEN_INT (32 - INTVAL (operands[2])); ") + +(define_insn "*thumb2_ldrd" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (mem:SI (plus:SI + (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" "Py")))) + (set (match_operand:SI 1 "s_register_operand" "") + (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 4 "const_int_operand" "Py"))))])] + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], + operands[2], operands[3], operands[4], 1)" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + HOST_WIDE_INT offset2 = INTVAL (operands[4]); + if (offset1 > offset2) + { + /* Swap the operands so that memory [base+offset1] is loaded into + operands[0]. 
*/ + rtx tmp = operands[0]; + operands[0] = operands[1]; + operands[1] = tmp; + tmp = operands[3]; + operands[3] = operands[4]; + operands[4] = tmp; + offset1 = INTVAL (operands[3]); + offset2 = INTVAL (operands[4]); + } + if (fix_cm3_ldrd && (operands[2] == operands[0])) + { + if (offset1 <= -256) + { + output_asm_insn (\"sub\\t%2, %2, %n3\", operands); + output_asm_insn (\"ldr\\t%1, [%2, #4]\", operands); + output_asm_insn (\"ldr\\t%0, [%2]\", operands); + } + else + { + output_asm_insn (\"ldr\\t%1, [%2, %4]\", operands); + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); + } + return \"\"; + } + else + return \"ldrd\\t%0, %1, [%2, %3]\"; + }" +) + +(define_insn "*thumb2_ldrd_reg1" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))) + (set (match_operand:SI 1 "s_register_operand" "") + (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 3 "const_int_operand" "Py"))))])] + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], + operands[2], 0, operands[3], 1)" + "* + { + HOST_WIDE_INT offset2 = INTVAL (operands[3]); + if (offset2 == 4) + { + if (fix_cm3_ldrd && (operands[2] == operands[0])) + { + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); + output_asm_insn (\"ldr\\t%0, [%2]\", operands); + return \"\"; + } + return \"ldrd\\t%0, %1, [%2]\"; + } + else + { + if (fix_cm3_ldrd && (operands[2] == operands[1])) + { + output_asm_insn (\"ldr\\t%0, [%2]\", operands); + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); + return \"\"; + } + return \"ldrd\\t%1, %0, [%2, %3]\"; + } + }" +) + +(define_insn "*thumb2_ldrd_reg2" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (mem:SI (plus:SI + (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" "Py")))) + (set (match_operand:SI 1 "s_register_operand" "") + (mem:SI (match_dup 2)))])] + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], + operands[2], 
operands[3], 0, 1)" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + if (offset1 == -4) + { + if (fix_cm3_ldrd && (operands[2] == operands[0])) + { + output_asm_insn (\"ldr\\t%1, [%2]\", operands); + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); + return \"\"; + } + return \"ldrd\\t%0, %1, [%2, %3]\"; + } + else + { + if (fix_cm3_ldrd && (operands[2] == operands[1])) + { + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); + output_asm_insn (\"ldr\\t%1, [%2]\", operands); + return \"\"; + } + return \"ldrd\\t%1, %0, [%2]\"; + } + }" +) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" ""))] + "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1], + operands[2], operands[3], 1)" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" ""))])] + "" +) + +(define_insn "*thumb2_strd" + [(parallel [(set (mem:SI + (plus:SI (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" "Py"))) + (match_operand:SI 0 "s_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 4 "const_int_operand" "Py"))) + (match_operand:SI 1 "s_register_operand" ""))])] + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], + operands[2], operands[3], operands[4], 0)" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + HOST_WIDE_INT offset2 = INTVAL (operands[4]); + if (offset1 < offset2 ) + return \"strd\\t%0, %1, [%2, %3]\"; + else + return \"strd\\t%1, %0, [%2, %4]\"; + }" +) + +(define_insn "*thumb2_strd_reg1" + [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk")) + (match_operand:SI 0 "s_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) + 
(match_operand:SI 3 "const_int_operand" "Py"))) + (match_operand:SI 1 "s_register_operand" ""))])] + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], + operands[2], 0, operands[3], 0)" + "* + { + HOST_WIDE_INT offset2 = INTVAL (operands[3]); + if (offset2 == 4) + return \"strd\\t%0, %1, [%2]\"; + else + return \"strd\\t%1, %0, [%2, %3]\"; + }" +) + +(define_insn "*thumb2_strd_reg2" + [(parallel [(set (mem:SI (plus:SI + (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" "Py"))) + (match_operand:SI 0 "s_register_operand" "")) + (set (mem:SI (match_dup 2)) + (match_operand:SI 1 "s_register_operand" ""))])] + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], + operands[2], operands[3], 0, 0)" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + if (offset1 == -4) + return \"strd\\t%0, %1, [%2, %3]\"; + else + return \"strd\\t%1, %0, [%2]\"; + }" +) + +(define_peephole2 + [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" ""))] + "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1], + operands[2], operands[3], 0)" + [(parallel [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" ""))])] + "" +) Index: arm.c =================================================================== --- arm.c (revision 163363) +++ arm.c (working copy) @@ -22959,4 +22959,85 @@ arm_expand_sync (enum machine_mode mode, } } +/* Check the legality of operands in an ldrd/strd instruction. 
*/ +bool +thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base, + rtx off1, rtx off2, bool ldrd) +{ + HOST_WIDE_INT offset1 = 0; + HOST_WIDE_INT offset2 = 0; + + if (off1 != NULL) + offset1 = INTVAL (off1); + if (off2 != NULL) + offset2 = INTVAL (off2); + + if (ldrd && (reg1 == reg2)) + return false; + + if ((offset1 + 4) == offset2) + return true; + if ((offset2 + 4) == offset1) + return true; + + return false; +} + +/* Check if the two memory accesses can be merged to an ldrd/strd instruction. + That is they use the same base register, and the gap between constant + offsets should be 4. */ +bool +thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd) +{ + rtx base1, base2, op1; + rtx addr1 = XEXP (mem1, 0); + rtx addr2 = XEXP (mem2, 0); + HOST_WIDE_INT offset1 = 0; + HOST_WIDE_INT offset2 = 0; + + if (REG_P (addr1)) + base1 = addr1; + else if (GET_CODE (addr1) == PLUS) + { + base1 = XEXP (addr1, 0); + op1 = XEXP (addr1, 1); + if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT)) + return false; + offset1 = INTVAL (op1); + } + else + return false; + + if (REG_P (addr2)) + base2 = addr2; + else if (GET_CODE (addr2) == PLUS) + { + base2 = XEXP (addr2, 0); + op1 = XEXP (addr2, 1); + if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT)) + return false; + offset2 = INTVAL (op1); + } + else + return false; + + if (base1 != base2) + return false; + + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) + return false; + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) + return false; + + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) + return false; + + if ((offset1 + 4) == offset2) + return true; + if ((offset2 + 4) == offset1) + return true; + + return false; +} + #include "gt-arm.h" Index: arm-protos.h =================================================================== --- arm-protos.h (revision 163363) +++ arm-protos.h (working copy) @@ -149,7 +149,8 @@ extern void arm_expand_sync (enum machin extern const 
char *arm_output_memory_barrier (rtx *); extern const char *arm_output_sync_insn (rtx, rtx *); extern unsigned int arm_sync_loop_insns (rtx , rtx *); - +extern bool thumb2_check_ldrd_operands (rtx, rtx, rtx, rtx, rtx, bool); +extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool); extern bool arm_output_addr_const_extra (FILE *, rtx); #if defined TREE_CODE Index: ldmstm.md =================================================================== --- ldmstm.md (revision 163363) +++ ldmstm.md (working copy) @@ -852,7 +852,7 @@ (define_insn "*ldm2_ia" (set (match_operand:SI 2 "arm_hard_register_operand" "") (mem:SI (plus:SI (match_dup 3) (const_int 4))))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" "ldm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "load2") (set_attr "predicable" "yes")]) @@ -901,7 +901,7 @@ (define_insn "*stm2_ia" (match_operand:SI 1 "arm_hard_register_operand" "")) (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) (match_operand:SI 2 "arm_hard_register_operand" ""))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" "stm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "store2") (set_attr "predicable" "yes")]) @@ -1041,7 +1041,7 @@ (define_insn "*ldm2_db" (set (match_operand:SI 2 "arm_hard_register_operand" "") (mem:SI (plus:SI (match_dup 3) (const_int -4))))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" "ldm%(db%)\t%3, {%1, %2}" [(set_attr "type" "load2") (set_attr "predicable" "yes")]) @@ -1067,7 +1067,7 @@ (define_insn "*stm2_db" (match_operand:SI 1 "arm_hard_register_operand" "")) (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) (match_operand:SI 2 "arm_hard_register_operand" ""))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" "stm%(db%)\t%3, {%1, %2}" [(set_attr "type" "store2") (set_attr "predicable" "yes")]) Index: constraints.md 
=================================================================== --- constraints.md (revision 163363) +++ constraints.md (working copy) @@ -31,7 +31,7 @@ ;; The following multi-letter normal constraints have been used: ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di ;; in Thumb-1 state: Pa, Pb, Pc, Pd -;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px +;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px, Py ;; The following memory constraints have been used: ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us @@ -189,6 +189,13 @@ (define_constraint "Px" (and (match_code "const_int") (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1"))) +(define_constraint "Py" + "@internal In Thumb-2 state a constant that is a multiple of 4 in the + range -1020 to 1024" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= -1020 && ival <= 1024 + && (ival & 3) == 0"))) + (define_constraint "G" "In ARM/Thumb-2 state a valid FPA immediate constant." (and (match_code "const_double") Index: pr40457-1.c =================================================================== --- pr40457-1.c (revision 163363) +++ pr40457-1.c (working copy) @@ -1,9 +1,9 @@ -/* { dg-options "-Os" } */ +/* { dg-options "-O2" } */ /* { dg-do compile } */ int bar(int* p) { - int x = p[0] + p[1]; + int x = p[0] + p[1] + p[2]; return x; } Index: pr40457-2.c =================================================================== --- pr40457-2.c (revision 163363) +++ pr40457-2.c (working copy) @@ -5,6 +5,7 @@ void foo(int* p) { p[0] = 1; p[1] = 0; + p[2] = 2; } /* { dg-final { scan-assembler "stm" } } */ Index: pr45335.c =================================================================== --- pr45335.c (revision 0) +++ pr45335.c (revision 0) @@ -0,0 +1,22 @@ +/* { dg-options "-mthumb -O2" } */ +/* { dg-require-effective-target arm_thumb2_ok } */ +/* { dg-final { scan-assembler "ldrd" } } */ +/* { dg-final { scan-assembler "strd" } } */ + +struct S +{ + void* p1; + void* p2; + void* p3; + void* p4; 
+}; + +extern printf(char*, ...); + +void foo1(struct S* fp, struct S* otherSaveArea) +{ + struct S* saveA = fp - 1; + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); +} On Tue, Aug 24, 2010 at 9:55 PM, Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> wrote: > > On Tue, 2010-08-24 at 21:09 +0800, Carrot Wei wrote: >> The patterns in original patch conflict with ldm2/stm2 patterns. In thumb2 >> ldrd/strd instructions are more flexible than ldm2/stm2, we don't have any >> reason to continue to use ldm2/stm2. In this new patch I removed the thumb2 >> support of ldm2/stm2. The ldm2/stm2 with update patterns are not affected. > > > I can't approve / reject your patch but ... > > You need to consider the case for the Cortex M3 where we don't want to > use ldrd's with overlapping address and destination registers because it > may trigger a hardware erratum, thus you need to allow ldm2's on the M3 > and *not* generate any ldrd's in such situations. Look at the > implementation of -mfix-cortexm3-ldrd > > Also it's not clear which patterns in ldmstm.md you are modifying > without applying the patch , could you regenerate your patch with > > svn diff --diff-cmd "diff" -x "-aup -F^(define" > > so that we know easily which patterns you are modifying. > > > Thanks, > Ramana > > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2010-08-25 10:02 ` Carrot Wei @ 2010-09-01 15:25 ` Richard Earnshaw 2010-09-04 13:15 ` Carrot Wei 0 siblings, 1 reply; 46+ messages in thread From: Richard Earnshaw @ 2010-09-01 15:25 UTC (permalink / raw) To: Carrot Wei; +Cc: ramana.radhakrishnan, gcc-patches On Wed, 2010-08-25 at 17:57 +0800, Carrot Wei wrote: > Thank you, following is the revised patch that addresses cortex M3 > erratum and new diff for ldmstm.md. > If you submit an updated patch, please re-include the changelog entry, even if it's the same. There are two obvious problems with this patch: 1) You presume that ldrd is always cheaper than ldm(2 regs). This isn't the case on Cortex-A9. I'm not expecting you to work out all the details of when A9 should use LDM and when it should use ldrd, but your code needs to ascertain the costs of each alternative and make a decision based on that answer, not on a static choice. 2) Your code fails to check for volatile mems. These must not be transformed and the original load/store instructions must be preserved. R. > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2010-09-01 15:25 ` Richard Earnshaw @ 2010-09-04 13:15 ` Carrot Wei 2010-09-13 14:54 ` Carrot Wei ` (2 more replies) 0 siblings, 3 replies; 46+ messages in thread From: Carrot Wei @ 2010-09-04 13:15 UTC (permalink / raw) To: Richard Earnshaw; +Cc: ramana.radhakrishnan, gcc-patches On Wed, Sep 1, 2010 at 11:22 PM, Richard Earnshaw <rearnsha@arm.com> wrote: > If you submit an updated patch, please re-include the changelog entry, > even if it's the same. > > There are two obvious problems with this patch: > > 1) You presume that ldrd is always cheaper than ldm(2 regs). This isn't > the case on Cortex-a9. I'm not expecting you to work out all the > details of when A9 should use LDM and when it should use ldrd, but your > code needs to ascertain the costs of each alternative and make a > decision based on that answer, not on a static choice. > > 2) Your code fails to check for volatile mems. These must not be > transformed and the original load/store instructions must be preserved. > 1. A new function thumb2_prefer_ldmstm is used to choose ldm/stm or ldrd/strd. The default behavior is to output ldrd/strd. One should update this function if ldm/stm is better. 2. Function thumb2_legitimate_ldrd_p is updated to check volatile memory access. Following is the new patch ChangeLog: 2010-09-04 Wei Guozhi <carrot@google.com> PR target/45335 * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, thumb2_ldrd_reg2 and peephole2): New insn pattern and related peephole2. (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): New insn pattern and related peephole2. * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. (thumb2_check_ldrd_operands): New function. (thumb2_prefer_ldmstm): New function. * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype. (thumb2_check_ldrd_operands): New prototype. (thumb2_prefer_ldmstm): New prototype. 
* gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): Change the ldm/stm patterns with 2 words to ARM only. * gcc/config/arm/constraints.md (Py): New thumb2 constant constraint suitable to ldrd/strd instructions. 2010-09-04 Wei Guozhi <carrot@google.com> PR target/45335 * gcc.target/arm/pr45335.c: New test. * gcc.target/arm/pr40457-1.c: Changed to load 3 words. * gcc.target/arm/pr40457-2.c: Changed to store 3 words. Index: thumb2.md =================================================================== --- thumb2.md (revision 163853) +++ thumb2.md (working copy) @@ -1257,3 +1257,226 @@ (define_peephole2 " operands[2] = GEN_INT (32 - INTVAL (operands[2])); ") + +(define_insn "*thumb2_ldrd" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (mem:SI (plus:SI + (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" "Py")))) + (set (match_operand:SI 1 "s_register_operand" "") + (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 4 "const_int_operand" "Py"))))])] + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], + operands[2], operands[3], operands[4], 1)" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + HOST_WIDE_INT offset2 = INTVAL (operands[4]); + if (offset1 > offset2) + { + /* Swap the operands so that memory [base+offset1] is loaded into + operands[0]. 
*/ + rtx tmp = operands[0]; + operands[0] = operands[1]; + operands[1] = tmp; + tmp = operands[3]; + operands[3] = operands[4]; + operands[4] = tmp; + offset1 = INTVAL (operands[3]); + offset2 = INTVAL (operands[4]); + } + if (thumb2_prefer_ldmstm (operands[0], operands[1], + operands[2], operands[3], operands[4], 1)) + return \"ldmdb\\t%2, {%0, %1}\"; + else if (fix_cm3_ldrd && (operands[2] == operands[0])) + { + if (offset1 <= -256) + { + output_asm_insn (\"sub\\t%2, %2, %n3\", operands); + output_asm_insn (\"ldr\\t%1, [%2, #4]\", operands); + output_asm_insn (\"ldr\\t%0, [%2]\", operands); + } + else + { + output_asm_insn (\"ldr\\t%1, [%2, %4]\", operands); + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); + } + return \"\"; + } + else + return \"ldrd\\t%0, %1, [%2, %3]\"; + }" +) + +(define_insn "*thumb2_ldrd_reg1" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))) + (set (match_operand:SI 1 "s_register_operand" "") + (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 3 "const_int_operand" "Py"))))])] + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], + operands[2], 0, operands[3], 1)" + "* + { + HOST_WIDE_INT offset2 = INTVAL (operands[3]); + if (offset2 == 4) + { + if (thumb2_prefer_ldmstm (operands[0], operands[1], + operands[2], 0, operands[3], 1)) + return \"ldmia\\t%2, {%0, %1}\"; + if (fix_cm3_ldrd && (operands[2] == operands[0])) + { + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); + output_asm_insn (\"ldr\\t%0, [%2]\", operands); + return \"\"; + } + return \"ldrd\\t%0, %1, [%2]\"; + } + else + { + if (fix_cm3_ldrd && (operands[2] == operands[1])) + { + output_asm_insn (\"ldr\\t%0, [%2]\", operands); + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); + } + return \"ldrd\\t%1, %0, [%2, %3]\"; + } + }" +) + +(define_insn "*thumb2_ldrd_reg2" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (mem:SI (plus:SI + (match_operand:SI 2 
"s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" "Py")))) + (set (match_operand:SI 1 "s_register_operand" "") + (mem:SI (match_dup 2)))])] + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], + operands[2], operands[3], 0, 1)" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + if (offset1 == -4) + { + if (fix_cm3_ldrd && (operands[2] == operands[0])) + { + output_asm_insn (\"ldr\\t%1, [%2]\", operands); + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); + return \"\"; + } + return \"ldrd\\t%0, %1, [%2, %3]\"; + } + else + { + if (thumb2_prefer_ldmstm (operands[0], operands[1], + operands[2], operands[3], 0, 1)) + return \"ldmia\\t%2, {%1, %0}\"; + if (fix_cm3_ldrd && (operands[2] == operands[1])) + { + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); + output_asm_insn (\"ldr\\t%1, [%2]\", operands); + return \"\"; + } + return \"ldrd\\t%1, %0, [%2]\"; + } + }" +) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" ""))] + "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1], + operands[2], operands[3], 1)" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" ""))])] + "" +) + +(define_insn "*thumb2_strd" + [(parallel [(set (mem:SI + (plus:SI (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" "Py"))) + (match_operand:SI 0 "s_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 4 "const_int_operand" "Py"))) + (match_operand:SI 1 "s_register_operand" ""))])] + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], + operands[2], operands[3], operands[4], 0)" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + 
HOST_WIDE_INT offset2 = INTVAL (operands[4]); + if (thumb2_prefer_ldmstm (operands[0], operands[1], + operands[2], operands[3], operands[4], 0)) + return \"stmdb\\t%2, {%0, %1}\"; + if (offset1 < offset2 ) + return \"strd\\t%0, %1, [%2, %3]\"; + else + return \"strd\\t%1, %0, [%2, %4]\"; + }" +) + +(define_insn "*thumb2_strd_reg1" + [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk")) + (match_operand:SI 0 "s_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 3 "const_int_operand" "Py"))) + (match_operand:SI 1 "s_register_operand" ""))])] + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], + operands[2], 0, operands[3], 0)" + "* + { + HOST_WIDE_INT offset2 = INTVAL (operands[3]); + if (offset2 == 4) + { + if (thumb2_prefer_ldmstm (operands[0], operands[1], + operands[2], 0, operands[3], 0)) + return \"stmia\\t%2, {%0, %1}\"; + return \"strd\\t%0, %1, [%2]\"; + } + else + return \"strd\\t%1, %0, [%2, %3]\"; + }" +) + +(define_insn "*thumb2_strd_reg2" + [(parallel [(set (mem:SI (plus:SI + (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" "Py"))) + (match_operand:SI 0 "s_register_operand" "")) + (set (mem:SI (match_dup 2)) + (match_operand:SI 1 "s_register_operand" ""))])] + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], + operands[2], operands[3], 0, 0)" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + if (offset1 == -4) + return \"strd\\t%0, %1, [%2, %3]\"; + else + { + if (thumb2_prefer_ldmstm (operands[0], operands[1], + operands[2], operands[3], 0, 0)) + return \"stmia\\t%2, {%1, %0}\"; + return \"strd\\t%1, %0, [%2]\"; + } + }" +) + +(define_peephole2 + [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" ""))] + "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1], + 
operands[2], operands[3], 0)" + [(parallel [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" ""))])] + "" +) Index: arm.c =================================================================== --- arm.c (revision 163853) +++ arm.c (working copy) @@ -22976,4 +22976,125 @@ arm_expand_sync (enum machine_mode mode, } } +/* Check the legality of operands in an ldrd/strd instruction. */ +bool +thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base, + rtx off1, rtx off2, bool ldrd) +{ + HOST_WIDE_INT offset1 = 0; + HOST_WIDE_INT offset2 = 0; + + if (off1 != NULL) + offset1 = INTVAL (off1); + if (off2 != NULL) + offset2 = INTVAL (off2); + + if (ldrd && (reg1 == reg2)) + return false; + + if ((offset1 + 4) == offset2) + return true; + if ((offset2 + 4) == offset1) + return true; + + return false; +} + +/* Check if the two memory accesses can be merged to an ldrd/strd instruction. + That is they use the same base register, and the gap between constant + offsets should be 4. 
*/ +bool +thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd) +{ + rtx base1, base2, op1; + rtx addr1 = XEXP (mem1, 0); + rtx addr2 = XEXP (mem2, 0); + HOST_WIDE_INT offset1 = 0; + HOST_WIDE_INT offset2 = 0; + + if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) + return false; + + if (REG_P (addr1)) + base1 = addr1; + else if (GET_CODE (addr1) == PLUS) + { + base1 = XEXP (addr1, 0); + op1 = XEXP (addr1, 1); + if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT)) + return false; + offset1 = INTVAL (op1); + } + else + return false; + + if (REG_P (addr2)) + base2 = addr2; + else if (GET_CODE (addr2) == PLUS) + { + base2 = XEXP (addr2, 0); + op1 = XEXP (addr2, 1); + if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT)) + return false; + offset2 = INTVAL (op1); + } + else + return false; + + if (base1 != base2) + return false; + + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) + return false; + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) + return false; + + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) + return false; + + if ((offset1 + 4) == offset2) + return true; + if ((offset2 + 4) == offset1) + return true; + + return false; +} + +/* Check if the insn can be expressed as ldm/stm with less cost. */ +bool +thumb2_prefer_ldmstm (rtx reg1, rtx reg2, rtx base, + rtx off1, rtx off2, bool ldrd) +{ + HOST_WIDE_INT offset1 = 0; + HOST_WIDE_INT offset2 = 0; + + if (off1 != NULL) + offset1 = INTVAL (off1); + if (off2 != NULL) + offset2 = INTVAL (off2); + + if (offset1 > offset2) + { + rtx tmp; + HOST_WIDE_INT t = offset1; + offset1 = offset2; + offset2 = t; + tmp = reg1; + reg1 = reg2; + reg2 = tmp; + } + + /* The offset of ldmdb is -8, the offset of ldmia is 0. */ + if ((offset1 != -8) && (offset1 != 0)) + return false; + + /* Lower register corresponds to lower memory. */ + if (REGNO (reg1) > REGNO (reg2)) + return false; + + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower + cost. 
*/ + return false; +} + #include "gt-arm.h" Index: arm-protos.h =================================================================== --- arm-protos.h (revision 163853) +++ arm-protos.h (working copy) @@ -149,7 +149,9 @@ extern void arm_expand_sync (enum machin extern const char *arm_output_memory_barrier (rtx *); extern const char *arm_output_sync_insn (rtx, rtx *); extern unsigned int arm_sync_loop_insns (rtx , rtx *); - +extern bool thumb2_check_ldrd_operands (rtx, rtx, rtx, rtx, rtx, bool); +extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool); +extern bool thumb2_prefer_ldmstm (rtx, rtx, rtx, rtx, rtx, bool); extern bool arm_output_addr_const_extra (FILE *, rtx); #if defined TREE_CODE Index: ldmstm.md =================================================================== --- ldmstm.md (revision 163853) +++ ldmstm.md (working copy) @@ -852,7 +852,7 @@ (define_insn "*ldm2_ia" (set (match_operand:SI 2 "arm_hard_register_operand" "") (mem:SI (plus:SI (match_dup 3) (const_int 4))))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" "ldm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "load2") (set_attr "predicable" "yes")]) @@ -901,7 +901,7 @@ (define_insn "*stm2_ia" (match_operand:SI 1 "arm_hard_register_operand" "")) (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) (match_operand:SI 2 "arm_hard_register_operand" ""))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" "stm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "store2") (set_attr "predicable" "yes")]) @@ -1041,7 +1041,7 @@ (define_insn "*ldm2_db" (set (match_operand:SI 2 "arm_hard_register_operand" "") (mem:SI (plus:SI (match_dup 3) (const_int -4))))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" "ldm%(db%)\t%3, {%1, %2}" [(set_attr "type" "load2") (set_attr "predicable" "yes")]) @@ -1067,7 +1067,7 @@ (define_insn "*stm2_db" (match_operand:SI 1 
"arm_hard_register_operand" "")) (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) (match_operand:SI 2 "arm_hard_register_operand" ""))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" "stm%(db%)\t%3, {%1, %2}" [(set_attr "type" "store2") (set_attr "predicable" "yes")]) Index: constraints.md =================================================================== --- constraints.md (revision 163853) +++ constraints.md (working copy) @@ -31,7 +31,7 @@ ;; The following multi-letter normal constraints have been used: ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz ;; in Thumb-1 state: Pa, Pb, Pc, Pd -;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px +;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px, Py ;; The following memory constraints have been used: ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us @@ -189,6 +189,13 @@ (define_constraint "Px" (and (match_code "const_int") (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1"))) +(define_constraint "Py" + "@internal In Thumb-2 state a constant that is a multiple of 4 in the + range -1020 to 1024" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= -1020 && ival <= 1024 + && (ival & 3) == 0"))) + (define_constraint "G" "In ARM/Thumb-2 state a valid FPA immediate constant." 
(and (match_code "const_double") Index: pr40457-1.c =================================================================== --- pr40457-1.c (revision 163853) +++ pr40457-1.c (working copy) @@ -1,9 +1,9 @@ -/* { dg-options "-Os" } */ +/* { dg-options "-O2" } */ /* { dg-do compile } */ int bar(int* p) { - int x = p[0] + p[1]; + int x = p[0] + p[1] + p[2]; return x; } Index: pr40457-2.c =================================================================== --- pr40457-2.c (revision 163853) +++ pr40457-2.c (working copy) @@ -5,6 +5,7 @@ void foo(int* p) { p[0] = 1; p[1] = 0; + p[2] = 2; } /* { dg-final { scan-assembler "stm" } } */ Index: pr45335.c =================================================================== --- pr45335.c (revision 0) +++ pr45335.c (revision 0) @@ -0,0 +1,22 @@ +/* { dg-options "-mthumb -O2" } */ +/* { dg-require-effective-target arm_thumb2_ok } */ +/* { dg-final { scan-assembler "ldrd" } } */ +/* { dg-final { scan-assembler "strd" } } */ + +struct S +{ + void* p1; + void* p2; + void* p3; + void* p4; +}; + +extern printf(char*, ...); + +void foo1(struct S* fp, struct S* otherSaveArea) +{ + struct S* saveA = fp - 1; + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); +} ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2010-09-04 13:15 ` Carrot Wei @ 2010-09-13 14:54 ` Carrot Wei 2010-09-19 9:10 ` [PING][PATCH: " Carrot Wei 2010-10-13 11:28 ` [PATCH: " Paul Brook 2 siblings, 0 replies; 46+ messages in thread From: Carrot Wei @ 2010-09-13 14:54 UTC (permalink / raw) To: Richard Earnshaw; +Cc: ramana.radhakrishnan, gcc-patches Ping On Sat, Sep 4, 2010 at 8:41 PM, Carrot Wei <carrot@google.com> wrote: > > On Wed, Sep 1, 2010 at 11:22 PM, Richard Earnshaw <rearnsha@arm.com> wrote: > > If you submit an updated patch, please re-include the changelog entry, > > even if it's the same. > > > > There are two obvious problems with this patch: > > > > 1) You presume that ldrd is always cheaper than ldm(2 regs). This isn't > > the case on Cortex-a9. I'm not expecting you to work out all the > > details of when A9 should use LDM and when it should use ldrd, but your > > code needs to ascertain the costs of each alternative and make a > > decision based on that answer, not on a static choice. > > > > 2) Your code fails to check for volatile mems. These must not be > > transformed and the original load/store instructions must be preserved. > > > > 1. A new function thumb2_prefer_ldmstm is used to choose ldm/stm or ldrd/strd. > The default behavior is to output ldrd/strd. One should update this function if > ldm/stm is better. > > 2. Function thumb2_legitimate_ldrd_p is updated to check volatile memory access. > > Following is the new patch > > ChangeLog: > 2010-09-04 Wei Guozhi <carrot@google.com> > > PR target/45335 > * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, > thumb2_ldrd_reg2 and peephole2): New insn pattern and related > peephole2. > (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): > New insn pattern and related peephole2. > * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. > (thumb2_check_ldrd_operands): New function. > (thumb2_prefer_ldmstm): New function. 
> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype. > (thumb2_check_ldrd_operands): New prototype. > (thumb2_prefer_ldmstm): New prototype. > * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): > Change the ldm/stm patterns with 2 words to ARM only. > * gcc/config/arm/constraints.md (Py): New thumb2 constant constraint > suitable to ldrd/strd instructions. > > > 2010-09-04 Wei Guozhi <carrot@google.com> > > PR target/45335 > * gcc.target/arm/pr45335.c: New test. > * gcc.target/arm/pr40457-1.c: Changed to load 3 words. > * gcc.target/arm/pr40457-2.c: Changed to store 3 words. > > > > Index: thumb2.md > =================================================================== > --- thumb2.md (revision 163853) > +++ thumb2.md (working copy) > @@ -1257,3 +1257,226 @@ (define_peephole2 > " > operands[2] = GEN_INT (32 - INTVAL (operands[2])); > ") > + > +(define_insn "*thumb2_ldrd" > + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") > + (mem:SI (plus:SI > + (match_operand:SI 2 "s_register_operand" "rk") > + (match_operand:SI 3 "const_int_operand" "Py")))) > + (set (match_operand:SI 1 "s_register_operand" "") > + (mem:SI (plus:SI (match_dup 2) > + (match_operand:SI 4 "const_int_operand" "Py"))))])] > + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], > + operands[2], operands[3], operands[4], 1)" > + "* > + { > + HOST_WIDE_INT offset1 = INTVAL (operands[3]); > + HOST_WIDE_INT offset2 = INTVAL (operands[4]); > + if (offset1 > offset2) > + { > + /* Swap the operands so that memory [base+offset1] is loaded into > + operands[0]. 
*/ > + rtx tmp = operands[0]; > + operands[0] = operands[1]; > + operands[1] = tmp; > + tmp = operands[3]; > + operands[3] = operands[4]; > + operands[4] = tmp; > + offset1 = INTVAL (operands[3]); > + offset2 = INTVAL (operands[4]); > + } > + if (thumb2_prefer_ldmstm (operands[0], operands[1], > + operands[2], operands[3], operands[4], 1)) > + return \"ldmdb\\t%2, {%0, %1}\"; > + else if (fix_cm3_ldrd && (operands[2] == operands[0])) > + { > + if (offset1 <= -256) > + { > + output_asm_insn (\"sub\\t%2, %2, %n3\", operands); > + output_asm_insn (\"ldr\\t%1, [%2, #4]\", operands); > + output_asm_insn (\"ldr\\t%0, [%2]\", operands); > + } > + else > + { > + output_asm_insn (\"ldr\\t%1, [%2, %4]\", operands); > + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); > + } > + return \"\"; > + } > + else > + return \"ldrd\\t%0, %1, [%2, %3]\"; > + }" > +) > + > +(define_insn "*thumb2_ldrd_reg1" > + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") > + (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))) > + (set (match_operand:SI 1 "s_register_operand" "") > + (mem:SI (plus:SI (match_dup 2) > + (match_operand:SI 3 "const_int_operand" "Py"))))])] > + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], > + operands[2], 0, operands[3], 1)" > + "* > + { > + HOST_WIDE_INT offset2 = INTVAL (operands[3]); > + if (offset2 == 4) > + { > + if (thumb2_prefer_ldmstm (operands[0], operands[1], > + operands[2], 0, operands[3], 1)) > + return \"ldmia\\t%2, {%0, %1}\"; > + if (fix_cm3_ldrd && (operands[2] == operands[0])) > + { > + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); > + output_asm_insn (\"ldr\\t%0, [%2]\", operands); > + return \"\"; > + } > + return \"ldrd\\t%0, %1, [%2]\"; > + } > + else > + { > + if (fix_cm3_ldrd && (operands[2] == operands[1])) > + { > + output_asm_insn (\"ldr\\t%0, [%2]\", operands); > + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); > + } > + return \"ldrd\\t%1, %0, [%2, %3]\"; > + } > + }" > +) > + > 
+(define_insn "*thumb2_ldrd_reg2" > + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") > + (mem:SI (plus:SI > + (match_operand:SI 2 "s_register_operand" "rk") > + (match_operand:SI 3 "const_int_operand" "Py")))) > + (set (match_operand:SI 1 "s_register_operand" "") > + (mem:SI (match_dup 2)))])] > + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], > + operands[2], operands[3], 0, 1)" > + "* > + { > + HOST_WIDE_INT offset1 = INTVAL (operands[3]); > + if (offset1 == -4) > + { > + if (fix_cm3_ldrd && (operands[2] == operands[0])) > + { > + output_asm_insn (\"ldr\\t%1, [%2]\", operands); > + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); > + return \"\"; > + } > + return \"ldrd\\t%0, %1, [%2, %3]\"; > + } > + else > + { > + if (thumb2_prefer_ldmstm (operands[0], operands[1], > + operands[2], operands[3], 0, 1)) > + return \"ldmia\\t%2, {%1, %0}\"; > + if (fix_cm3_ldrd && (operands[2] == operands[1])) > + { > + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); > + output_asm_insn (\"ldr\\t%1, [%2]\", operands); > + return \"\"; > + } > + return \"ldrd\\t%1, %0, [%2]\"; > + } > + }" > +) > + > +(define_peephole2 > + [(set (match_operand:SI 0 "s_register_operand" "") > + (match_operand:SI 2 "memory_operand" "")) > + (set (match_operand:SI 1 "s_register_operand" "") > + (match_operand:SI 3 "memory_operand" ""))] > + "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1], > + operands[2], operands[3], 1)" > + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") > + (match_operand:SI 2 "memory_operand" "")) > + (set (match_operand:SI 1 "s_register_operand" "") > + (match_operand:SI 3 "memory_operand" ""))])] > + "" > +) > + > +(define_insn "*thumb2_strd" > + [(parallel [(set (mem:SI > + (plus:SI (match_operand:SI 2 "s_register_operand" "rk") > + (match_operand:SI 3 "const_int_operand" "Py"))) > + (match_operand:SI 0 "s_register_operand" "")) > + (set (mem:SI (plus:SI (match_dup 2) > + (match_operand:SI 4 
"const_int_operand" "Py"))) > + (match_operand:SI 1 "s_register_operand" ""))])] > + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], > + operands[2], operands[3], operands[4], 0)" > + "* > + { > + HOST_WIDE_INT offset1 = INTVAL (operands[3]); > + HOST_WIDE_INT offset2 = INTVAL (operands[4]); > + if (thumb2_prefer_ldmstm (operands[0], operands[1], > + operands[2], operands[3], operands[4], 0)) > + return \"stmdb\\t%2, {%0, %1}\"; > + if (offset1 < offset2 ) > + return \"strd\\t%0, %1, [%2, %3]\"; > + else > + return \"strd\\t%1, %0, [%2, %4]\"; > + }" > +) > + > +(define_insn "*thumb2_strd_reg1" > + [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk")) > + (match_operand:SI 0 "s_register_operand" "")) > + (set (mem:SI (plus:SI (match_dup 2) > + (match_operand:SI 3 "const_int_operand" "Py"))) > + (match_operand:SI 1 "s_register_operand" ""))])] > + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], > + operands[2], 0, operands[3], 0)" > + "* > + { > + HOST_WIDE_INT offset2 = INTVAL (operands[3]); > + if (offset2 == 4) > + { > + if (thumb2_prefer_ldmstm (operands[0], operands[1], > + operands[2], 0, operands[3], 0)) > + return \"stmia\\t%2, {%0, %1}\"; > + return \"strd\\t%0, %1, [%2]\"; > + } > + else > + return \"strd\\t%1, %0, [%2, %3]\"; > + }" > +) > + > +(define_insn "*thumb2_strd_reg2" > + [(parallel [(set (mem:SI (plus:SI > + (match_operand:SI 2 "s_register_operand" "rk") > + (match_operand:SI 3 "const_int_operand" "Py"))) > + (match_operand:SI 0 "s_register_operand" "")) > + (set (mem:SI (match_dup 2)) > + (match_operand:SI 1 "s_register_operand" ""))])] > + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], > + operands[2], operands[3], 0, 0)" > + "* > + { > + HOST_WIDE_INT offset1 = INTVAL (operands[3]); > + if (offset1 == -4) > + return \"strd\\t%0, %1, [%2, %3]\"; > + else > + { > + if (thumb2_prefer_ldmstm (operands[0], operands[1], > + operands[2], operands[3], 0, 0)) > 
+ return \"stmia\\t%2, {%1, %0}\"; > + return \"strd\\t%1, %0, [%2]\"; > + } > + }" > +) > + > +(define_peephole2 > + [(set (match_operand:SI 2 "memory_operand" "") > + (match_operand:SI 0 "s_register_operand" "")) > + (set (match_operand:SI 3 "memory_operand" "") > + (match_operand:SI 1 "s_register_operand" ""))] > + "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1], > + operands[2], operands[3], 0)" > + [(parallel [(set (match_operand:SI 2 "memory_operand" "") > + (match_operand:SI 0 "s_register_operand" "")) > + (set (match_operand:SI 3 "memory_operand" "") > + (match_operand:SI 1 "s_register_operand" ""))])] > + "" > +) > Index: arm.c > =================================================================== > --- arm.c (revision 163853) > +++ arm.c (working copy) > @@ -22976,4 +22976,125 @@ arm_expand_sync (enum machine_mode mode, > } > } > > +/* Check the legality of operands in an ldrd/strd instruction. */ > +bool > +thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base, > + rtx off1, rtx off2, bool ldrd) > +{ > + HOST_WIDE_INT offset1 = 0; > + HOST_WIDE_INT offset2 = 0; > + > + if (off1 != NULL) > + offset1 = INTVAL (off1); > + if (off2 != NULL) > + offset2 = INTVAL (off2); > + > + if (ldrd && (reg1 == reg2)) > + return false; > + > + if ((offset1 + 4) == offset2) > + return true; > + if ((offset2 + 4) == offset1) > + return true; > + > + return false; > +} > + > +/* Check if the two memory accesses can be merged to an ldrd/strd instruction. > + That is they use the same base register, and the gap between constant > + offsets should be 4. 
*/ > +bool > +thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd) > +{ > + rtx base1, base2, op1; > + rtx addr1 = XEXP (mem1, 0); > + rtx addr2 = XEXP (mem2, 0); > + HOST_WIDE_INT offset1 = 0; > + HOST_WIDE_INT offset2 = 0; > + > + if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) > + return false; > + > + if (REG_P (addr1)) > + base1 = addr1; > + else if (GET_CODE (addr1) == PLUS) > + { > + base1 = XEXP (addr1, 0); > + op1 = XEXP (addr1, 1); > + if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT)) > + return false; > + offset1 = INTVAL (op1); > + } > + else > + return false; > + > + if (REG_P (addr2)) > + base2 = addr2; > + else if (GET_CODE (addr2) == PLUS) > + { > + base2 = XEXP (addr2, 0); > + op1 = XEXP (addr2, 1); > + if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT)) > + return false; > + offset2 = INTVAL (op1); > + } > + else > + return false; > + > + if (base1 != base2) > + return false; > + > + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) > + return false; > + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) > + return false; > + > + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) > + return false; > + > + if ((offset1 + 4) == offset2) > + return true; > + if ((offset2 + 4) == offset1) > + return true; > + > + return false; > +} > + > +/* Check if the insn can be expressed as ldm/stm with less cost. */ > +bool > +thumb2_prefer_ldmstm (rtx reg1, rtx reg2, rtx base, > + rtx off1, rtx off2, bool ldrd) > +{ > + HOST_WIDE_INT offset1 = 0; > + HOST_WIDE_INT offset2 = 0; > + > + if (off1 != NULL) > + offset1 = INTVAL (off1); > + if (off2 != NULL) > + offset2 = INTVAL (off2); > + > + if (offset1 > offset2) > + { > + rtx tmp; > + HOST_WIDE_INT t = offset1; > + offset1 = offset2; > + offset2 = t; > + tmp = reg1; > + reg1 = reg2; > + reg2 = tmp; > + } > + > + /* The offset of ldmdb is -8, the offset of ldmia is 0. 
*/ > + if ((offset1 != -8) && (offset1 != 0)) > + return false; > + > + /* Lower register corresponds to lower memory. */ > + if (REGNO (reg1) > REGNO (reg2)) > + return false; > + > + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower > + cost. */ > + return false; > +} > + > #include "gt-arm.h" > > Index: arm-protos.h > =================================================================== > --- arm-protos.h (revision 163853) > +++ arm-protos.h (working copy) > @@ -149,7 +149,9 @@ extern void arm_expand_sync (enum machin > extern const char *arm_output_memory_barrier (rtx *); > extern const char *arm_output_sync_insn (rtx, rtx *); > extern unsigned int arm_sync_loop_insns (rtx , rtx *); > - > +extern bool thumb2_check_ldrd_operands (rtx, rtx, rtx, rtx, rtx, bool); > +extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool); > +extern bool thumb2_prefer_ldmstm (rtx, rtx, rtx, rtx, rtx, bool); > extern bool arm_output_addr_const_extra (FILE *, rtx); > > #if defined TREE_CODE > Index: ldmstm.md > =================================================================== > --- ldmstm.md (revision 163853) > +++ ldmstm.md (working copy) > @@ -852,7 +852,7 @@ (define_insn "*ldm2_ia" > (set (match_operand:SI 2 "arm_hard_register_operand" "") > (mem:SI (plus:SI (match_dup 3) > (const_int 4))))])] > - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" > + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" > "ldm%(ia%)\t%3, {%1, %2}" > [(set_attr "type" "load2") > (set_attr "predicable" "yes")]) > @@ -901,7 +901,7 @@ (define_insn "*stm2_ia" > (match_operand:SI 1 "arm_hard_register_operand" "")) > (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) > (match_operand:SI 2 "arm_hard_register_operand" ""))])] > - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" > + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" > "stm%(ia%)\t%3, {%1, %2}" > [(set_attr "type" "store2") > (set_attr "predicable" "yes")]) > @@ -1041,7 +1041,7 @@ (define_insn "*ldm2_db" > (set (match_operand:SI 
2 "arm_hard_register_operand" "") > (mem:SI (plus:SI (match_dup 3) > (const_int -4))))])] > - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" > + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" > "ldm%(db%)\t%3, {%1, %2}" > [(set_attr "type" "load2") > (set_attr "predicable" "yes")]) > @@ -1067,7 +1067,7 @@ (define_insn "*stm2_db" > (match_operand:SI 1 "arm_hard_register_operand" "")) > (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) > (match_operand:SI 2 "arm_hard_register_operand" ""))])] > - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" > + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" > "stm%(db%)\t%3, {%1, %2}" > [(set_attr "type" "store2") > (set_attr "predicable" "yes")]) > Index: constraints.md > =================================================================== > --- constraints.md (revision 163853) > +++ constraints.md (working copy) > @@ -31,7 +31,7 @@ > ;; The following multi-letter normal constraints have been used: > ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz > ;; in Thumb-1 state: Pa, Pb, Pc, Pd > -;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px > +;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px, Py > > ;; The following memory constraints have been used: > ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us > @@ -189,6 +189,13 @@ (define_constraint "Px" > (and (match_code "const_int") > (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1"))) > > +(define_constraint "Py" > + "@internal In Thumb-2 state a constant that is a multiple of 4 in the > + range -1020 to 1024" > + (and (match_code "const_int") > + (match_test "TARGET_THUMB2 && ival >= -1020 && ival <= 1024 > + && (ival & 3) == 0"))) > + > (define_constraint "G" > "In ARM/Thumb-2 state a valid FPA immediate constant." 
> (and (match_code "const_double") > > > Index: pr40457-1.c > =================================================================== > --- pr40457-1.c (revision 163853) > +++ pr40457-1.c (working copy) > @@ -1,9 +1,9 @@ > -/* { dg-options "-Os" } */ > +/* { dg-options "-O2" } */ > /* { dg-do compile } */ > > int bar(int* p) > { > - int x = p[0] + p[1]; > + int x = p[0] + p[1] + p[2]; > return x; > } > > Index: pr40457-2.c > =================================================================== > --- pr40457-2.c (revision 163853) > +++ pr40457-2.c (working copy) > @@ -5,6 +5,7 @@ void foo(int* p) > { > p[0] = 1; > p[1] = 0; > + p[2] = 2; > } > > /* { dg-final { scan-assembler "stm" } } */ > Index: pr45335.c > =================================================================== > --- pr45335.c (revision 0) > +++ pr45335.c (revision 0) > @@ -0,0 +1,22 @@ > +/* { dg-options "-mthumb -O2" } */ > +/* { dg-require-effective-target arm_thumb2_ok } */ > +/* { dg-final { scan-assembler "ldrd" } } */ > +/* { dg-final { scan-assembler "strd" } } */ > + > +struct S > +{ > + void* p1; > + void* p2; > + void* p3; > + void* p4; > +}; > + > +extern printf(char*, ...); > + > +void foo1(struct S* fp, struct S* otherSaveArea) > +{ > + struct S* saveA = fp - 1; > + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); > + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", > + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); > +} ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PING][PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2010-09-04 13:15 ` Carrot Wei 2010-09-13 14:54 ` Carrot Wei @ 2010-09-19 9:10 ` Carrot Wei 2010-09-25 19:25 ` Carrot Wei 2010-10-13 11:28 ` [PATCH: " Paul Brook 2 siblings, 1 reply; 46+ messages in thread From: Carrot Wei @ 2010-09-19 9:10 UTC (permalink / raw) To: Richard Earnshaw; +Cc: ramana.radhakrishnan, gcc-patches Ping On Sat, Sep 4, 2010 at 8:41 PM, Carrot Wei <carrot@google.com> wrote: > On Wed, Sep 1, 2010 at 11:22 PM, Richard Earnshaw <rearnsha@arm.com> wrote: >> If you submit an updated patch, please re-include the changelog entry, >> even if it's the same. >> >> There are two obvious problems with this patch: >> >> 1) You presume that ldrd is always cheaper than ldm(2 regs). This isn't >> the case on Cortex-a9. I'm not expecting you to work out all the >> details of when A9 should use LDM and when it should use ldrd, but your >> code needs to ascertain the costs of each alternative and make a >> decision based on that answer, not on a static choice. >> >> 2) Your code fails to check for volatile mems. These must not be >> transformed and the original load/store instructions must be preserved. >> > > 1. A new function thumb2_prefer_ldmstm is used to choose ldm/stm or ldrd/strd. > The default behavior is to output ldrd/strd. One should update this function if > ldm/stm is better. > > 2. Function thumb2_legitimate_ldrd_p is updated to check volatile memory access. > > Following is the new patch > > ChangeLog: > 2010-09-04 Wei Guozhi <carrot@google.com> > > PR target/45335 > * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, > thumb2_ldrd_reg2 and peephole2): New insn pattern and related > peephole2. > (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): > New insn pattern and related peephole2. > * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. > (thumb2_check_ldrd_operands): New function. > (thumb2_prefer_ldmstm): New function. 
> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype. > (thumb2_check_ldrd_operands): New prototype. > (thumb2_prefer_ldmstm): New prototype. > * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): > Change the ldm/stm patterns with 2 words to ARM only. > * gcc/config/arm/constraints.md (Py): New thumb2 constant constraint > suitable to ldrd/strd instructions. > > > 2010-09-04 Wei Guozhi <carrot@google.com> > > PR target/45335 > * gcc.target/arm/pr45335.c: New test. > * gcc.target/arm/pr40457-1.c: Changed to load 3 words. > * gcc.target/arm/pr40457-2.c: Changed to store 3 words. > > > > Index: thumb2.md > =================================================================== > --- thumb2.md (revision 163853) > +++ thumb2.md (working copy) > @@ -1257,3 +1257,226 @@ (define_peephole2 > " > operands[2] = GEN_INT (32 - INTVAL (operands[2])); > ") > + > +(define_insn "*thumb2_ldrd" > + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") > + (mem:SI (plus:SI > + (match_operand:SI 2 "s_register_operand" "rk") > + (match_operand:SI 3 "const_int_operand" "Py")))) > + (set (match_operand:SI 1 "s_register_operand" "") > + (mem:SI (plus:SI (match_dup 2) > + (match_operand:SI 4 "const_int_operand" "Py"))))])] > + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], > + operands[2], operands[3], operands[4], 1)" > + "* > + { > + HOST_WIDE_INT offset1 = INTVAL (operands[3]); > + HOST_WIDE_INT offset2 = INTVAL (operands[4]); > + if (offset1 > offset2) > + { > + /* Swap the operands so that memory [base+offset1] is loaded into > + operands[0]. 
*/ > + rtx tmp = operands[0]; > + operands[0] = operands[1]; > + operands[1] = tmp; > + tmp = operands[3]; > + operands[3] = operands[4]; > + operands[4] = tmp; > + offset1 = INTVAL (operands[3]); > + offset2 = INTVAL (operands[4]); > + } > + if (thumb2_prefer_ldmstm (operands[0], operands[1], > + operands[2], operands[3], operands[4], 1)) > + return \"ldmdb\\t%2, {%0, %1}\"; > + else if (fix_cm3_ldrd && (operands[2] == operands[0])) > + { > + if (offset1 <= -256) > + { > + output_asm_insn (\"sub\\t%2, %2, %n3\", operands); > + output_asm_insn (\"ldr\\t%1, [%2, #4]\", operands); > + output_asm_insn (\"ldr\\t%0, [%2]\", operands); > + } > + else > + { > + output_asm_insn (\"ldr\\t%1, [%2, %4]\", operands); > + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); > + } > + return \"\"; > + } > + else > + return \"ldrd\\t%0, %1, [%2, %3]\"; > + }" > +) > + > +(define_insn "*thumb2_ldrd_reg1" > + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") > + (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))) > + (set (match_operand:SI 1 "s_register_operand" "") > + (mem:SI (plus:SI (match_dup 2) > + (match_operand:SI 3 "const_int_operand" "Py"))))])] > + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], > + operands[2], 0, operands[3], 1)" > + "* > + { > + HOST_WIDE_INT offset2 = INTVAL (operands[3]); > + if (offset2 == 4) > + { > + if (thumb2_prefer_ldmstm (operands[0], operands[1], > + operands[2], 0, operands[3], 1)) > + return \"ldmia\\t%2, {%0, %1}\"; > + if (fix_cm3_ldrd && (operands[2] == operands[0])) > + { > + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); > + output_asm_insn (\"ldr\\t%0, [%2]\", operands); > + return \"\"; > + } > + return \"ldrd\\t%0, %1, [%2]\"; > + } > + else > + { > + if (fix_cm3_ldrd && (operands[2] == operands[1])) > + { > + output_asm_insn (\"ldr\\t%0, [%2]\", operands); > + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); > + } > + return \"ldrd\\t%1, %0, [%2, %3]\"; > + } > + }" > +) > + > 
+(define_insn "*thumb2_ldrd_reg2" > + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") > + (mem:SI (plus:SI > + (match_operand:SI 2 "s_register_operand" "rk") > + (match_operand:SI 3 "const_int_operand" "Py")))) > + (set (match_operand:SI 1 "s_register_operand" "") > + (mem:SI (match_dup 2)))])] > + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], > + operands[2], operands[3], 0, 1)" > + "* > + { > + HOST_WIDE_INT offset1 = INTVAL (operands[3]); > + if (offset1 == -4) > + { > + if (fix_cm3_ldrd && (operands[2] == operands[0])) > + { > + output_asm_insn (\"ldr\\t%1, [%2]\", operands); > + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); > + return \"\"; > + } > + return \"ldrd\\t%0, %1, [%2, %3]\"; > + } > + else > + { > + if (thumb2_prefer_ldmstm (operands[0], operands[1], > + operands[2], operands[3], 0, 1)) > + return \"ldmia\\t%2, {%1, %0}\"; > + if (fix_cm3_ldrd && (operands[2] == operands[1])) > + { > + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); > + output_asm_insn (\"ldr\\t%1, [%2]\", operands); > + return \"\"; > + } > + return \"ldrd\\t%1, %0, [%2]\"; > + } > + }" > +) > + > +(define_peephole2 > + [(set (match_operand:SI 0 "s_register_operand" "") > + (match_operand:SI 2 "memory_operand" "")) > + (set (match_operand:SI 1 "s_register_operand" "") > + (match_operand:SI 3 "memory_operand" ""))] > + "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1], > + operands[2], operands[3], 1)" > + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") > + (match_operand:SI 2 "memory_operand" "")) > + (set (match_operand:SI 1 "s_register_operand" "") > + (match_operand:SI 3 "memory_operand" ""))])] > + "" > +) > + > +(define_insn "*thumb2_strd" > + [(parallel [(set (mem:SI > + (plus:SI (match_operand:SI 2 "s_register_operand" "rk") > + (match_operand:SI 3 "const_int_operand" "Py"))) > + (match_operand:SI 0 "s_register_operand" "")) > + (set (mem:SI (plus:SI (match_dup 2) > + (match_operand:SI 4 
"const_int_operand" "Py"))) > + (match_operand:SI 1 "s_register_operand" ""))])] > + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], > + operands[2], operands[3], operands[4], 0)" > + "* > + { > + HOST_WIDE_INT offset1 = INTVAL (operands[3]); > + HOST_WIDE_INT offset2 = INTVAL (operands[4]); > + if (thumb2_prefer_ldmstm (operands[0], operands[1], > + operands[2], operands[3], operands[4], 0)) > + return \"stmdb\\t%2, {%0, %1}\"; > + if (offset1 < offset2 ) > + return \"strd\\t%0, %1, [%2, %3]\"; > + else > + return \"strd\\t%1, %0, [%2, %4]\"; > + }" > +) > + > +(define_insn "*thumb2_strd_reg1" > + [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk")) > + (match_operand:SI 0 "s_register_operand" "")) > + (set (mem:SI (plus:SI (match_dup 2) > + (match_operand:SI 3 "const_int_operand" "Py"))) > + (match_operand:SI 1 "s_register_operand" ""))])] > + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], > + operands[2], 0, operands[3], 0)" > + "* > + { > + HOST_WIDE_INT offset2 = INTVAL (operands[3]); > + if (offset2 == 4) > + { > + if (thumb2_prefer_ldmstm (operands[0], operands[1], > + operands[2], 0, operands[3], 0)) > + return \"stmia\\t%2, {%0, %1}\"; > + return \"strd\\t%0, %1, [%2]\"; > + } > + else > + return \"strd\\t%1, %0, [%2, %3]\"; > + }" > +) > + > +(define_insn "*thumb2_strd_reg2" > + [(parallel [(set (mem:SI (plus:SI > + (match_operand:SI 2 "s_register_operand" "rk") > + (match_operand:SI 3 "const_int_operand" "Py"))) > + (match_operand:SI 0 "s_register_operand" "")) > + (set (mem:SI (match_dup 2)) > + (match_operand:SI 1 "s_register_operand" ""))])] > + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], > + operands[2], operands[3], 0, 0)" > + "* > + { > + HOST_WIDE_INT offset1 = INTVAL (operands[3]); > + if (offset1 == -4) > + return \"strd\\t%0, %1, [%2, %3]\"; > + else > + { > + if (thumb2_prefer_ldmstm (operands[0], operands[1], > + operands[2], operands[3], 0, 0)) > 
+ return \"stmia\\t%2, {%1, %0}\"; > + return \"strd\\t%1, %0, [%2]\"; > + } > + }" > +) > + > +(define_peephole2 > + [(set (match_operand:SI 2 "memory_operand" "") > + (match_operand:SI 0 "s_register_operand" "")) > + (set (match_operand:SI 3 "memory_operand" "") > + (match_operand:SI 1 "s_register_operand" ""))] > + "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1], > + operands[2], operands[3], 0)" > + [(parallel [(set (match_operand:SI 2 "memory_operand" "") > + (match_operand:SI 0 "s_register_operand" "")) > + (set (match_operand:SI 3 "memory_operand" "") > + (match_operand:SI 1 "s_register_operand" ""))])] > + "" > +) > Index: arm.c > =================================================================== > --- arm.c (revision 163853) > +++ arm.c (working copy) > @@ -22976,4 +22976,125 @@ arm_expand_sync (enum machine_mode mode, > } > } > > +/* Check the legality of operands in an ldrd/strd instruction. */ > +bool > +thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base, > + rtx off1, rtx off2, bool ldrd) > +{ > + HOST_WIDE_INT offset1 = 0; > + HOST_WIDE_INT offset2 = 0; > + > + if (off1 != NULL) > + offset1 = INTVAL (off1); > + if (off2 != NULL) > + offset2 = INTVAL (off2); > + > + if (ldrd && (reg1 == reg2)) > + return false; > + > + if ((offset1 + 4) == offset2) > + return true; > + if ((offset2 + 4) == offset1) > + return true; > + > + return false; > +} > + > +/* Check if the two memory accesses can be merged to an ldrd/strd instruction. > + That is they use the same base register, and the gap between constant > + offsets should be 4. 
*/ > +bool > +thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd) > +{ > + rtx base1, base2, op1; > + rtx addr1 = XEXP (mem1, 0); > + rtx addr2 = XEXP (mem2, 0); > + HOST_WIDE_INT offset1 = 0; > + HOST_WIDE_INT offset2 = 0; > + > + if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) > + return false; > + > + if (REG_P (addr1)) > + base1 = addr1; > + else if (GET_CODE (addr1) == PLUS) > + { > + base1 = XEXP (addr1, 0); > + op1 = XEXP (addr1, 1); > + if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT)) > + return false; > + offset1 = INTVAL (op1); > + } > + else > + return false; > + > + if (REG_P (addr2)) > + base2 = addr2; > + else if (GET_CODE (addr2) == PLUS) > + { > + base2 = XEXP (addr2, 0); > + op1 = XEXP (addr2, 1); > + if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT)) > + return false; > + offset2 = INTVAL (op1); > + } > + else > + return false; > + > + if (base1 != base2) > + return false; > + > + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) > + return false; > + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) > + return false; > + > + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) > + return false; > + > + if ((offset1 + 4) == offset2) > + return true; > + if ((offset2 + 4) == offset1) > + return true; > + > + return false; > +} > + > +/* Check if the insn can be expressed as ldm/stm with less cost. */ > +bool > +thumb2_prefer_ldmstm (rtx reg1, rtx reg2, rtx base, > + rtx off1, rtx off2, bool ldrd) > +{ > + HOST_WIDE_INT offset1 = 0; > + HOST_WIDE_INT offset2 = 0; > + > + if (off1 != NULL) > + offset1 = INTVAL (off1); > + if (off2 != NULL) > + offset2 = INTVAL (off2); > + > + if (offset1 > offset2) > + { > + rtx tmp; > + HOST_WIDE_INT t = offset1; > + offset1 = offset2; > + offset2 = t; > + tmp = reg1; > + reg1 = reg2; > + reg2 = tmp; > + } > + > + /* The offset of ldmdb is -8, the offset of ldmia is 0. 
*/ > + if ((offset1 != -8) && (offset1 != 0)) > + return false; > + > + /* Lower register corresponds to lower memory. */ > + if (REGNO (reg1) > REGNO (reg2)) > + return false; > + > + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower > + cost. */ > + return false; > +} > + > #include "gt-arm.h" > > Index: arm-protos.h > =================================================================== > --- arm-protos.h (revision 163853) > +++ arm-protos.h (working copy) > @@ -149,7 +149,9 @@ extern void arm_expand_sync (enum machin > extern const char *arm_output_memory_barrier (rtx *); > extern const char *arm_output_sync_insn (rtx, rtx *); > extern unsigned int arm_sync_loop_insns (rtx , rtx *); > - > +extern bool thumb2_check_ldrd_operands (rtx, rtx, rtx, rtx, rtx, bool); > +extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool); > +extern bool thumb2_prefer_ldmstm (rtx, rtx, rtx, rtx, rtx, bool); > extern bool arm_output_addr_const_extra (FILE *, rtx); > > #if defined TREE_CODE > Index: ldmstm.md > =================================================================== > --- ldmstm.md (revision 163853) > +++ ldmstm.md (working copy) > @@ -852,7 +852,7 @@ (define_insn "*ldm2_ia" > (set (match_operand:SI 2 "arm_hard_register_operand" "") > (mem:SI (plus:SI (match_dup 3) > (const_int 4))))])] > - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" > + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" > "ldm%(ia%)\t%3, {%1, %2}" > [(set_attr "type" "load2") > (set_attr "predicable" "yes")]) > @@ -901,7 +901,7 @@ (define_insn "*stm2_ia" > (match_operand:SI 1 "arm_hard_register_operand" "")) > (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) > (match_operand:SI 2 "arm_hard_register_operand" ""))])] > - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" > + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" > "stm%(ia%)\t%3, {%1, %2}" > [(set_attr "type" "store2") > (set_attr "predicable" "yes")]) > @@ -1041,7 +1041,7 @@ (define_insn "*ldm2_db" > (set (match_operand:SI 
2 "arm_hard_register_operand" "") > (mem:SI (plus:SI (match_dup 3) > (const_int -4))))])] > - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" > + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" > "ldm%(db%)\t%3, {%1, %2}" > [(set_attr "type" "load2") > (set_attr "predicable" "yes")]) > @@ -1067,7 +1067,7 @@ (define_insn "*stm2_db" > (match_operand:SI 1 "arm_hard_register_operand" "")) > (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) > (match_operand:SI 2 "arm_hard_register_operand" ""))])] > - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" > + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" > "stm%(db%)\t%3, {%1, %2}" > [(set_attr "type" "store2") > (set_attr "predicable" "yes")]) > Index: constraints.md > =================================================================== > --- constraints.md (revision 163853) > +++ constraints.md (working copy) > @@ -31,7 +31,7 @@ > ;; The following multi-letter normal constraints have been used: > ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz > ;; in Thumb-1 state: Pa, Pb, Pc, Pd > -;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px > +;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px, Py > > ;; The following memory constraints have been used: > ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us > @@ -189,6 +189,13 @@ (define_constraint "Px" > (and (match_code "const_int") > (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1"))) > > +(define_constraint "Py" > + "@internal In Thumb-2 state a constant that is a multiple of 4 in the > + range -1020 to 1024" > + (and (match_code "const_int") > + (match_test "TARGET_THUMB2 && ival >= -1020 && ival <= 1024 > + && (ival & 3) == 0"))) > + > (define_constraint "G" > "In ARM/Thumb-2 state a valid FPA immediate constant." 
> (and (match_code "const_double") > > > Index: pr40457-1.c > =================================================================== > --- pr40457-1.c (revision 163853) > +++ pr40457-1.c (working copy) > @@ -1,9 +1,9 @@ > -/* { dg-options "-Os" } */ > +/* { dg-options "-O2" } */ > /* { dg-do compile } */ > > int bar(int* p) > { > - int x = p[0] + p[1]; > + int x = p[0] + p[1] + p[2]; > return x; > } > > Index: pr40457-2.c > =================================================================== > --- pr40457-2.c (revision 163853) > +++ pr40457-2.c (working copy) > @@ -5,6 +5,7 @@ void foo(int* p) > { > p[0] = 1; > p[1] = 0; > + p[2] = 2; > } > > /* { dg-final { scan-assembler "stm" } } */ > Index: pr45335.c > =================================================================== > --- pr45335.c (revision 0) > +++ pr45335.c (revision 0) > @@ -0,0 +1,22 @@ > +/* { dg-options "-mthumb -O2" } */ > +/* { dg-require-effective-target arm_thumb2_ok } */ > +/* { dg-final { scan-assembler "ldrd" } } */ > +/* { dg-final { scan-assembler "strd" } } */ > + > +struct S > +{ > + void* p1; > + void* p2; > + void* p3; > + void* p4; > +}; > + > +extern printf(char*, ...); > + > +void foo1(struct S* fp, struct S* otherSaveArea) > +{ > + struct S* saveA = fp - 1; > + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); > + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", > + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); > +} > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PING][PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2010-09-19 9:10 ` [PING][PATCH: " Carrot Wei @ 2010-09-25 19:25 ` Carrot Wei 2010-10-05 11:53 ` Carrot Wei 0 siblings, 1 reply; 46+ messages in thread From: Carrot Wei @ 2010-09-25 19:25 UTC (permalink / raw) To: Richard Earnshaw, Paul Brook, Nick Clifton Cc: ramana.radhakrishnan, gcc-patches Ping again. On Sun, Sep 19, 2010 at 1:59 PM, Carrot Wei <carrot@google.com> wrote: > Ping > > On Sat, Sep 4, 2010 at 8:41 PM, Carrot Wei <carrot@google.com> wrote: >> On Wed, Sep 1, 2010 at 11:22 PM, Richard Earnshaw <rearnsha@arm.com> wrote: >>> If you submit an updated patch, please re-include the changelog entry, >>> even if it's the same. >>> >>> There are two obvious problems with this patch: >>> >>> 1) You presume that ldrd is always cheaper than ldm(2 regs). This isn't >>> the case on Cortex-a9. I'm not expecting you to work out all the >>> details of when A9 should use LDM and when it should use ldrd, but your >>> code needs to ascertain the costs of each alternative and make a >>> decision based on that answer, not on a static choice. >>> >>> 2) Your code fails to check for volatile mems. These must not be >>> transformed and the original load/store instructions must be preserved. >>> >> >> 1. A new function thumb2_prefer_ldmstm is used to choose ldm/stm or ldrd/strd. >> The default behavior is to output ldrd/strd. One should update this function if >> ldm/stm is better. >> >> 2. Function thumb2_legitimate_ldrd_p is updated to check volatile memory access. >> >> Following is the new patch >> >> ChangeLog: >> 2010-09-04 Wei Guozhi <carrot@google.com> >> >> PR target/45335 >> * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, >> thumb2_ldrd_reg2 and peephole2): New insn pattern and related >> peephole2. >> (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): >> New insn pattern and related peephole2. >> * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. 
>> (thumb2_check_ldrd_operands): New function. >> (thumb2_prefer_ldmstm): New function. >> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype. >> (thumb2_check_ldrd_operands): New prototype. >> (thumb2_prefer_ldmstm): New prototype. >> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): >> Change the ldm/stm patterns with 2 words to ARM only. >> * gcc/config/arm/constraints.md (Py): New thumb2 constant constraint >> suitable to ldrd/strd instructions. >> >> >> 2010-09-04 Wei Guozhi <carrot@google.com> >> >> PR target/45335 >> * gcc.target/arm/pr45335.c: New test. >> * gcc.target/arm/pr40457-1.c: Changed to load 3 words. >> * gcc.target/arm/pr40457-2.c: Changed to store 3 words. >> >> >> >> Index: thumb2.md >> =================================================================== >> --- thumb2.md (revision 163853) >> +++ thumb2.md (working copy) >> @@ -1257,3 +1257,226 @@ (define_peephole2 >> " >> operands[2] = GEN_INT (32 - INTVAL (operands[2])); >> ") >> + >> +(define_insn "*thumb2_ldrd" >> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >> + (mem:SI (plus:SI >> + (match_operand:SI 2 "s_register_operand" "rk") >> + (match_operand:SI 3 "const_int_operand" "Py")))) >> + (set (match_operand:SI 1 "s_register_operand" "") >> + (mem:SI (plus:SI (match_dup 2) >> + (match_operand:SI 4 "const_int_operand" "Py"))))])] >> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >> + operands[2], operands[3], operands[4], 1)" >> + "* >> + { >> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >> + HOST_WIDE_INT offset2 = INTVAL (operands[4]); >> + if (offset1 > offset2) >> + { >> + /* Swap the operands so that memory [base+offset1] is loaded into >> + operands[0]. 
*/ >> + rtx tmp = operands[0]; >> + operands[0] = operands[1]; >> + operands[1] = tmp; >> + tmp = operands[3]; >> + operands[3] = operands[4]; >> + operands[4] = tmp; >> + offset1 = INTVAL (operands[3]); >> + offset2 = INTVAL (operands[4]); >> + } >> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >> + operands[2], operands[3], operands[4], 1)) >> + return \"ldmdb\\t%2, {%0, %1}\"; >> + else if (fix_cm3_ldrd && (operands[2] == operands[0])) >> + { >> + if (offset1 <= -256) >> + { >> + output_asm_insn (\"sub\\t%2, %2, %n3\", operands); >> + output_asm_insn (\"ldr\\t%1, [%2, #4]\", operands); >> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >> + } >> + else >> + { >> + output_asm_insn (\"ldr\\t%1, [%2, %4]\", operands); >> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >> + } >> + return \"\"; >> + } >> + else >> + return \"ldrd\\t%0, %1, [%2, %3]\"; >> + }" >> +) >> + >> +(define_insn "*thumb2_ldrd_reg1" >> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >> + (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))) >> + (set (match_operand:SI 1 "s_register_operand" "") >> + (mem:SI (plus:SI (match_dup 2) >> + (match_operand:SI 3 "const_int_operand" "Py"))))])] >> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >> + operands[2], 0, operands[3], 1)" >> + "* >> + { >> + HOST_WIDE_INT offset2 = INTVAL (operands[3]); >> + if (offset2 == 4) >> + { >> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >> + operands[2], 0, operands[3], 1)) >> + return \"ldmia\\t%2, {%0, %1}\"; >> + if (fix_cm3_ldrd && (operands[2] == operands[0])) >> + { >> + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); >> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >> + return \"\"; >> + } >> + return \"ldrd\\t%0, %1, [%2]\"; >> + } >> + else >> + { >> + if (fix_cm3_ldrd && (operands[2] == operands[1])) >> + { >> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >> + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); >> + } >> + 
return \"ldrd\\t%1, %0, [%2, %3]\"; >> + } >> + }" >> +) >> + >> +(define_insn "*thumb2_ldrd_reg2" >> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >> + (mem:SI (plus:SI >> + (match_operand:SI 2 "s_register_operand" "rk") >> + (match_operand:SI 3 "const_int_operand" "Py")))) >> + (set (match_operand:SI 1 "s_register_operand" "") >> + (mem:SI (match_dup 2)))])] >> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >> + operands[2], operands[3], 0, 1)" >> + "* >> + { >> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >> + if (offset1 == -4) >> + { >> + if (fix_cm3_ldrd && (operands[2] == operands[0])) >> + { >> + output_asm_insn (\"ldr\\t%1, [%2]\", operands); >> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >> + return \"\"; >> + } >> + return \"ldrd\\t%0, %1, [%2, %3]\"; >> + } >> + else >> + { >> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >> + operands[2], operands[3], 0, 1)) >> + return \"ldmia\\t%2, {%1, %0}\"; >> + if (fix_cm3_ldrd && (operands[2] == operands[1])) >> + { >> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >> + output_asm_insn (\"ldr\\t%1, [%2]\", operands); >> + return \"\"; >> + } >> + return \"ldrd\\t%1, %0, [%2]\"; >> + } >> + }" >> +) >> + >> +(define_peephole2 >> + [(set (match_operand:SI 0 "s_register_operand" "") >> + (match_operand:SI 2 "memory_operand" "")) >> + (set (match_operand:SI 1 "s_register_operand" "") >> + (match_operand:SI 3 "memory_operand" ""))] >> + "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1], >> + operands[2], operands[3], 1)" >> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >> + (match_operand:SI 2 "memory_operand" "")) >> + (set (match_operand:SI 1 "s_register_operand" "") >> + (match_operand:SI 3 "memory_operand" ""))])] >> + "" >> +) >> + >> +(define_insn "*thumb2_strd" >> + [(parallel [(set (mem:SI >> + (plus:SI (match_operand:SI 2 "s_register_operand" "rk") >> + (match_operand:SI 3 "const_int_operand" "Py"))) 
>> + (match_operand:SI 0 "s_register_operand" "")) >> + (set (mem:SI (plus:SI (match_dup 2) >> + (match_operand:SI 4 "const_int_operand" "Py"))) >> + (match_operand:SI 1 "s_register_operand" ""))])] >> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >> + operands[2], operands[3], operands[4], 0)" >> + "* >> + { >> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >> + HOST_WIDE_INT offset2 = INTVAL (operands[4]); >> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >> + operands[2], operands[3], operands[4], 0)) >> + return \"stmdb\\t%2, {%0, %1}\"; >> + if (offset1 < offset2 ) >> + return \"strd\\t%0, %1, [%2, %3]\"; >> + else >> + return \"strd\\t%1, %0, [%2, %4]\"; >> + }" >> +) >> + >> +(define_insn "*thumb2_strd_reg1" >> + [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk")) >> + (match_operand:SI 0 "s_register_operand" "")) >> + (set (mem:SI (plus:SI (match_dup 2) >> + (match_operand:SI 3 "const_int_operand" "Py"))) >> + (match_operand:SI 1 "s_register_operand" ""))])] >> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >> + operands[2], 0, operands[3], 0)" >> + "* >> + { >> + HOST_WIDE_INT offset2 = INTVAL (operands[3]); >> + if (offset2 == 4) >> + { >> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >> + operands[2], 0, operands[3], 0)) >> + return \"stmia\\t%2, {%0, %1}\"; >> + return \"strd\\t%0, %1, [%2]\"; >> + } >> + else >> + return \"strd\\t%1, %0, [%2, %3]\"; >> + }" >> +) >> + >> +(define_insn "*thumb2_strd_reg2" >> + [(parallel [(set (mem:SI (plus:SI >> + (match_operand:SI 2 "s_register_operand" "rk") >> + (match_operand:SI 3 "const_int_operand" "Py"))) >> + (match_operand:SI 0 "s_register_operand" "")) >> + (set (mem:SI (match_dup 2)) >> + (match_operand:SI 1 "s_register_operand" ""))])] >> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >> + operands[2], operands[3], 0, 0)" >> + "* >> + { >> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >> 
+ if (offset1 == -4) >> + return \"strd\\t%0, %1, [%2, %3]\"; >> + else >> + { >> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >> + operands[2], operands[3], 0, 0)) >> + return \"stmia\\t%2, {%1, %0}\"; >> + return \"strd\\t%1, %0, [%2]\"; >> + } >> + }" >> +) >> + >> +(define_peephole2 >> + [(set (match_operand:SI 2 "memory_operand" "") >> + (match_operand:SI 0 "s_register_operand" "")) >> + (set (match_operand:SI 3 "memory_operand" "") >> + (match_operand:SI 1 "s_register_operand" ""))] >> + "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1], >> + operands[2], operands[3], 0)" >> + [(parallel [(set (match_operand:SI 2 "memory_operand" "") >> + (match_operand:SI 0 "s_register_operand" "")) >> + (set (match_operand:SI 3 "memory_operand" "") >> + (match_operand:SI 1 "s_register_operand" ""))])] >> + "" >> +) >> Index: arm.c >> =================================================================== >> --- arm.c (revision 163853) >> +++ arm.c (working copy) >> @@ -22976,4 +22976,125 @@ arm_expand_sync (enum machine_mode mode, >> } >> } >> >> +/* Check the legality of operands in an ldrd/strd instruction. */ >> +bool >> +thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base, >> + rtx off1, rtx off2, bool ldrd) >> +{ >> + HOST_WIDE_INT offset1 = 0; >> + HOST_WIDE_INT offset2 = 0; >> + >> + if (off1 != NULL) >> + offset1 = INTVAL (off1); >> + if (off2 != NULL) >> + offset2 = INTVAL (off2); >> + >> + if (ldrd && (reg1 == reg2)) >> + return false; >> + >> + if ((offset1 + 4) == offset2) >> + return true; >> + if ((offset2 + 4) == offset1) >> + return true; >> + >> + return false; >> +} >> + >> +/* Check if the two memory accesses can be merged to an ldrd/strd instruction. >> + That is they use the same base register, and the gap between constant >> + offsets should be 4. 
*/ >> +bool >> +thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd) >> +{ >> + rtx base1, base2, op1; >> + rtx addr1 = XEXP (mem1, 0); >> + rtx addr2 = XEXP (mem2, 0); >> + HOST_WIDE_INT offset1 = 0; >> + HOST_WIDE_INT offset2 = 0; >> + >> + if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) >> + return false; >> + >> + if (REG_P (addr1)) >> + base1 = addr1; >> + else if (GET_CODE (addr1) == PLUS) >> + { >> + base1 = XEXP (addr1, 0); >> + op1 = XEXP (addr1, 1); >> + if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT)) >> + return false; >> + offset1 = INTVAL (op1); >> + } >> + else >> + return false; >> + >> + if (REG_P (addr2)) >> + base2 = addr2; >> + else if (GET_CODE (addr2) == PLUS) >> + { >> + base2 = XEXP (addr2, 0); >> + op1 = XEXP (addr2, 1); >> + if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT)) >> + return false; >> + offset2 = INTVAL (op1); >> + } >> + else >> + return false; >> + >> + if (base1 != base2) >> + return false; >> + >> + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) >> + return false; >> + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) >> + return false; >> + >> + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) >> + return false; >> + >> + if ((offset1 + 4) == offset2) >> + return true; >> + if ((offset2 + 4) == offset1) >> + return true; >> + >> + return false; >> +} >> + >> +/* Check if the insn can be expressed as ldm/stm with less cost. */ >> +bool >> +thumb2_prefer_ldmstm (rtx reg1, rtx reg2, rtx base, >> + rtx off1, rtx off2, bool ldrd) >> +{ >> + HOST_WIDE_INT offset1 = 0; >> + HOST_WIDE_INT offset2 = 0; >> + >> + if (off1 != NULL) >> + offset1 = INTVAL (off1); >> + if (off2 != NULL) >> + offset2 = INTVAL (off2); >> + >> + if (offset1 > offset2) >> + { >> + rtx tmp; >> + HOST_WIDE_INT t = offset1; >> + offset1 = offset2; >> + offset2 = t; >> + tmp = reg1; >> + reg1 = reg2; >> + reg2 = tmp; >> + } >> + >> + /* The offset of ldmdb is -8, the offset of ldmia is 0. 
*/ >> + if ((offset1 != -8) && (offset1 != 0)) >> + return false; >> + >> + /* Lower register corresponds to lower memory. */ >> + if (REGNO (reg1) > REGNO (reg2)) >> + return false; >> + >> + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower >> + cost. */ >> + return false; >> +} >> + >> #include "gt-arm.h" >> >> Index: arm-protos.h >> =================================================================== >> --- arm-protos.h (revision 163853) >> +++ arm-protos.h (working copy) >> @@ -149,7 +149,9 @@ extern void arm_expand_sync (enum machin >> extern const char *arm_output_memory_barrier (rtx *); >> extern const char *arm_output_sync_insn (rtx, rtx *); >> extern unsigned int arm_sync_loop_insns (rtx , rtx *); >> - >> +extern bool thumb2_check_ldrd_operands (rtx, rtx, rtx, rtx, rtx, bool); >> +extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool); >> +extern bool thumb2_prefer_ldmstm (rtx, rtx, rtx, rtx, rtx, bool); >> extern bool arm_output_addr_const_extra (FILE *, rtx); >> >> #if defined TREE_CODE >> Index: ldmstm.md >> =================================================================== >> --- ldmstm.md (revision 163853) >> +++ ldmstm.md (working copy) >> @@ -852,7 +852,7 @@ (define_insn "*ldm2_ia" >> (set (match_operand:SI 2 "arm_hard_register_operand" "") >> (mem:SI (plus:SI (match_dup 3) >> (const_int 4))))])] >> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >> "ldm%(ia%)\t%3, {%1, %2}" >> [(set_attr "type" "load2") >> (set_attr "predicable" "yes")]) >> @@ -901,7 +901,7 @@ (define_insn "*stm2_ia" >> (match_operand:SI 1 "arm_hard_register_operand" "")) >> (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) >> (match_operand:SI 2 "arm_hard_register_operand" ""))])] >> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >> "stm%(ia%)\t%3, {%1, %2}" >> [(set_attr "type" "store2") >> (set_attr "predicable" "yes")]) >> @@ -1041,7 +1041,7 
@@ (define_insn "*ldm2_db" >> (set (match_operand:SI 2 "arm_hard_register_operand" "") >> (mem:SI (plus:SI (match_dup 3) >> (const_int -4))))])] >> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >> "ldm%(db%)\t%3, {%1, %2}" >> [(set_attr "type" "load2") >> (set_attr "predicable" "yes")]) >> @@ -1067,7 +1067,7 @@ (define_insn "*stm2_db" >> (match_operand:SI 1 "arm_hard_register_operand" "")) >> (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) >> (match_operand:SI 2 "arm_hard_register_operand" ""))])] >> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >> "stm%(db%)\t%3, {%1, %2}" >> [(set_attr "type" "store2") >> (set_attr "predicable" "yes")]) >> Index: constraints.md >> =================================================================== >> --- constraints.md (revision 163853) >> +++ constraints.md (working copy) >> @@ -31,7 +31,7 @@ >> ;; The following multi-letter normal constraints have been used: >> ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz >> ;; in Thumb-1 state: Pa, Pb, Pc, Pd >> -;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px >> +;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px, Py >> >> ;; The following memory constraints have been used: >> ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us >> @@ -189,6 +189,13 @@ (define_constraint "Px" >> (and (match_code "const_int") >> (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1"))) >> >> +(define_constraint "Py" >> + "@internal In Thumb-2 state a constant that is a multiple of 4 in the >> + range -1020 to 1024" >> + (and (match_code "const_int") >> + (match_test "TARGET_THUMB2 && ival >= -1020 && ival <= 1024 >> + && (ival & 3) == 0"))) >> + >> (define_constraint "G" >> "In ARM/Thumb-2 state a valid FPA immediate constant." 
>> (and (match_code "const_double") >> >> >> Index: pr40457-1.c >> =================================================================== >> --- pr40457-1.c (revision 163853) >> +++ pr40457-1.c (working copy) >> @@ -1,9 +1,9 @@ >> -/* { dg-options "-Os" } */ >> +/* { dg-options "-O2" } */ >> /* { dg-do compile } */ >> >> int bar(int* p) >> { >> - int x = p[0] + p[1]; >> + int x = p[0] + p[1] + p[2]; >> return x; >> } >> >> Index: pr40457-2.c >> =================================================================== >> --- pr40457-2.c (revision 163853) >> +++ pr40457-2.c (working copy) >> @@ -5,6 +5,7 @@ void foo(int* p) >> { >> p[0] = 1; >> p[1] = 0; >> + p[2] = 2; >> } >> >> /* { dg-final { scan-assembler "stm" } } */ >> Index: pr45335.c >> =================================================================== >> --- pr45335.c (revision 0) >> +++ pr45335.c (revision 0) >> @@ -0,0 +1,22 @@ >> +/* { dg-options "-mthumb -O2" } */ >> +/* { dg-require-effective-target arm_thumb2_ok } */ >> +/* { dg-final { scan-assembler "ldrd" } } */ >> +/* { dg-final { scan-assembler "strd" } } */ >> + >> +struct S >> +{ >> + void* p1; >> + void* p2; >> + void* p3; >> + void* p4; >> +}; >> + >> +extern printf(char*, ...); >> + >> +void foo1(struct S* fp, struct S* otherSaveArea) >> +{ >> + struct S* saveA = fp - 1; >> + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); >> + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", >> + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); >> +} >> > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PING][PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2010-09-25 19:25 ` Carrot Wei @ 2010-10-05 11:53 ` Carrot Wei 2010-10-12 9:00 ` Carrot Wei 0 siblings, 1 reply; 46+ messages in thread From: Carrot Wei @ 2010-10-05 11:53 UTC (permalink / raw) To: Richard Earnshaw, Paul Brook, Nick Clifton Cc: ramana.radhakrishnan, gcc-patches Ping ... On Sat, Sep 25, 2010 at 9:16 AM, Carrot Wei <carrot@google.com> wrote: > Ping again. > > On Sun, Sep 19, 2010 at 1:59 PM, Carrot Wei <carrot@google.com> wrote: >> Ping >> >> On Sat, Sep 4, 2010 at 8:41 PM, Carrot Wei <carrot@google.com> wrote: >>> On Wed, Sep 1, 2010 at 11:22 PM, Richard Earnshaw <rearnsha@arm.com> wrote: >>>> If you submit an updated patch, please re-include the changelog entry, >>>> even if it's the same. >>>> >>>> There are two obvious problems with this patch: >>>> >>>> 1) You presume that ldrd is always cheaper than ldm(2 regs). This isn't >>>> the case on Cortex-a9. I'm not expecting you to work out all the >>>> details of when A9 should use LDM and when it should use ldrd, but your >>>> code needs to ascertain the costs of each alternative and make a >>>> decision based on that answer, not on a static choice. >>>> >>>> 2) Your code fails to check for volatile mems. These must not be >>>> transformed and the original load/store instructions must be preserved. >>>> >>> >>> 1. A new function thumb2_prefer_ldmstm is used to choose ldm/stm or ldrd/strd. >>> The default behavior is to output ldrd/strd. One should update this function if >>> ldm/stm is better. >>> >>> 2. Function thumb2_legitimate_ldrd_p is updated to check volatile memory access. >>> >>> Following is the new patch >>> >>> ChangeLog: >>> 2010-09-04 Wei Guozhi <carrot@google.com> >>> >>> PR target/45335 >>> * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, >>> thumb2_ldrd_reg2 and peephole2): New insn pattern and related >>> peephole2. 
>>> (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): >>> New insn pattern and related peephole2. >>> * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. >>> (thumb2_check_ldrd_operands): New function. >>> (thumb2_prefer_ldmstm): New function. >>> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype. >>> (thumb2_check_ldrd_operands): New prototype. >>> (thumb2_prefer_ldmstm): New prototype. >>> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): >>> Change the ldm/stm patterns with 2 words to ARM only. >>> * gcc/config/arm/constraints.md (Py): New thumb2 constant constraint >>> suitable to ldrd/strd instructions. >>> >>> >>> 2010-09-04 Wei Guozhi <carrot@google.com> >>> >>> PR target/45335 >>> * gcc.target/arm/pr45335.c: New test. >>> * gcc.target/arm/pr40457-1.c: Changed to load 3 words. >>> * gcc.target/arm/pr40457-2.c: Changed to store 3 words. >>> >>> >>> >>> Index: thumb2.md >>> =================================================================== >>> --- thumb2.md (revision 163853) >>> +++ thumb2.md (working copy) >>> @@ -1257,3 +1257,226 @@ (define_peephole2 >>> " >>> operands[2] = GEN_INT (32 - INTVAL (operands[2])); >>> ") >>> + >>> +(define_insn "*thumb2_ldrd" >>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>> + (mem:SI (plus:SI >>> + (match_operand:SI 2 "s_register_operand" "rk") >>> + (match_operand:SI 3 "const_int_operand" "Py")))) >>> + (set (match_operand:SI 1 "s_register_operand" "") >>> + (mem:SI (plus:SI (match_dup 2) >>> + (match_operand:SI 4 "const_int_operand" "Py"))))])] >>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>> + operands[2], operands[3], operands[4], 1)" >>> + "* >>> + { >>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>> + HOST_WIDE_INT offset2 = INTVAL (operands[4]); >>> + if (offset1 > offset2) >>> + { >>> + /* Swap the operands so that memory [base+offset1] is loaded into >>> + operands[0]. 
*/ >>> + rtx tmp = operands[0]; >>> + operands[0] = operands[1]; >>> + operands[1] = tmp; >>> + tmp = operands[3]; >>> + operands[3] = operands[4]; >>> + operands[4] = tmp; >>> + offset1 = INTVAL (operands[3]); >>> + offset2 = INTVAL (operands[4]); >>> + } >>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>> + operands[2], operands[3], operands[4], 1)) >>> + return \"ldmdb\\t%2, {%0, %1}\"; >>> + else if (fix_cm3_ldrd && (operands[2] == operands[0])) >>> + { >>> + if (offset1 <= -256) >>> + { >>> + output_asm_insn (\"sub\\t%2, %2, %n3\", operands); >>> + output_asm_insn (\"ldr\\t%1, [%2, #4]\", operands); >>> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>> + } >>> + else >>> + { >>> + output_asm_insn (\"ldr\\t%1, [%2, %4]\", operands); >>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>> + } >>> + return \"\"; >>> + } >>> + else >>> + return \"ldrd\\t%0, %1, [%2, %3]\"; >>> + }" >>> +) >>> + >>> +(define_insn "*thumb2_ldrd_reg1" >>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>> + (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))) >>> + (set (match_operand:SI 1 "s_register_operand" "") >>> + (mem:SI (plus:SI (match_dup 2) >>> + (match_operand:SI 3 "const_int_operand" "Py"))))])] >>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>> + operands[2], 0, operands[3], 1)" >>> + "* >>> + { >>> + HOST_WIDE_INT offset2 = INTVAL (operands[3]); >>> + if (offset2 == 4) >>> + { >>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>> + operands[2], 0, operands[3], 1)) >>> + return \"ldmia\\t%2, {%0, %1}\"; >>> + if (fix_cm3_ldrd && (operands[2] == operands[0])) >>> + { >>> + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); >>> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>> + return \"\"; >>> + } >>> + return \"ldrd\\t%0, %1, [%2]\"; >>> + } >>> + else >>> + { >>> + if (fix_cm3_ldrd && (operands[2] == operands[1])) >>> + { >>> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>> + 
output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); >>> + } >>> + return \"ldrd\\t%1, %0, [%2, %3]\"; >>> + } >>> + }" >>> +) >>> + >>> +(define_insn "*thumb2_ldrd_reg2" >>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>> + (mem:SI (plus:SI >>> + (match_operand:SI 2 "s_register_operand" "rk") >>> + (match_operand:SI 3 "const_int_operand" "Py")))) >>> + (set (match_operand:SI 1 "s_register_operand" "") >>> + (mem:SI (match_dup 2)))])] >>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>> + operands[2], operands[3], 0, 1)" >>> + "* >>> + { >>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>> + if (offset1 == -4) >>> + { >>> + if (fix_cm3_ldrd && (operands[2] == operands[0])) >>> + { >>> + output_asm_insn (\"ldr\\t%1, [%2]\", operands); >>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>> + return \"\"; >>> + } >>> + return \"ldrd\\t%0, %1, [%2, %3]\"; >>> + } >>> + else >>> + { >>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>> + operands[2], operands[3], 0, 1)) >>> + return \"ldmia\\t%2, {%1, %0}\"; >>> + if (fix_cm3_ldrd && (operands[2] == operands[1])) >>> + { >>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>> + output_asm_insn (\"ldr\\t%1, [%2]\", operands); >>> + return \"\"; >>> + } >>> + return \"ldrd\\t%1, %0, [%2]\"; >>> + } >>> + }" >>> +) >>> + >>> +(define_peephole2 >>> + [(set (match_operand:SI 0 "s_register_operand" "") >>> + (match_operand:SI 2 "memory_operand" "")) >>> + (set (match_operand:SI 1 "s_register_operand" "") >>> + (match_operand:SI 3 "memory_operand" ""))] >>> + "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1], >>> + operands[2], operands[3], 1)" >>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>> + (match_operand:SI 2 "memory_operand" "")) >>> + (set (match_operand:SI 1 "s_register_operand" "") >>> + (match_operand:SI 3 "memory_operand" ""))])] >>> + "" >>> +) >>> + >>> +(define_insn "*thumb2_strd" >>> + [(parallel 
[(set (mem:SI >>> + (plus:SI (match_operand:SI 2 "s_register_operand" "rk") >>> + (match_operand:SI 3 "const_int_operand" "Py"))) >>> + (match_operand:SI 0 "s_register_operand" "")) >>> + (set (mem:SI (plus:SI (match_dup 2) >>> + (match_operand:SI 4 "const_int_operand" "Py"))) >>> + (match_operand:SI 1 "s_register_operand" ""))])] >>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>> + operands[2], operands[3], operands[4], 0)" >>> + "* >>> + { >>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>> + HOST_WIDE_INT offset2 = INTVAL (operands[4]); >>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>> + operands[2], operands[3], operands[4], 0)) >>> + return \"stmdb\\t%2, {%0, %1}\"; >>> + if (offset1 < offset2 ) >>> + return \"strd\\t%0, %1, [%2, %3]\"; >>> + else >>> + return \"strd\\t%1, %0, [%2, %4]\"; >>> + }" >>> +) >>> + >>> +(define_insn "*thumb2_strd_reg1" >>> + [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk")) >>> + (match_operand:SI 0 "s_register_operand" "")) >>> + (set (mem:SI (plus:SI (match_dup 2) >>> + (match_operand:SI 3 "const_int_operand" "Py"))) >>> + (match_operand:SI 1 "s_register_operand" ""))])] >>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>> + operands[2], 0, operands[3], 0)" >>> + "* >>> + { >>> + HOST_WIDE_INT offset2 = INTVAL (operands[3]); >>> + if (offset2 == 4) >>> + { >>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>> + operands[2], 0, operands[3], 0)) >>> + return \"stmia\\t%2, {%0, %1}\"; >>> + return \"strd\\t%0, %1, [%2]\"; >>> + } >>> + else >>> + return \"strd\\t%1, %0, [%2, %3]\"; >>> + }" >>> +) >>> + >>> +(define_insn "*thumb2_strd_reg2" >>> + [(parallel [(set (mem:SI (plus:SI >>> + (match_operand:SI 2 "s_register_operand" "rk") >>> + (match_operand:SI 3 "const_int_operand" "Py"))) >>> + (match_operand:SI 0 "s_register_operand" "")) >>> + (set (mem:SI (match_dup 2)) >>> + (match_operand:SI 1 "s_register_operand" ""))])] >>> 
+ "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>> + operands[2], operands[3], 0, 0)" >>> + "* >>> + { >>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>> + if (offset1 == -4) >>> + return \"strd\\t%0, %1, [%2, %3]\"; >>> + else >>> + { >>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>> + operands[2], operands[3], 0, 0)) >>> + return \"stmia\\t%2, {%1, %0}\"; >>> + return \"strd\\t%1, %0, [%2]\"; >>> + } >>> + }" >>> +) >>> + >>> +(define_peephole2 >>> + [(set (match_operand:SI 2 "memory_operand" "") >>> + (match_operand:SI 0 "s_register_operand" "")) >>> + (set (match_operand:SI 3 "memory_operand" "") >>> + (match_operand:SI 1 "s_register_operand" ""))] >>> + "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1], >>> + operands[2], operands[3], 0)" >>> + [(parallel [(set (match_operand:SI 2 "memory_operand" "") >>> + (match_operand:SI 0 "s_register_operand" "")) >>> + (set (match_operand:SI 3 "memory_operand" "") >>> + (match_operand:SI 1 "s_register_operand" ""))])] >>> + "" >>> +) >>> Index: arm.c >>> =================================================================== >>> --- arm.c (revision 163853) >>> +++ arm.c (working copy) >>> @@ -22976,4 +22976,125 @@ arm_expand_sync (enum machine_mode mode, >>> } >>> } >>> >>> +/* Check the legality of operands in an ldrd/strd instruction. */ >>> +bool >>> +thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base, >>> + rtx off1, rtx off2, bool ldrd) >>> +{ >>> + HOST_WIDE_INT offset1 = 0; >>> + HOST_WIDE_INT offset2 = 0; >>> + >>> + if (off1 != NULL) >>> + offset1 = INTVAL (off1); >>> + if (off2 != NULL) >>> + offset2 = INTVAL (off2); >>> + >>> + if (ldrd && (reg1 == reg2)) >>> + return false; >>> + >>> + if ((offset1 + 4) == offset2) >>> + return true; >>> + if ((offset2 + 4) == offset1) >>> + return true; >>> + >>> + return false; >>> +} >>> + >>> +/* Check if the two memory accesses can be merged to an ldrd/strd instruction. 
>>> + That is they use the same base register, and the gap between constant >>> + offsets should be 4. */ >>> +bool >>> +thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd) >>> +{ >>> + rtx base1, base2, op1; >>> + rtx addr1 = XEXP (mem1, 0); >>> + rtx addr2 = XEXP (mem2, 0); >>> + HOST_WIDE_INT offset1 = 0; >>> + HOST_WIDE_INT offset2 = 0; >>> + >>> + if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) >>> + return false; >>> + >>> + if (REG_P (addr1)) >>> + base1 = addr1; >>> + else if (GET_CODE (addr1) == PLUS) >>> + { >>> + base1 = XEXP (addr1, 0); >>> + op1 = XEXP (addr1, 1); >>> + if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT)) >>> + return false; >>> + offset1 = INTVAL (op1); >>> + } >>> + else >>> + return false; >>> + >>> + if (REG_P (addr2)) >>> + base2 = addr2; >>> + else if (GET_CODE (addr2) == PLUS) >>> + { >>> + base2 = XEXP (addr2, 0); >>> + op1 = XEXP (addr2, 1); >>> + if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT)) >>> + return false; >>> + offset2 = INTVAL (op1); >>> + } >>> + else >>> + return false; >>> + >>> + if (base1 != base2) >>> + return false; >>> + >>> + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) >>> + return false; >>> + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) >>> + return false; >>> + >>> + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) >>> + return false; >>> + >>> + if ((offset1 + 4) == offset2) >>> + return true; >>> + if ((offset2 + 4) == offset1) >>> + return true; >>> + >>> + return false; >>> +} >>> + >>> +/* Check if the insn can be expressed as ldm/stm with less cost. 
*/ >>> +bool >>> +thumb2_prefer_ldmstm (rtx reg1, rtx reg2, rtx base, >>> + rtx off1, rtx off2, bool ldrd) >>> +{ >>> + HOST_WIDE_INT offset1 = 0; >>> + HOST_WIDE_INT offset2 = 0; >>> + >>> + if (off1 != NULL) >>> + offset1 = INTVAL (off1); >>> + if (off2 != NULL) >>> + offset2 = INTVAL (off2); >>> + >>> + if (offset1 > offset2) >>> + { >>> + rtx tmp; >>> + HOST_WIDE_INT t = offset1; >>> + offset1 = offset2; >>> + offset2 = t; >>> + tmp = reg1; >>> + reg1 = reg2; >>> + reg2 = tmp; >>> + } >>> + >>> + /* The offset of ldmdb is -8, the offset of ldmia is 0. */ >>> + if ((offset1 != -8) && (offset1 != 0)) >>> + return false; >>> + >>> + /* Lower register corresponds to lower memory. */ >>> + if (REGNO (reg1) > REGNO (reg2)) >>> + return false; >>> + >>> + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower >>> + cost. */ >>> + return false; >>> +} >>> + >>> #include "gt-arm.h" >>> >>> Index: arm-protos.h >>> =================================================================== >>> --- arm-protos.h (revision 163853) >>> +++ arm-protos.h (working copy) >>> @@ -149,7 +149,9 @@ extern void arm_expand_sync (enum machin >>> extern const char *arm_output_memory_barrier (rtx *); >>> extern const char *arm_output_sync_insn (rtx, rtx *); >>> extern unsigned int arm_sync_loop_insns (rtx , rtx *); >>> - >>> +extern bool thumb2_check_ldrd_operands (rtx, rtx, rtx, rtx, rtx, bool); >>> +extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool); >>> +extern bool thumb2_prefer_ldmstm (rtx, rtx, rtx, rtx, rtx, bool); >>> extern bool arm_output_addr_const_extra (FILE *, rtx); >>> >>> #if defined TREE_CODE >>> Index: ldmstm.md >>> =================================================================== >>> --- ldmstm.md (revision 163853) >>> +++ ldmstm.md (working copy) >>> @@ -852,7 +852,7 @@ (define_insn "*ldm2_ia" >>> (set (match_operand:SI 2 "arm_hard_register_operand" "") >>> (mem:SI (plus:SI (match_dup 3) >>> (const_int 4))))])] >>> - "TARGET_32BIT && XVECLEN 
(operands[0], 0) == 2" >>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>> "ldm%(ia%)\t%3, {%1, %2}" >>> [(set_attr "type" "load2") >>> (set_attr "predicable" "yes")]) >>> @@ -901,7 +901,7 @@ (define_insn "*stm2_ia" >>> (match_operand:SI 1 "arm_hard_register_operand" "")) >>> (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) >>> (match_operand:SI 2 "arm_hard_register_operand" ""))])] >>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>> "stm%(ia%)\t%3, {%1, %2}" >>> [(set_attr "type" "store2") >>> (set_attr "predicable" "yes")]) >>> @@ -1041,7 +1041,7 @@ (define_insn "*ldm2_db" >>> (set (match_operand:SI 2 "arm_hard_register_operand" "") >>> (mem:SI (plus:SI (match_dup 3) >>> (const_int -4))))])] >>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>> "ldm%(db%)\t%3, {%1, %2}" >>> [(set_attr "type" "load2") >>> (set_attr "predicable" "yes")]) >>> @@ -1067,7 +1067,7 @@ (define_insn "*stm2_db" >>> (match_operand:SI 1 "arm_hard_register_operand" "")) >>> (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) >>> (match_operand:SI 2 "arm_hard_register_operand" ""))])] >>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>> "stm%(db%)\t%3, {%1, %2}" >>> [(set_attr "type" "store2") >>> (set_attr "predicable" "yes")]) >>> Index: constraints.md >>> =================================================================== >>> --- constraints.md (revision 163853) >>> +++ constraints.md (working copy) >>> @@ -31,7 +31,7 @@ >>> ;; The following multi-letter normal constraints have been used: >>> ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz >>> ;; in Thumb-1 state: Pa, Pb, Pc, Pd >>> -;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px >>> +;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px, Py >>> >>> ;; The following memory constraints have been used: >>> ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us >>> @@ 
-189,6 +189,13 @@ (define_constraint "Px" >>> (and (match_code "const_int") >>> (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1"))) >>> >>> +(define_constraint "Py" >>> + "@internal In Thumb-2 state a constant that is a multiple of 4 in the >>> + range -1020 to 1024" >>> + (and (match_code "const_int") >>> + (match_test "TARGET_THUMB2 && ival >= -1020 && ival <= 1024 >>> + && (ival & 3) == 0"))) >>> + >>> (define_constraint "G" >>> "In ARM/Thumb-2 state a valid FPA immediate constant." >>> (and (match_code "const_double") >>> >>> >>> Index: pr40457-1.c >>> =================================================================== >>> --- pr40457-1.c (revision 163853) >>> +++ pr40457-1.c (working copy) >>> @@ -1,9 +1,9 @@ >>> -/* { dg-options "-Os" } */ >>> +/* { dg-options "-O2" } */ >>> /* { dg-do compile } */ >>> >>> int bar(int* p) >>> { >>> - int x = p[0] + p[1]; >>> + int x = p[0] + p[1] + p[2]; >>> return x; >>> } >>> >>> Index: pr40457-2.c >>> =================================================================== >>> --- pr40457-2.c (revision 163853) >>> +++ pr40457-2.c (working copy) >>> @@ -5,6 +5,7 @@ void foo(int* p) >>> { >>> p[0] = 1; >>> p[1] = 0; >>> + p[2] = 2; >>> } >>> >>> /* { dg-final { scan-assembler "stm" } } */ >>> Index: pr45335.c >>> =================================================================== >>> --- pr45335.c (revision 0) >>> +++ pr45335.c (revision 0) >>> @@ -0,0 +1,22 @@ >>> +/* { dg-options "-mthumb -O2" } */ >>> +/* { dg-require-effective-target arm_thumb2_ok } */ >>> +/* { dg-final { scan-assembler "ldrd" } } */ >>> +/* { dg-final { scan-assembler "strd" } } */ >>> + >>> +struct S >>> +{ >>> + void* p1; >>> + void* p2; >>> + void* p3; >>> + void* p4; >>> +}; >>> + >>> +extern printf(char*, ...); >>> + >>> +void foo1(struct S* fp, struct S* otherSaveArea) >>> +{ >>> + struct S* saveA = fp - 1; >>> + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); >>> + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p 
fp[0]=0x%08x\n", >>> + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); >>> +} >>> >> > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PING][PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2010-10-05 11:53 ` Carrot Wei @ 2010-10-12 9:00 ` Carrot Wei 2010-10-12 15:37 ` Ian Lance Taylor 0 siblings, 1 reply; 46+ messages in thread From: Carrot Wei @ 2010-10-12 9:00 UTC (permalink / raw) To: Richard Earnshaw, Paul Brook, Nick Clifton Cc: ramana.radhakrishnan, gcc-patches Ping On Tue, Oct 5, 2010 at 7:52 PM, Carrot Wei <carrot@google.com> wrote: > Ping ... > > On Sat, Sep 25, 2010 at 9:16 AM, Carrot Wei <carrot@google.com> wrote: >> Ping again. >> >> On Sun, Sep 19, 2010 at 1:59 PM, Carrot Wei <carrot@google.com> wrote: >>> Ping >>> >>> On Sat, Sep 4, 2010 at 8:41 PM, Carrot Wei <carrot@google.com> wrote: >>>> On Wed, Sep 1, 2010 at 11:22 PM, Richard Earnshaw <rearnsha@arm.com> wrote: >>>>> If you submit an updated patch, please re-include the changelog entry, >>>>> even if it's the same. >>>>> >>>>> There are two obvious problems with this patch: >>>>> >>>>> 1) You presume that ldrd is always cheaper than ldm(2 regs). This isn't >>>>> the case on Cortex-a9. I'm not expecting you to work out all the >>>>> details of when A9 should use LDM and when it should use ldrd, but your >>>>> code needs to ascertain the costs of each alternative and make a >>>>> decision based on that answer, not on a static choice. >>>>> >>>>> 2) Your code fails to check for volatile mems. These must not be >>>>> transformed and the original load/store instructions must be preserved. >>>>> >>>> >>>> 1. A new function thumb2_prefer_ldmstm is used to choose ldm/stm or ldrd/strd. >>>> The default behavior is to output ldrd/strd. One should update this function if >>>> ldm/stm is better. >>>> >>>> 2. Function thumb2_legitimate_ldrd_p is updated to check volatile memory access. 
>>>> >>>> Following is the new patch >>>> >>>> ChangeLog: >>>> 2010-09-04 Wei Guozhi <carrot@google.com> >>>> >>>> PR target/45335 >>>> * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, >>>> thumb2_ldrd_reg2 and peephole2): New insn pattern and related >>>> peephole2. >>>> (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): >>>> New insn pattern and related peephole2. >>>> * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. >>>> (thumb2_check_ldrd_operands): New function. >>>> (thumb2_prefer_ldmstm): New function. >>>> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype. >>>> (thumb2_check_ldrd_operands): New prototype. >>>> (thumb2_prefer_ldmstm): New prototype. >>>> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): >>>> Change the ldm/stm patterns with 2 words to ARM only. >>>> * gcc/config/arm/constraints.md (Py): New thumb2 constant constraint >>>> suitable to ldrd/strd instructions. >>>> >>>> >>>> 2010-09-04 Wei Guozhi <carrot@google.com> >>>> >>>> PR target/45335 >>>> * gcc.target/arm/pr45335.c: New test. >>>> * gcc.target/arm/pr40457-1.c: Changed to load 3 words. >>>> * gcc.target/arm/pr40457-2.c: Changed to store 3 words. 
>>>> >>>> >>>> >>>> Index: thumb2.md >>>> =================================================================== >>>> --- thumb2.md (revision 163853) >>>> +++ thumb2.md (working copy) >>>> @@ -1257,3 +1257,226 @@ (define_peephole2 >>>> " >>>> operands[2] = GEN_INT (32 - INTVAL (operands[2])); >>>> ") >>>> + >>>> +(define_insn "*thumb2_ldrd" >>>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>>> + (mem:SI (plus:SI >>>> + (match_operand:SI 2 "s_register_operand" "rk") >>>> + (match_operand:SI 3 "const_int_operand" "Py")))) >>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>> + (mem:SI (plus:SI (match_dup 2) >>>> + (match_operand:SI 4 "const_int_operand" "Py"))))])] >>>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>>> + operands[2], operands[3], operands[4], 1)" >>>> + "* >>>> + { >>>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>>> + HOST_WIDE_INT offset2 = INTVAL (operands[4]); >>>> + if (offset1 > offset2) >>>> + { >>>> + /* Swap the operands so that memory [base+offset1] is loaded into >>>> + operands[0]. 
*/ >>>> + rtx tmp = operands[0]; >>>> + operands[0] = operands[1]; >>>> + operands[1] = tmp; >>>> + tmp = operands[3]; >>>> + operands[3] = operands[4]; >>>> + operands[4] = tmp; >>>> + offset1 = INTVAL (operands[3]); >>>> + offset2 = INTVAL (operands[4]); >>>> + } >>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>> + operands[2], operands[3], operands[4], 1)) >>>> + return \"ldmdb\\t%2, {%0, %1}\"; >>>> + else if (fix_cm3_ldrd && (operands[2] == operands[0])) >>>> + { >>>> + if (offset1 <= -256) >>>> + { >>>> + output_asm_insn (\"sub\\t%2, %2, %n3\", operands); >>>> + output_asm_insn (\"ldr\\t%1, [%2, #4]\", operands); >>>> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>>> + } >>>> + else >>>> + { >>>> + output_asm_insn (\"ldr\\t%1, [%2, %4]\", operands); >>>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>>> + } >>>> + return \"\"; >>>> + } >>>> + else >>>> + return \"ldrd\\t%0, %1, [%2, %3]\"; >>>> + }" >>>> +) >>>> + >>>> +(define_insn "*thumb2_ldrd_reg1" >>>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>>> + (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))) >>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>> + (mem:SI (plus:SI (match_dup 2) >>>> + (match_operand:SI 3 "const_int_operand" "Py"))))])] >>>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>>> + operands[2], 0, operands[3], 1)" >>>> + "* >>>> + { >>>> + HOST_WIDE_INT offset2 = INTVAL (operands[3]); >>>> + if (offset2 == 4) >>>> + { >>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>> + operands[2], 0, operands[3], 1)) >>>> + return \"ldmia\\t%2, {%0, %1}\"; >>>> + if (fix_cm3_ldrd && (operands[2] == operands[0])) >>>> + { >>>> + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); >>>> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>>> + return \"\"; >>>> + } >>>> + return \"ldrd\\t%0, %1, [%2]\"; >>>> + } >>>> + else >>>> + { >>>> + if (fix_cm3_ldrd && (operands[2] == operands[1])) >>>> + { >>>> 
+ output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>>> + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); >>>> + } >>>> + return \"ldrd\\t%1, %0, [%2, %3]\"; >>>> + } >>>> + }" >>>> +) >>>> + >>>> +(define_insn "*thumb2_ldrd_reg2" >>>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>>> + (mem:SI (plus:SI >>>> + (match_operand:SI 2 "s_register_operand" "rk") >>>> + (match_operand:SI 3 "const_int_operand" "Py")))) >>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>> + (mem:SI (match_dup 2)))])] >>>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>>> + operands[2], operands[3], 0, 1)" >>>> + "* >>>> + { >>>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>>> + if (offset1 == -4) >>>> + { >>>> + if (fix_cm3_ldrd && (operands[2] == operands[0])) >>>> + { >>>> + output_asm_insn (\"ldr\\t%1, [%2]\", operands); >>>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>>> + return \"\"; >>>> + } >>>> + return \"ldrd\\t%0, %1, [%2, %3]\"; >>>> + } >>>> + else >>>> + { >>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>> + operands[2], operands[3], 0, 1)) >>>> + return \"ldmia\\t%2, {%1, %0}\"; >>>> + if (fix_cm3_ldrd && (operands[2] == operands[1])) >>>> + { >>>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>>> + output_asm_insn (\"ldr\\t%1, [%2]\", operands); >>>> + return \"\"; >>>> + } >>>> + return \"ldrd\\t%1, %0, [%2]\"; >>>> + } >>>> + }" >>>> +) >>>> + >>>> +(define_peephole2 >>>> + [(set (match_operand:SI 0 "s_register_operand" "") >>>> + (match_operand:SI 2 "memory_operand" "")) >>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>> + (match_operand:SI 3 "memory_operand" ""))] >>>> + "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1], >>>> + operands[2], operands[3], 1)" >>>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>>> + (match_operand:SI 2 "memory_operand" "")) >>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>> + 
(match_operand:SI 3 "memory_operand" ""))])] >>>> + "" >>>> +) >>>> + >>>> +(define_insn "*thumb2_strd" >>>> + [(parallel [(set (mem:SI >>>> + (plus:SI (match_operand:SI 2 "s_register_operand" "rk") >>>> + (match_operand:SI 3 "const_int_operand" "Py"))) >>>> + (match_operand:SI 0 "s_register_operand" "")) >>>> + (set (mem:SI (plus:SI (match_dup 2) >>>> + (match_operand:SI 4 "const_int_operand" "Py"))) >>>> + (match_operand:SI 1 "s_register_operand" ""))])] >>>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>>> + operands[2], operands[3], operands[4], 0)" >>>> + "* >>>> + { >>>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>>> + HOST_WIDE_INT offset2 = INTVAL (operands[4]); >>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>> + operands[2], operands[3], operands[4], 0)) >>>> + return \"stmdb\\t%2, {%0, %1}\"; >>>> + if (offset1 < offset2 ) >>>> + return \"strd\\t%0, %1, [%2, %3]\"; >>>> + else >>>> + return \"strd\\t%1, %0, [%2, %4]\"; >>>> + }" >>>> +) >>>> + >>>> +(define_insn "*thumb2_strd_reg1" >>>> + [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk")) >>>> + (match_operand:SI 0 "s_register_operand" "")) >>>> + (set (mem:SI (plus:SI (match_dup 2) >>>> + (match_operand:SI 3 "const_int_operand" "Py"))) >>>> + (match_operand:SI 1 "s_register_operand" ""))])] >>>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>>> + operands[2], 0, operands[3], 0)" >>>> + "* >>>> + { >>>> + HOST_WIDE_INT offset2 = INTVAL (operands[3]); >>>> + if (offset2 == 4) >>>> + { >>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>> + operands[2], 0, operands[3], 0)) >>>> + return \"stmia\\t%2, {%0, %1}\"; >>>> + return \"strd\\t%0, %1, [%2]\"; >>>> + } >>>> + else >>>> + return \"strd\\t%1, %0, [%2, %3]\"; >>>> + }" >>>> +) >>>> + >>>> +(define_insn "*thumb2_strd_reg2" >>>> + [(parallel [(set (mem:SI (plus:SI >>>> + (match_operand:SI 2 "s_register_operand" "rk") >>>> + (match_operand:SI 3 
"const_int_operand" "Py"))) >>>> + (match_operand:SI 0 "s_register_operand" "")) >>>> + (set (mem:SI (match_dup 2)) >>>> + (match_operand:SI 1 "s_register_operand" ""))])] >>>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>>> + operands[2], operands[3], 0, 0)" >>>> + "* >>>> + { >>>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>>> + if (offset1 == -4) >>>> + return \"strd\\t%0, %1, [%2, %3]\"; >>>> + else >>>> + { >>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>> + operands[2], operands[3], 0, 0)) >>>> + return \"stmia\\t%2, {%1, %0}\"; >>>> + return \"strd\\t%1, %0, [%2]\"; >>>> + } >>>> + }" >>>> +) >>>> + >>>> +(define_peephole2 >>>> + [(set (match_operand:SI 2 "memory_operand" "") >>>> + (match_operand:SI 0 "s_register_operand" "")) >>>> + (set (match_operand:SI 3 "memory_operand" "") >>>> + (match_operand:SI 1 "s_register_operand" ""))] >>>> + "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1], >>>> + operands[2], operands[3], 0)" >>>> + [(parallel [(set (match_operand:SI 2 "memory_operand" "") >>>> + (match_operand:SI 0 "s_register_operand" "")) >>>> + (set (match_operand:SI 3 "memory_operand" "") >>>> + (match_operand:SI 1 "s_register_operand" ""))])] >>>> + "" >>>> +) >>>> Index: arm.c >>>> =================================================================== >>>> --- arm.c (revision 163853) >>>> +++ arm.c (working copy) >>>> @@ -22976,4 +22976,125 @@ arm_expand_sync (enum machine_mode mode, >>>> } >>>> } >>>> >>>> +/* Check the legality of operands in an ldrd/strd instruction. 
*/ >>>> +bool >>>> +thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base, >>>> + rtx off1, rtx off2, bool ldrd) >>>> +{ >>>> + HOST_WIDE_INT offset1 = 0; >>>> + HOST_WIDE_INT offset2 = 0; >>>> + >>>> + if (off1 != NULL) >>>> + offset1 = INTVAL (off1); >>>> + if (off2 != NULL) >>>> + offset2 = INTVAL (off2); >>>> + >>>> + if (ldrd && (reg1 == reg2)) >>>> + return false; >>>> + >>>> + if ((offset1 + 4) == offset2) >>>> + return true; >>>> + if ((offset2 + 4) == offset1) >>>> + return true; >>>> + >>>> + return false; >>>> +} >>>> + >>>> +/* Check if the two memory accesses can be merged to an ldrd/strd instruction. >>>> + That is they use the same base register, and the gap between constant >>>> + offsets should be 4. */ >>>> +bool >>>> +thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd) >>>> +{ >>>> + rtx base1, base2, op1; >>>> + rtx addr1 = XEXP (mem1, 0); >>>> + rtx addr2 = XEXP (mem2, 0); >>>> + HOST_WIDE_INT offset1 = 0; >>>> + HOST_WIDE_INT offset2 = 0; >>>> + >>>> + if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) >>>> + return false; >>>> + >>>> + if (REG_P (addr1)) >>>> + base1 = addr1; >>>> + else if (GET_CODE (addr1) == PLUS) >>>> + { >>>> + base1 = XEXP (addr1, 0); >>>> + op1 = XEXP (addr1, 1); >>>> + if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT)) >>>> + return false; >>>> + offset1 = INTVAL (op1); >>>> + } >>>> + else >>>> + return false; >>>> + >>>> + if (REG_P (addr2)) >>>> + base2 = addr2; >>>> + else if (GET_CODE (addr2) == PLUS) >>>> + { >>>> + base2 = XEXP (addr2, 0); >>>> + op1 = XEXP (addr2, 1); >>>> + if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT)) >>>> + return false; >>>> + offset2 = INTVAL (op1); >>>> + } >>>> + else >>>> + return false; >>>> + >>>> + if (base1 != base2) >>>> + return false; >>>> + >>>> + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) >>>> + return false; >>>> + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) >>>> + return false; >>>> + >>>> + 
if (ldrd && ((reg1 == reg2) || (reg1 == base1))) >>>> + return false; >>>> + >>>> + if ((offset1 + 4) == offset2) >>>> + return true; >>>> + if ((offset2 + 4) == offset1) >>>> + return true; >>>> + >>>> + return false; >>>> +} >>>> + >>>> +/* Check if the insn can be expressed as ldm/stm with less cost. */ >>>> +bool >>>> +thumb2_prefer_ldmstm (rtx reg1, rtx reg2, rtx base, >>>> + rtx off1, rtx off2, bool ldrd) >>>> +{ >>>> + HOST_WIDE_INT offset1 = 0; >>>> + HOST_WIDE_INT offset2 = 0; >>>> + >>>> + if (off1 != NULL) >>>> + offset1 = INTVAL (off1); >>>> + if (off2 != NULL) >>>> + offset2 = INTVAL (off2); >>>> + >>>> + if (offset1 > offset2) >>>> + { >>>> + rtx tmp; >>>> + HOST_WIDE_INT t = offset1; >>>> + offset1 = offset2; >>>> + offset2 = t; >>>> + tmp = reg1; >>>> + reg1 = reg2; >>>> + reg2 = tmp; >>>> + } >>>> + >>>> + /* The offset of ldmdb is -8, the offset of ldmia is 0. */ >>>> + if ((offset1 != -8) && (offset1 != 0)) >>>> + return false; >>>> + >>>> + /* Lower register corresponds to lower memory. */ >>>> + if (REGNO (reg1) > REGNO (reg2)) >>>> + return false; >>>> + >>>> + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower >>>> + cost. 
*/ >>>> + return false; >>>> +} >>>> + >>>> #include "gt-arm.h" >>>> >>>> Index: arm-protos.h >>>> =================================================================== >>>> --- arm-protos.h (revision 163853) >>>> +++ arm-protos.h (working copy) >>>> @@ -149,7 +149,9 @@ extern void arm_expand_sync (enum machin >>>> extern const char *arm_output_memory_barrier (rtx *); >>>> extern const char *arm_output_sync_insn (rtx, rtx *); >>>> extern unsigned int arm_sync_loop_insns (rtx , rtx *); >>>> - >>>> +extern bool thumb2_check_ldrd_operands (rtx, rtx, rtx, rtx, rtx, bool); >>>> +extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool); >>>> +extern bool thumb2_prefer_ldmstm (rtx, rtx, rtx, rtx, rtx, bool); >>>> extern bool arm_output_addr_const_extra (FILE *, rtx); >>>> >>>> #if defined TREE_CODE >>>> Index: ldmstm.md >>>> =================================================================== >>>> --- ldmstm.md (revision 163853) >>>> +++ ldmstm.md (working copy) >>>> @@ -852,7 +852,7 @@ (define_insn "*ldm2_ia" >>>> (set (match_operand:SI 2 "arm_hard_register_operand" "") >>>> (mem:SI (plus:SI (match_dup 3) >>>> (const_int 4))))])] >>>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>>> "ldm%(ia%)\t%3, {%1, %2}" >>>> [(set_attr "type" "load2") >>>> (set_attr "predicable" "yes")]) >>>> @@ -901,7 +901,7 @@ (define_insn "*stm2_ia" >>>> (match_operand:SI 1 "arm_hard_register_operand" "")) >>>> (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) >>>> (match_operand:SI 2 "arm_hard_register_operand" ""))])] >>>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>>> "stm%(ia%)\t%3, {%1, %2}" >>>> [(set_attr "type" "store2") >>>> (set_attr "predicable" "yes")]) >>>> @@ -1041,7 +1041,7 @@ (define_insn "*ldm2_db" >>>> (set (match_operand:SI 2 "arm_hard_register_operand" "") >>>> (mem:SI (plus:SI (match_dup 3) >>>> (const_int -4))))])] >>>> - "TARGET_32BIT && XVECLEN 
(operands[0], 0) == 2" >>>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>>> "ldm%(db%)\t%3, {%1, %2}" >>>> [(set_attr "type" "load2") >>>> (set_attr "predicable" "yes")]) >>>> @@ -1067,7 +1067,7 @@ (define_insn "*stm2_db" >>>> (match_operand:SI 1 "arm_hard_register_operand" "")) >>>> (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) >>>> (match_operand:SI 2 "arm_hard_register_operand" ""))])] >>>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>>> "stm%(db%)\t%3, {%1, %2}" >>>> [(set_attr "type" "store2") >>>> (set_attr "predicable" "yes")]) >>>> Index: constraints.md >>>> =================================================================== >>>> --- constraints.md (revision 163853) >>>> +++ constraints.md (working copy) >>>> @@ -31,7 +31,7 @@ >>>> ;; The following multi-letter normal constraints have been used: >>>> ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz >>>> ;; in Thumb-1 state: Pa, Pb, Pc, Pd >>>> -;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px >>>> +;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px, Py >>>> >>>> ;; The following memory constraints have been used: >>>> ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us >>>> @@ -189,6 +189,13 @@ (define_constraint "Px" >>>> (and (match_code "const_int") >>>> (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1"))) >>>> >>>> +(define_constraint "Py" >>>> + "@internal In Thumb-2 state a constant that is a multiple of 4 in the >>>> + range -1020 to 1024" >>>> + (and (match_code "const_int") >>>> + (match_test "TARGET_THUMB2 && ival >= -1020 && ival <= 1024 >>>> + && (ival & 3) == 0"))) >>>> + >>>> (define_constraint "G" >>>> "In ARM/Thumb-2 state a valid FPA immediate constant." 
>>>> (and (match_code "const_double") >>>> >>>> >>>> Index: pr40457-1.c >>>> =================================================================== >>>> --- pr40457-1.c (revision 163853) >>>> +++ pr40457-1.c (working copy) >>>> @@ -1,9 +1,9 @@ >>>> -/* { dg-options "-Os" } */ >>>> +/* { dg-options "-O2" } */ >>>> /* { dg-do compile } */ >>>> >>>> int bar(int* p) >>>> { >>>> - int x = p[0] + p[1]; >>>> + int x = p[0] + p[1] + p[2]; >>>> return x; >>>> } >>>> >>>> Index: pr40457-2.c >>>> =================================================================== >>>> --- pr40457-2.c (revision 163853) >>>> +++ pr40457-2.c (working copy) >>>> @@ -5,6 +5,7 @@ void foo(int* p) >>>> { >>>> p[0] = 1; >>>> p[1] = 0; >>>> + p[2] = 2; >>>> } >>>> >>>> /* { dg-final { scan-assembler "stm" } } */ >>>> Index: pr45335.c >>>> =================================================================== >>>> --- pr45335.c (revision 0) >>>> +++ pr45335.c (revision 0) >>>> @@ -0,0 +1,22 @@ >>>> +/* { dg-options "-mthumb -O2" } */ >>>> +/* { dg-require-effective-target arm_thumb2_ok } */ >>>> +/* { dg-final { scan-assembler "ldrd" } } */ >>>> +/* { dg-final { scan-assembler "strd" } } */ >>>> + >>>> +struct S >>>> +{ >>>> + void* p1; >>>> + void* p2; >>>> + void* p3; >>>> + void* p4; >>>> +}; >>>> + >>>> +extern printf(char*, ...); >>>> + >>>> +void foo1(struct S* fp, struct S* otherSaveArea) >>>> +{ >>>> + struct S* saveA = fp - 1; >>>> + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); >>>> + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", >>>> + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); >>>> +} >>>> >>> >> > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PING][PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2010-10-12 9:00 ` Carrot Wei @ 2010-10-12 15:37 ` Ian Lance Taylor 0 siblings, 0 replies; 46+ messages in thread From: Ian Lance Taylor @ 2010-10-12 15:37 UTC (permalink / raw) To: Carrot Wei Cc: Richard Earnshaw, Paul Brook, Nick Clifton, ramana.radhakrishnan, gcc-patches Carrot is reasonably asking (internally at Google) how to get a patch reviewed. This is a localized ARM specific patch that's been outstanding for over a month. He's been pinging it regularly. I don't particularly want to review this patch myself both because i don't know Thumb2 and because I tend to not review non-obvious patches by people I work with. Carrot, I took a look at the patch and I have only two style comments. The first is that instead of writing condition && function(arg1, arg2 arg3, arg4) you should write condition && function(arg1, arg2, arg3, arg4) The second is that you should avoid using words like "legality" in your comment. All code is legal, in the sense that it is permitted by the law. The word you want here is "validity." Can some ARM maintainer please take a look some time this week? Thanks. Ian On Tue, Oct 12, 2010 at 1:52 AM, Carrot Wei <carrot@google.com> wrote: > Ping > > On Tue, Oct 5, 2010 at 7:52 PM, Carrot Wei <carrot@google.com> wrote: >> Ping ... >> >> On Sat, Sep 25, 2010 at 9:16 AM, Carrot Wei <carrot@google.com> wrote: >>> Ping again. >>> >>> On Sun, Sep 19, 2010 at 1:59 PM, Carrot Wei <carrot@google.com> wrote: >>>> Ping >>>> >>>> On Sat, Sep 4, 2010 at 8:41 PM, Carrot Wei <carrot@google.com> wrote: >>>>> On Wed, Sep 1, 2010 at 11:22 PM, Richard Earnshaw <rearnsha@arm.com> wrote: >>>>>> If you submit an updated patch, please re-include the changelog entry, >>>>>> even if it's the same. >>>>>> >>>>>> There are two obvious problems with this patch: >>>>>> >>>>>> 1) You presume that ldrd is always cheaper than ldm(2 regs). This isn't >>>>>> the case on Cortex-a9. 
I'm not expecting you to work out all the >>>>>> details of when A9 should use LDM and when it should use ldrd, but your >>>>>> code needs to ascertain the costs of each alternative and make a >>>>>> decision based on that answer, not on a static choice. >>>>>> >>>>>> 2) Your code fails to check for volatile mems. These must not be >>>>>> transformed and the original load/store instructions must be preserved. >>>>>> >>>>> >>>>> 1. A new function thumb2_prefer_ldmstm is used to choose ldm/stm or ldrd/strd. >>>>> The default behavior is to output ldrd/strd. One should update this function if >>>>> ldm/stm is better. >>>>> >>>>> 2. Function thumb2_legitimate_ldrd_p is updated to check volatile memory access. >>>>> >>>>> Following is the new patch >>>>> >>>>> ChangeLog: >>>>> 2010-09-04 Wei Guozhi <carrot@google.com> >>>>> >>>>> PR target/45335 >>>>> * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, >>>>> thumb2_ldrd_reg2 and peephole2): New insn pattern and related >>>>> peephole2. >>>>> (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): >>>>> New insn pattern and related peephole2. >>>>> * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. >>>>> (thumb2_check_ldrd_operands): New function. >>>>> (thumb2_prefer_ldmstm): New function. >>>>> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype. >>>>> (thumb2_check_ldrd_operands): New prototype. >>>>> (thumb2_prefer_ldmstm): New prototype. >>>>> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): >>>>> Change the ldm/stm patterns with 2 words to ARM only. >>>>> * gcc/config/arm/constraints.md (Py): New thumb2 constant constraint >>>>> suitable to ldrd/strd instructions. >>>>> >>>>> >>>>> 2010-09-04 Wei Guozhi <carrot@google.com> >>>>> >>>>> PR target/45335 >>>>> * gcc.target/arm/pr45335.c: New test. >>>>> * gcc.target/arm/pr40457-1.c: Changed to load 3 words. >>>>> * gcc.target/arm/pr40457-2.c: Changed to store 3 words. 
>>>>> >>>>> >>>>> >>>>> Index: thumb2.md >>>>> =================================================================== >>>>> --- thumb2.md (revision 163853) >>>>> +++ thumb2.md (working copy) >>>>> @@ -1257,3 +1257,226 @@ (define_peephole2 >>>>> " >>>>> operands[2] = GEN_INT (32 - INTVAL (operands[2])); >>>>> ") >>>>> + >>>>> +(define_insn "*thumb2_ldrd" >>>>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>>>> + (mem:SI (plus:SI >>>>> + (match_operand:SI 2 "s_register_operand" "rk") >>>>> + (match_operand:SI 3 "const_int_operand" "Py")))) >>>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>>> + (mem:SI (plus:SI (match_dup 2) >>>>> + (match_operand:SI 4 "const_int_operand" "Py"))))])] >>>>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>>>> + operands[2], operands[3], operands[4], 1)" >>>>> + "* >>>>> + { >>>>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>>>> + HOST_WIDE_INT offset2 = INTVAL (operands[4]); >>>>> + if (offset1 > offset2) >>>>> + { >>>>> + /* Swap the operands so that memory [base+offset1] is loaded into >>>>> + operands[0]. 
*/ >>>>> + rtx tmp = operands[0]; >>>>> + operands[0] = operands[1]; >>>>> + operands[1] = tmp; >>>>> + tmp = operands[3]; >>>>> + operands[3] = operands[4]; >>>>> + operands[4] = tmp; >>>>> + offset1 = INTVAL (operands[3]); >>>>> + offset2 = INTVAL (operands[4]); >>>>> + } >>>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>>> + operands[2], operands[3], operands[4], 1)) >>>>> + return \"ldmdb\\t%2, {%0, %1}\"; >>>>> + else if (fix_cm3_ldrd && (operands[2] == operands[0])) >>>>> + { >>>>> + if (offset1 <= -256) >>>>> + { >>>>> + output_asm_insn (\"sub\\t%2, %2, %n3\", operands); >>>>> + output_asm_insn (\"ldr\\t%1, [%2, #4]\", operands); >>>>> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>>>> + } >>>>> + else >>>>> + { >>>>> + output_asm_insn (\"ldr\\t%1, [%2, %4]\", operands); >>>>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>>>> + } >>>>> + return \"\"; >>>>> + } >>>>> + else >>>>> + return \"ldrd\\t%0, %1, [%2, %3]\"; >>>>> + }" >>>>> +) >>>>> + >>>>> +(define_insn "*thumb2_ldrd_reg1" >>>>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>>>> + (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))) >>>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>>> + (mem:SI (plus:SI (match_dup 2) >>>>> + (match_operand:SI 3 "const_int_operand" "Py"))))])] >>>>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>>>> + operands[2], 0, operands[3], 1)" >>>>> + "* >>>>> + { >>>>> + HOST_WIDE_INT offset2 = INTVAL (operands[3]); >>>>> + if (offset2 == 4) >>>>> + { >>>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>>> + operands[2], 0, operands[3], 1)) >>>>> + return \"ldmia\\t%2, {%0, %1}\"; >>>>> + if (fix_cm3_ldrd && (operands[2] == operands[0])) >>>>> + { >>>>> + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); >>>>> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>>>> + return \"\"; >>>>> + } >>>>> + return \"ldrd\\t%0, %1, [%2]\"; >>>>> + } >>>>> + else >>>>> + { >>>>> + if 
(fix_cm3_ldrd && (operands[2] == operands[1])) >>>>> + { >>>>> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>>>> + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); >>>>> + } >>>>> + return \"ldrd\\t%1, %0, [%2, %3]\"; >>>>> + } >>>>> + }" >>>>> +) >>>>> + >>>>> +(define_insn "*thumb2_ldrd_reg2" >>>>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>>>> + (mem:SI (plus:SI >>>>> + (match_operand:SI 2 "s_register_operand" "rk") >>>>> + (match_operand:SI 3 "const_int_operand" "Py")))) >>>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>>> + (mem:SI (match_dup 2)))])] >>>>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>>>> + operands[2], operands[3], 0, 1)" >>>>> + "* >>>>> + { >>>>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>>>> + if (offset1 == -4) >>>>> + { >>>>> + if (fix_cm3_ldrd && (operands[2] == operands[0])) >>>>> + { >>>>> + output_asm_insn (\"ldr\\t%1, [%2]\", operands); >>>>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>>>> + return \"\"; >>>>> + } >>>>> + return \"ldrd\\t%0, %1, [%2, %3]\"; >>>>> + } >>>>> + else >>>>> + { >>>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>>> + operands[2], operands[3], 0, 1)) >>>>> + return \"ldmia\\t%2, {%1, %0}\"; >>>>> + if (fix_cm3_ldrd && (operands[2] == operands[1])) >>>>> + { >>>>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>>>> + output_asm_insn (\"ldr\\t%1, [%2]\", operands); >>>>> + return \"\"; >>>>> + } >>>>> + return \"ldrd\\t%1, %0, [%2]\"; >>>>> + } >>>>> + }" >>>>> +) >>>>> + >>>>> +(define_peephole2 >>>>> + [(set (match_operand:SI 0 "s_register_operand" "") >>>>> + (match_operand:SI 2 "memory_operand" "")) >>>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>>> + (match_operand:SI 3 "memory_operand" ""))] >>>>> + "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1], >>>>> + operands[2], operands[3], 1)" >>>>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>>>> 
+ (match_operand:SI 2 "memory_operand" "")) >>>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>>> + (match_operand:SI 3 "memory_operand" ""))])] >>>>> + "" >>>>> +) >>>>> + >>>>> +(define_insn "*thumb2_strd" >>>>> + [(parallel [(set (mem:SI >>>>> + (plus:SI (match_operand:SI 2 "s_register_operand" "rk") >>>>> + (match_operand:SI 3 "const_int_operand" "Py"))) >>>>> + (match_operand:SI 0 "s_register_operand" "")) >>>>> + (set (mem:SI (plus:SI (match_dup 2) >>>>> + (match_operand:SI 4 "const_int_operand" "Py"))) >>>>> + (match_operand:SI 1 "s_register_operand" ""))])] >>>>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>>>> + operands[2], operands[3], operands[4], 0)" >>>>> + "* >>>>> + { >>>>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>>>> + HOST_WIDE_INT offset2 = INTVAL (operands[4]); >>>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>>> + operands[2], operands[3], operands[4], 0)) >>>>> + return \"stmdb\\t%2, {%0, %1}\"; >>>>> + if (offset1 < offset2 ) >>>>> + return \"strd\\t%0, %1, [%2, %3]\"; >>>>> + else >>>>> + return \"strd\\t%1, %0, [%2, %4]\"; >>>>> + }" >>>>> +) >>>>> + >>>>> +(define_insn "*thumb2_strd_reg1" >>>>> + [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk")) >>>>> + (match_operand:SI 0 "s_register_operand" "")) >>>>> + (set (mem:SI (plus:SI (match_dup 2) >>>>> + (match_operand:SI 3 "const_int_operand" "Py"))) >>>>> + (match_operand:SI 1 "s_register_operand" ""))])] >>>>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>>>> + operands[2], 0, operands[3], 0)" >>>>> + "* >>>>> + { >>>>> + HOST_WIDE_INT offset2 = INTVAL (operands[3]); >>>>> + if (offset2 == 4) >>>>> + { >>>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>>> + operands[2], 0, operands[3], 0)) >>>>> + return \"stmia\\t%2, {%0, %1}\"; >>>>> + return \"strd\\t%0, %1, [%2]\"; >>>>> + } >>>>> + else >>>>> + return \"strd\\t%1, %0, [%2, %3]\"; >>>>> + }" >>>>> +) >>>>> + 
>>>>> +(define_insn "*thumb2_strd_reg2" >>>>> + [(parallel [(set (mem:SI (plus:SI >>>>> + (match_operand:SI 2 "s_register_operand" "rk") >>>>> + (match_operand:SI 3 "const_int_operand" "Py"))) >>>>> + (match_operand:SI 0 "s_register_operand" "")) >>>>> + (set (mem:SI (match_dup 2)) >>>>> + (match_operand:SI 1 "s_register_operand" ""))])] >>>>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>>>> + operands[2], operands[3], 0, 0)" >>>>> + "* >>>>> + { >>>>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>>>> + if (offset1 == -4) >>>>> + return \"strd\\t%0, %1, [%2, %3]\"; >>>>> + else >>>>> + { >>>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>>> + operands[2], operands[3], 0, 0)) >>>>> + return \"stmia\\t%2, {%1, %0}\"; >>>>> + return \"strd\\t%1, %0, [%2]\"; >>>>> + } >>>>> + }" >>>>> +) >>>>> + >>>>> +(define_peephole2 >>>>> + [(set (match_operand:SI 2 "memory_operand" "") >>>>> + (match_operand:SI 0 "s_register_operand" "")) >>>>> + (set (match_operand:SI 3 "memory_operand" "") >>>>> + (match_operand:SI 1 "s_register_operand" ""))] >>>>> + "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1], >>>>> + operands[2], operands[3], 0)" >>>>> + [(parallel [(set (match_operand:SI 2 "memory_operand" "") >>>>> + (match_operand:SI 0 "s_register_operand" "")) >>>>> + (set (match_operand:SI 3 "memory_operand" "") >>>>> + (match_operand:SI 1 "s_register_operand" ""))])] >>>>> + "" >>>>> +) >>>>> Index: arm.c >>>>> =================================================================== >>>>> --- arm.c (revision 163853) >>>>> +++ arm.c (working copy) >>>>> @@ -22976,4 +22976,125 @@ arm_expand_sync (enum machine_mode mode, >>>>> } >>>>> } >>>>> >>>>> +/* Check the legality of operands in an ldrd/strd instruction. 
*/ >>>>> +bool >>>>> +thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base, >>>>> + rtx off1, rtx off2, bool ldrd) >>>>> +{ >>>>> + HOST_WIDE_INT offset1 = 0; >>>>> + HOST_WIDE_INT offset2 = 0; >>>>> + >>>>> + if (off1 != NULL) >>>>> + offset1 = INTVAL (off1); >>>>> + if (off2 != NULL) >>>>> + offset2 = INTVAL (off2); >>>>> + >>>>> + if (ldrd && (reg1 == reg2)) >>>>> + return false; >>>>> + >>>>> + if ((offset1 + 4) == offset2) >>>>> + return true; >>>>> + if ((offset2 + 4) == offset1) >>>>> + return true; >>>>> + >>>>> + return false; >>>>> +} >>>>> + >>>>> +/* Check if the two memory accesses can be merged to an ldrd/strd instruction. >>>>> + That is they use the same base register, and the gap between constant >>>>> + offsets should be 4. */ >>>>> +bool >>>>> +thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd) >>>>> +{ >>>>> + rtx base1, base2, op1; >>>>> + rtx addr1 = XEXP (mem1, 0); >>>>> + rtx addr2 = XEXP (mem2, 0); >>>>> + HOST_WIDE_INT offset1 = 0; >>>>> + HOST_WIDE_INT offset2 = 0; >>>>> + >>>>> + if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) >>>>> + return false; >>>>> + >>>>> + if (REG_P (addr1)) >>>>> + base1 = addr1; >>>>> + else if (GET_CODE (addr1) == PLUS) >>>>> + { >>>>> + base1 = XEXP (addr1, 0); >>>>> + op1 = XEXP (addr1, 1); >>>>> + if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT)) >>>>> + return false; >>>>> + offset1 = INTVAL (op1); >>>>> + } >>>>> + else >>>>> + return false; >>>>> + >>>>> + if (REG_P (addr2)) >>>>> + base2 = addr2; >>>>> + else if (GET_CODE (addr2) == PLUS) >>>>> + { >>>>> + base2 = XEXP (addr2, 0); >>>>> + op1 = XEXP (addr2, 1); >>>>> + if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT)) >>>>> + return false; >>>>> + offset2 = INTVAL (op1); >>>>> + } >>>>> + else >>>>> + return false; >>>>> + >>>>> + if (base1 != base2) >>>>> + return false; >>>>> + >>>>> + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) >>>>> + return false; >>>>> + if ((offset2 > 1024) || 
(offset2 < -1020) || ((offset2 & 3) != 0)) >>>>> + return false; >>>>> + >>>>> + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) >>>>> + return false; >>>>> + >>>>> + if ((offset1 + 4) == offset2) >>>>> + return true; >>>>> + if ((offset2 + 4) == offset1) >>>>> + return true; >>>>> + >>>>> + return false; >>>>> +} >>>>> + >>>>> +/* Check if the insn can be expressed as ldm/stm with less cost. */ >>>>> +bool >>>>> +thumb2_prefer_ldmstm (rtx reg1, rtx reg2, rtx base, >>>>> + rtx off1, rtx off2, bool ldrd) >>>>> +{ >>>>> + HOST_WIDE_INT offset1 = 0; >>>>> + HOST_WIDE_INT offset2 = 0; >>>>> + >>>>> + if (off1 != NULL) >>>>> + offset1 = INTVAL (off1); >>>>> + if (off2 != NULL) >>>>> + offset2 = INTVAL (off2); >>>>> + >>>>> + if (offset1 > offset2) >>>>> + { >>>>> + rtx tmp; >>>>> + HOST_WIDE_INT t = offset1; >>>>> + offset1 = offset2; >>>>> + offset2 = t; >>>>> + tmp = reg1; >>>>> + reg1 = reg2; >>>>> + reg2 = tmp; >>>>> + } >>>>> + >>>>> + /* The offset of ldmdb is -8, the offset of ldmia is 0. */ >>>>> + if ((offset1 != -8) && (offset1 != 0)) >>>>> + return false; >>>>> + >>>>> + /* Lower register corresponds to lower memory. */ >>>>> + if (REGNO (reg1) > REGNO (reg2)) >>>>> + return false; >>>>> + >>>>> + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower >>>>> + cost. 
*/ >>>>> + return false; >>>>> +} >>>>> + >>>>> #include "gt-arm.h" >>>>> >>>>> Index: arm-protos.h >>>>> =================================================================== >>>>> --- arm-protos.h (revision 163853) >>>>> +++ arm-protos.h (working copy) >>>>> @@ -149,7 +149,9 @@ extern void arm_expand_sync (enum machin >>>>> extern const char *arm_output_memory_barrier (rtx *); >>>>> extern const char *arm_output_sync_insn (rtx, rtx *); >>>>> extern unsigned int arm_sync_loop_insns (rtx , rtx *); >>>>> - >>>>> +extern bool thumb2_check_ldrd_operands (rtx, rtx, rtx, rtx, rtx, bool); >>>>> +extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool); >>>>> +extern bool thumb2_prefer_ldmstm (rtx, rtx, rtx, rtx, rtx, bool); >>>>> extern bool arm_output_addr_const_extra (FILE *, rtx); >>>>> >>>>> #if defined TREE_CODE >>>>> Index: ldmstm.md >>>>> =================================================================== >>>>> --- ldmstm.md (revision 163853) >>>>> +++ ldmstm.md (working copy) >>>>> @@ -852,7 +852,7 @@ (define_insn "*ldm2_ia" >>>>> (set (match_operand:SI 2 "arm_hard_register_operand" "") >>>>> (mem:SI (plus:SI (match_dup 3) >>>>> (const_int 4))))])] >>>>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>>>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>>>> "ldm%(ia%)\t%3, {%1, %2}" >>>>> [(set_attr "type" "load2") >>>>> (set_attr "predicable" "yes")]) >>>>> @@ -901,7 +901,7 @@ (define_insn "*stm2_ia" >>>>> (match_operand:SI 1 "arm_hard_register_operand" "")) >>>>> (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) >>>>> (match_operand:SI 2 "arm_hard_register_operand" ""))])] >>>>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>>>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>>>> "stm%(ia%)\t%3, {%1, %2}" >>>>> [(set_attr "type" "store2") >>>>> (set_attr "predicable" "yes")]) >>>>> @@ -1041,7 +1041,7 @@ (define_insn "*ldm2_db" >>>>> (set (match_operand:SI 2 "arm_hard_register_operand" "") >>>>> (mem:SI (plus:SI (match_dup 3) >>>>> (const_int 
-4))))])] >>>>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>>>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>>>> "ldm%(db%)\t%3, {%1, %2}" >>>>> [(set_attr "type" "load2") >>>>> (set_attr "predicable" "yes")]) >>>>> @@ -1067,7 +1067,7 @@ (define_insn "*stm2_db" >>>>> (match_operand:SI 1 "arm_hard_register_operand" "")) >>>>> (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) >>>>> (match_operand:SI 2 "arm_hard_register_operand" ""))])] >>>>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>>>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>>>> "stm%(db%)\t%3, {%1, %2}" >>>>> [(set_attr "type" "store2") >>>>> (set_attr "predicable" "yes")]) >>>>> Index: constraints.md >>>>> =================================================================== >>>>> --- constraints.md (revision 163853) >>>>> +++ constraints.md (working copy) >>>>> @@ -31,7 +31,7 @@ >>>>> ;; The following multi-letter normal constraints have been used: >>>>> ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz >>>>> ;; in Thumb-1 state: Pa, Pb, Pc, Pd >>>>> -;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px >>>>> +;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px, Py >>>>> >>>>> ;; The following memory constraints have been used: >>>>> ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us >>>>> @@ -189,6 +189,13 @@ (define_constraint "Px" >>>>> (and (match_code "const_int") >>>>> (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1"))) >>>>> >>>>> +(define_constraint "Py" >>>>> + "@internal In Thumb-2 state a constant that is a multiple of 4 in the >>>>> + range -1020 to 1024" >>>>> + (and (match_code "const_int") >>>>> + (match_test "TARGET_THUMB2 && ival >= -1020 && ival <= 1024 >>>>> + && (ival & 3) == 0"))) >>>>> + >>>>> (define_constraint "G" >>>>> "In ARM/Thumb-2 state a valid FPA immediate constant." 
>>>>> (and (match_code "const_double") >>>>> >>>>> >>>>> Index: pr40457-1.c >>>>> =================================================================== >>>>> --- pr40457-1.c (revision 163853) >>>>> +++ pr40457-1.c (working copy) >>>>> @@ -1,9 +1,9 @@ >>>>> -/* { dg-options "-Os" } */ >>>>> +/* { dg-options "-O2" } */ >>>>> /* { dg-do compile } */ >>>>> >>>>> int bar(int* p) >>>>> { >>>>> - int x = p[0] + p[1]; >>>>> + int x = p[0] + p[1] + p[2]; >>>>> return x; >>>>> } >>>>> >>>>> Index: pr40457-2.c >>>>> =================================================================== >>>>> --- pr40457-2.c (revision 163853) >>>>> +++ pr40457-2.c (working copy) >>>>> @@ -5,6 +5,7 @@ void foo(int* p) >>>>> { >>>>> p[0] = 1; >>>>> p[1] = 0; >>>>> + p[2] = 2; >>>>> } >>>>> >>>>> /* { dg-final { scan-assembler "stm" } } */ >>>>> Index: pr45335.c >>>>> =================================================================== >>>>> --- pr45335.c (revision 0) >>>>> +++ pr45335.c (revision 0) >>>>> @@ -0,0 +1,22 @@ >>>>> +/* { dg-options "-mthumb -O2" } */ >>>>> +/* { dg-require-effective-target arm_thumb2_ok } */ >>>>> +/* { dg-final { scan-assembler "ldrd" } } */ >>>>> +/* { dg-final { scan-assembler "strd" } } */ >>>>> + >>>>> +struct S >>>>> +{ >>>>> + void* p1; >>>>> + void* p2; >>>>> + void* p3; >>>>> + void* p4; >>>>> +}; >>>>> + >>>>> +extern printf(char*, ...); >>>>> + >>>>> +void foo1(struct S* fp, struct S* otherSaveArea) >>>>> +{ >>>>> + struct S* saveA = fp - 1; >>>>> + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); >>>>> + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", >>>>> + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); >>>>> +} >>>>> >>>> >>> >> > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2010-09-04 13:15 ` Carrot Wei 2010-09-13 14:54 ` Carrot Wei 2010-09-19 9:10 ` [PING][PATCH: " Carrot Wei @ 2010-10-13 11:28 ` Paul Brook 2010-10-16 14:36 ` Carrot Wei 2 siblings, 1 reply; 46+ messages in thread From: Paul Brook @ 2010-10-13 11:28 UTC (permalink / raw) To: gcc-patches; +Cc: Carrot Wei, Richard Earnshaw, ramana.radhakrishnan > ChangeLog: > 2010-09-04 Wei Guozhi <carrot@google.com> > > PR target/45335 > * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, > thumb2_ldrd_reg2 and peephole2): New insn pattern and related > peephole2. > (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): > New insn pattern and related peephole2. > * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. > (thumb2_check_ldrd_operands): New function. > (thumb2_prefer_ldmstm): New function. > * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New > prototype. (thumb2_check_ldrd_operands): New prototype. > (thumb2_prefer_ldmstm): New prototype. > * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): > Change the ldm/stm patterns with 2 words to ARM only. > * gcc/config/arm/constraints.md (Py): New thumb2 constant > constraint suitable to ldrd/strd instructions. Not ok. Why is this restricted to Thumb mode? The ARM variant of ldrd isn't quite as flexible, but still provides a useful improvement over ldm. This transformation is only valid on ARMv7 cores. On earlier hardware (depending on system configuration) it may cause undefined behavior or an alignment trap. The range of -1020 to +1024 is used in several places, but without any apparent explanation of why it's different to the range of an ldrd instruction. I figured it out eventually, but it deserves a comment. > + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], > + operands[2], 0, operands[3], 1)" The passed operands do not match the expected types. 
Specifically "0" is not an rtx (should be "NULL_RTX"), and "1" is not a boolean value (should be "true"). There are many other occurrences. > +(define_constraint "Py" > + "@internal In Thumb-2 state a constant that is a multiple of 4 in the > + range -1020 to 1024" This comment seems particularly pointless. You should mention why this exists/where it is used. I think you're better off enforcing this in the insn condition, and removing this constraint. At least half the uses (the -reg[12] insns) are incorrect, and you already need the condition to enforce the dependency between the operands. > +thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base, >... > + if (ldrd && (reg1 == reg2)) > + return false; This function is part of the instruction condition. Instruction conditions must not be used to enforce register allocation. > +thumb2_legitimate_ldrd_p ( >... > + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) > + return false; You're incorrectly assuming offset1 < offset2, which might not be true at this point. > + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower > + cost. */ > + return false; The code clearly doesn't match the comment. In fact this function always returns false. Paul ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2010-10-13 11:28 ` [PATCH: " Paul Brook @ 2010-10-16 14:36 ` Carrot Wei 2010-10-24 16:59 ` Carrot Wei 0 siblings, 1 reply; 46+ messages in thread From: Carrot Wei @ 2010-10-16 14:36 UTC (permalink / raw) To: Paul Brook; +Cc: gcc-patches, Richard Earnshaw, ramana.radhakrishnan On Wed, Oct 13, 2010 at 7:01 PM, Paul Brook <paul@codesourcery.com> wrote: >> ChangeLog: >> 2010-09-04 Wei Guozhi <carrot@google.com> >> >> PR target/45335 >> * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, >> thumb2_ldrd_reg2 and peephole2): New insn pattern and related >> peephole2. >> (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): >> New insn pattern and related peephole2. >> * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. >> (thumb2_check_ldrd_operands): New function. >> (thumb2_prefer_ldmstm): New function. >> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New >> prototype. (thumb2_check_ldrd_operands): New prototype. >> (thumb2_prefer_ldmstm): New prototype. >> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): >> Change the ldm/stm patterns with 2 words to ARM only. >> * gcc/config/arm/constraints.md (Py): New thumb2 constant >> constraint suitable to ldrd/strd instructions. > > Not ok. > > Why is this restricted to Thumb mode? The ARM variant of ldrd isn't quite as > flexible, but still provides a useful improvement over ldm. > I agree the ARM version is also useful. But it brings much less benefit at the cost of much more complexity (due to more restrictions and insn pattern conflicts with ldm). So I will leave it as a future improvement. > This transformation is only valid on ARMv7 cores. On earlier hardware > (depending on system configuration) it may cause undefined behavior or an > alignment trap. > Done. 
> The range on -1020 to +1024 is used in several places, but without any > apparent explanation of why it's different to the range of an ldrd > instruction. I figured it out eventually, but it deserves a comment. > Comments added. >> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >> + operands[2], 0, operands[3], 1)" > > Passed operands do not match expected types. Specifically "0" is not an rtx > (should be "NULL_RTX"), and "1" is not a boolean value (should be "true"). > Many other occurrences. > Fixed. >> +(define_constraint "Py" >> + "@internal In Thumb-2 state a constant that is a multiple of 4 in the >> + range -1020 to 1024" > > This comment seems particularly pointless. You should mention why this > exists/where it is used. > > I think you're better off enforcing this in the insn condition, and remove > this constraint. At least half the uses (the -reg[12] insns) are incorrect, > and you already need the condition to enforce the dependency between the > operands. > I removed this constraint and added the check to the insn condition. >> +thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base, >>... >> + if (ldrd && (reg1 == reg2)) >> + return false; > > This function is part of the instruction condition. Instruction conditions > must not be used to enforce register allocation. > Removed. >> +thumb2_legitimate_ldrd_p ( >>... >> + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) >> + return false; > > You're incorrectly assuming offset1 < offset2, which might not be true at this > point. > The following check handles the case offset1 < offset2 + if ((offset1 + 4) == offset2) + return true; And another check handles the case offset2 < offset1, so both cases are covered. + if ((offset2 + 4) == offset1) + return true; >> + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower >> + cost. */ >> + return false; > > Code clearly doesn't match the comment. In fact this function always returns > false. 
> Richard mentioned that in some cases (specifically Cortex-A9) ldm has a lower cost than ldrd and we should model this in the insn pattern. This function exists for that purpose. But I don't know the Cortex-A9 architecture details, so it should be filled in by somebody with more knowledge about it in the future. Wei Guozhi ChangeLog: 2010-10-16 Wei Guozhi <carrot@google.com> PR target/45335 * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, thumb2_ldrd_reg2 and peephole2): New insn pattern and related peephole2. (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): New insn pattern and related peephole2. * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. (thumb2_check_ldrd_operands): New function. (thumb2_prefer_ldmstm): New function. * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype. (thumb2_check_ldrd_operands): New prototype. (thumb2_prefer_ldmstm): New prototype. * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): Change the ldm/stm patterns with 2 words to ARM only. 2010-10-16 Wei Guozhi <carrot@google.com> PR target/45335 * gcc.target/arm/pr45335.c: New test. * gcc.target/arm/pr40457-1.c: Changed to load 3 words. * gcc.target/arm/pr40457-2.c: Changed to store 3 words. * gcc.target/arm/pr40457-3.c: Changed to store 3 words. 
Index: thumb2.md =================================================================== --- thumb2.md (revision 165492) +++ thumb2.md (working copy) @@ -1118,3 +1118,234 @@ (define_peephole2 " operands[2] = GEN_INT (32 - INTVAL (operands[2])); ") + +(define_insn "*thumb2_ldrd" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (mem:SI (plus:SI + (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" "")))) + (set (match_operand:SI 1 "s_register_operand" "") + (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 4 "const_int_operand" ""))))])] + "TARGET_THUMB2 && arm_arch7 + && thumb2_check_ldrd_operands (operands[3], operands[4])" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + HOST_WIDE_INT offset2 = INTVAL (operands[4]); + if (offset1 > offset2) + { + /* Swap the operands so that memory [base+offset1] is loaded into + operands[0]. */ + rtx tmp = operands[0]; + operands[0] = operands[1]; + operands[1] = tmp; + tmp = operands[3]; + operands[3] = operands[4]; + operands[4] = tmp; + offset1 = INTVAL (operands[3]); + offset2 = INTVAL (operands[4]); + } + if (thumb2_prefer_ldmstm (operands[0], operands[1], + operands[2], operands[3], operands[4], true)) + return \"ldmdb\\t%2, {%0, %1}\"; + else if (fix_cm3_ldrd && (operands[2] == operands[0])) + { + /* fix_cm3_ldrd and the base is also the first destination: use two + LDRs, loading the base register last. */ + if (offset1 <= -256) + { + output_asm_insn (\"sub\\t%2, %2, %n3\", operands); + output_asm_insn (\"ldr\\t%1, [%2, #4]\", operands); + output_asm_insn (\"ldr\\t%0, [%2]\", operands); + } + else + { + output_asm_insn (\"ldr\\t%1, [%2, %4]\", operands); + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); + } + return \"\"; + } + else + return \"ldrd\\t%0, %1, [%2, %3]\"; + }" +) + +(define_insn "*thumb2_ldrd_reg1" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))) + (set (match_operand:SI 1 "s_register_operand" "") + (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 3 "const_int_operand" ""))))])] +
"TARGET_THUMB2 && arm_arch7 + && thumb2_check_ldrd_operands (NULL_RTX, operands[3])" + "* + { + HOST_WIDE_INT offset2 = INTVAL (operands[3]); + /* The insn condition only accepts offset2 == 4 or offset2 == -4. */ + if (offset2 == 4) + { + if (thumb2_prefer_ldmstm (operands[0], operands[1], + operands[2], NULL_RTX, operands[3], true)) + return \"ldmia\\t%2, {%0, %1}\"; + if (fix_cm3_ldrd && (operands[2] == operands[0])) + { + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); + output_asm_insn (\"ldr\\t%0, [%2]\", operands); + return \"\"; + } + return \"ldrd\\t%0, %1, [%2]\"; + } + else + { + if (fix_cm3_ldrd && (operands[2] == operands[1])) + { + output_asm_insn (\"ldr\\t%0, [%2]\", operands); + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); + /* Both words are loaded already; do not also emit the LDRD below. */ + return \"\"; + } + return \"ldrd\\t%1, %0, [%2, %3]\"; + } + }" +) + +(define_insn "*thumb2_ldrd_reg2" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (mem:SI (plus:SI + (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" "")))) + (set (match_operand:SI 1 "s_register_operand" "") + (mem:SI (match_dup 2)))])] + "TARGET_THUMB2 && arm_arch7 + && thumb2_check_ldrd_operands (operands[3], NULL_RTX)" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + /* The insn condition only accepts offset1 == 4 or offset1 == -4. */ + if (offset1 == -4) + { + if (fix_cm3_ldrd && (operands[2] == operands[0])) + { + output_asm_insn (\"ldr\\t%1, [%2]\", operands); + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); + return \"\"; + } + return \"ldrd\\t%0, %1, [%2, %3]\"; + } + else + { + if (thumb2_prefer_ldmstm (operands[0], operands[1], + operands[2], operands[3], NULL_RTX, true)) + return \"ldmia\\t%2, {%1, %0}\"; + if (fix_cm3_ldrd && (operands[2] == operands[1])) + { + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); + output_asm_insn (\"ldr\\t%1, [%2]\", operands); + return \"\"; + } + return \"ldrd\\t%1, %0, [%2]\"; + } + }" +) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3
"memory_operand" ""))] + "TARGET_THUMB2 && arm_arch7 + && thumb2_legitimate_ldrd_p (operands[0], operands[1], + operands[2], operands[3], true)" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" ""))])] + "" +) + +(define_insn "*thumb2_strd" + [(parallel [(set (mem:SI + (plus:SI (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" ""))) + (match_operand:SI 0 "s_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 4 "const_int_operand" ""))) + (match_operand:SI 1 "s_register_operand" ""))])] + "TARGET_THUMB2 && arm_arch7 + && thumb2_check_ldrd_operands (operands[3], operands[4])" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + HOST_WIDE_INT offset2 = INTVAL (operands[4]); + if (thumb2_prefer_ldmstm (operands[0], operands[1], + operands[2], operands[3], operands[4], false)) + return \"stmdb\\t%2, {%0, %1}\"; + if (offset1 < offset2) + return \"strd\\t%0, %1, [%2, %3]\"; + else + return \"strd\\t%1, %0, [%2, %4]\"; + }" +) + +(define_insn "*thumb2_strd_reg1" + [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk")) + (match_operand:SI 0 "s_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 3 "const_int_operand" ""))) + (match_operand:SI 1 "s_register_operand" ""))])] + "TARGET_THUMB2 && arm_arch7 + && thumb2_check_ldrd_operands (NULL_RTX, operands[3])" + "* + { + HOST_WIDE_INT offset2 = INTVAL (operands[3]); + if (offset2 == 4) + { + if (thumb2_prefer_ldmstm (operands[0], operands[1], + operands[2], NULL_RTX, operands[3], false)) + return \"stmia\\t%2, {%0, %1}\"; + return \"strd\\t%0, %1, [%2]\"; + } + else + return \"strd\\t%1, %0, [%2, %3]\"; + }" +) + +(define_insn "*thumb2_strd_reg2" + [(parallel [(set (mem:SI (plus:SI + (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 
"const_int_operand" ""))) + (match_operand:SI 0 "s_register_operand" "")) + (set (mem:SI (match_dup 2)) + (match_operand:SI 1 "s_register_operand" ""))])] + "TARGET_THUMB2 && arm_arch7 + && thumb2_check_ldrd_operands (operands[3], NULL_RTX)" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + if (offset1 == -4) + return \"strd\\t%0, %1, [%2, %3]\"; + else + { + if (thumb2_prefer_ldmstm (operands[0], operands[1], + operands[2], operands[3], NULL_RTX, false)) + return \"stmia\\t%2, {%1, %0}\"; + return \"strd\\t%1, %0, [%2]\"; + } + }" +) + +(define_peephole2 + [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" ""))] + "TARGET_THUMB2 && arm_arch7 + && thumb2_legitimate_ldrd_p (operands[0], operands[1], + operands[2], operands[3], false)" + [(parallel [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" ""))])] + "" +) Index: arm.c =================================================================== --- arm.c (revision 165492) +++ arm.c (working copy) @@ -23254,4 +23254,134 @@ arm_builtin_support_vector_misalignment is_packed); } +/* Check the validity of operands in an ldrd/strd instruction. */ +bool +thumb2_check_ldrd_operands (rtx off1, rtx off2) +{ + HOST_WIDE_INT offset1 = 0; + HOST_WIDE_INT offset2 = 0; + + if (off1 != NULL_RTX) + offset1 = INTVAL (off1); + if (off2 != NULL_RTX) + offset2 = INTVAL (off2); + + /* The offset range of LDRD is [-1020, 1020]. Here we check if both + offsets lie in the range [-1020, 1024]. If one of the offsets is + 1024, the following condition ((offset1 + 4) == offset2) will ensure + offset1 to be 1020, suitable for instruction LDRD. 
*/ + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) + return false; + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) + return false; + + if ((offset1 + 4) == offset2) + return true; + if ((offset2 + 4) == offset1) + return true; + + return false; +} + +/* Check if the two memory accesses can be merged to an ldrd/strd instruction. + That is they use the same base register, and the gap between constant + offsets should be 4. */ +bool +thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd) +{ + rtx base1, base2, op1; + rtx addr1 = XEXP (mem1, 0); + rtx addr2 = XEXP (mem2, 0); + HOST_WIDE_INT offset1 = 0; + HOST_WIDE_INT offset2 = 0; + + if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) + return false; + + if (REG_P (addr1)) + base1 = addr1; + else if (GET_CODE (addr1) == PLUS) + { + base1 = XEXP (addr1, 0); + op1 = XEXP (addr1, 1); + if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT)) + return false; + offset1 = INTVAL (op1); + } + else + return false; + + if (REG_P (addr2)) + base2 = addr2; + else if (GET_CODE (addr2) == PLUS) + { + base2 = XEXP (addr2, 0); + op1 = XEXP (addr2, 1); + if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT)) + return false; + offset2 = INTVAL (op1); + } + else + return false; + + if (base1 != base2) + return false; + + /* The offset range of LDRD is [-1020, 1020]. Here we check if both + offsets lie in the range [-1020, 1024]. If one of the offsets is + 1024, the following condition ((offset1 + 4) == offset2) will ensure + offset1 to be 1020, suitable for instruction LDRD. 
*/ + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) + return false; + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) + return false; + + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) + return false; + + if ((offset1 + 4) == offset2) + return true; + if ((offset2 + 4) == offset1) + return true; + + return false; +} + +/* Check if the insn can be expressed as ldm/stm with less cost. */ +bool +thumb2_prefer_ldmstm (rtx reg1, rtx reg2, rtx base, + rtx off1, rtx off2, bool ldrd) +{ + HOST_WIDE_INT offset1 = 0; + HOST_WIDE_INT offset2 = 0; + + if (off1 != NULL_RTX) + offset1 = INTVAL (off1); + if (off2 != NULL_RTX) + offset2 = INTVAL (off2); + + if (offset1 > offset2) + { + rtx tmp; + HOST_WIDE_INT t = offset1; + offset1 = offset2; + offset2 = t; + tmp = reg1; + reg1 = reg2; + reg2 = tmp; + } + + /* The offset of ldmdb is -8, the offset of ldmia is 0. */ + if ((offset1 != -8) && (offset1 != 0)) + return false; + + /* Lower register corresponds to lower memory. */ + if (REGNO (reg1) > REGNO (reg2)) + return false; + + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower + cost. 
*/ + return false; +} + #include "gt-arm.h" Index: arm-protos.h =================================================================== --- arm-protos.h (revision 165492) +++ arm-protos.h (working copy) @@ -150,6 +150,9 @@ extern void arm_expand_sync (enum machin extern const char *arm_output_memory_barrier (rtx *); extern const char *arm_output_sync_insn (rtx, rtx *); extern unsigned int arm_sync_loop_insns (rtx , rtx *); +extern bool thumb2_check_ldrd_operands (rtx, rtx); +extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool); +extern bool thumb2_prefer_ldmstm (rtx, rtx, rtx, rtx, rtx, bool); #if defined TREE_CODE extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); Index: ldmstm.md =================================================================== --- ldmstm.md (revision 165492) +++ ldmstm.md (working copy) @@ -852,7 +852,7 @@ (define_insn "*ldm2_ia" (set (match_operand:SI 2 "arm_hard_register_operand" "") (mem:SI (plus:SI (match_dup 3) (const_int 4))))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" "ldm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "load2") (set_attr "predicable" "yes")]) @@ -901,7 +901,7 @@ (define_insn "*stm2_ia" (match_operand:SI 1 "arm_hard_register_operand" "")) (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) (match_operand:SI 2 "arm_hard_register_operand" ""))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" "stm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "store2") (set_attr "predicable" "yes")]) @@ -1041,7 +1041,7 @@ (define_insn "*ldm2_db" (set (match_operand:SI 2 "arm_hard_register_operand" "") (mem:SI (plus:SI (match_dup 3) (const_int -4))))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" "ldm%(db%)\t%3, {%1, %2}" [(set_attr "type" "load2") (set_attr "predicable" "yes")]) @@ -1067,7 +1067,7 @@ (define_insn "*stm2_db" (match_operand:SI 1 
"arm_hard_register_operand" "")) (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) (match_operand:SI 2 "arm_hard_register_operand" ""))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" "stm%(db%)\t%3, {%1, %2}" [(set_attr "type" "store2") (set_attr "predicable" "yes")]) Index: pr40457-3.c =================================================================== --- pr40457-3.c (revision 165492) +++ pr40457-3.c (working copy) @@ -5,6 +5,7 @@ void foo(int* p) { p[0] = 1; p[1] = 0; + p[2] = 2; } /* { dg-final { scan-assembler "stm" } } */ Index: pr40457-1.c =================================================================== --- pr40457-1.c (revision 165492) +++ pr40457-1.c (working copy) @@ -1,9 +1,9 @@ -/* { dg-options "-Os" } */ +/* { dg-options "-O2" } */ /* { dg-do compile } */ int bar(int* p) { - int x = p[0] + p[1]; + int x = p[0] + p[1] + p[2]; return x; } Index: pr40457-2.c =================================================================== --- pr40457-2.c (revision 165492) +++ pr40457-2.c (working copy) @@ -5,6 +5,7 @@ void foo(int* p) { p[0] = 1; p[1] = 0; + p[2] = 2; } /* { dg-final { scan-assembler "stm" } } */ Index: pr45335.c =================================================================== --- pr45335.c (revision 0) +++ pr45335.c (revision 0) @@ -0,0 +1,22 @@ +/* { dg-options "-mthumb -O2" } */ +/* { dg-require-effective-target arm_thumb2_ok } */ +/* { dg-final { scan-assembler "ldrd" } } */ +/* { dg-final { scan-assembler "strd" } } */ + +struct S +{ + void* p1; + void* p2; + void* p3; + void* p4; +}; + +extern printf(char*, ...); + +void foo1(struct S* fp, struct S* otherSaveArea) +{ + struct S* saveA = fp - 1; + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); +} ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2010-10-16 14:36 ` Carrot Wei @ 2010-10-24 16:59 ` Carrot Wei 2010-10-31 17:55 ` Carrot Wei 0 siblings, 1 reply; 46+ messages in thread From: Carrot Wei @ 2010-10-24 16:59 UTC (permalink / raw) To: Paul Brook, Richard Earnshaw, Nick Clifton Cc: gcc-patches, ramana.radhakrishnan Ping On Sat, Oct 16, 2010 at 8:27 PM, Carrot Wei <carrot@google.com> wrote: > On Wed, Oct 13, 2010 at 7:01 PM, Paul Brook <paul@codesourcery.com> wrote: >>> ChangeLog: >>> 2010-09-04 Wei Guozhi <carrot@google.com> >>> >>> PR target/45335 >>> * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, >>> thumb2_ldrd_reg2 and peephole2): New insn pattern and related >>> peephole2. >>> (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): >>> New insn pattern and related peephole2. >>> * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. >>> (thumb2_check_ldrd_operands): New function. >>> (thumb2_prefer_ldmstm): New function. >>> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New >>> prototype. (thumb2_check_ldrd_operands): New prototype. >>> (thumb2_prefer_ldmstm): New prototype. >>> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): >>> Change the ldm/stm patterns with 2 words to ARM only. >>> * gcc/config/arm/constraints.md (Py): New thumb2 constant >>> constraint suitable to ldrd/strd instructions. >> >> Not ok. >> >> Why is this restricted to Thumb mode? The ARM variant of ldrd isn't quite as >> flexible, but still provides a useful improvement over ldm. >> > I agree the ARM version is also useful. But it brings much less > benefit with too much complexity (due to more restriction and insn > pattern conflict with ldm). So I will leave it as a future > improvement. > >> This transformation is only valid on ARMv7 cores. On earlier hardware >> (depending on system configuration) it may cause undefined behavior or an >> alignment trap. >> > done. 
> >> The range on -1020 to +1024 is used in several places, but without any >> apparent explanation of why it's different to the range of an ldrd >> instruction. I figured it out eventually, but it deserves a comment. >> > Comments added. > >>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>> + operands[2], 0, operands[3], 1)" >> >> Passed operands do not match expected types. Specifically "0" is not an rtx >> (should be "NULL_RTX"), and "1" is not a boolean value (should be "true"). >> Many other occurrences. >> > Fixed. > >>> +(define_constraint "Py" >>> + "@internal In Thumb-2 state a constant that is a multiple of 4 in the >>> + range -1020 to 1024" >> >> This comment seems particularly pointless. You should mention why this >> exists/where it is used. >> >> I think you're better off enforcing this in the insn condition, and remove >> this constraint. At least half the uses (the -reg[12] insns) are incorrect, >> and you already need the condition to enforce the dependency between the >> operands. >> > I removed this constraint and add the check to insn condition. > >>> +thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base, >>>... >>> + if (ldrd && (reg1 == reg2)) >>> + return false; >> >> This function is part of the instruction condition. Instruction conditions >> must not be used to enforce register allocation. >> > removed. > >>> +thumb2_legitimate_ldrd_p ( >>>... >>> + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) >>> + return false; >> >> You're incorrectly assuming offset1 < offset2, which might not be true at this >> point. >> > The following check assumes offset1 < offset2 > + if ((offset1 + 4) == offset2) > + return true; > > And another check assumes offset2 < offset1, so both cases are covered. > + if ((offset2 + 4) == offset1) > + return true; > >>> + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower >>> + cost. */ >>> + return false; >> >> Code clearly doesn't match the comment. 
In fact this function always returns >> false. >> > Richard mentioned that in some cases (specifically cortex A9) ldm has > less cost than ldrd and we should model this in the insn pattern. This > function is used for this. But I don't know the cortex A9 architecture > detail, so it should be filled by somebody with more knowledge about > it in future. > > Wei Guozhi > > > ChangeLog: > 2010-10-16 Wei Guozhi <carrot@google.com> > > PR target/45335 > * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, > thumb2_ldrd_reg2 and peephole2): New insn pattern and related > peephole2. > (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): > New insn pattern and related peephole2. > * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. > (thumb2_check_ldrd_operands): New function. > (thumb2_prefer_ldmstm): New function. > * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype. > (thumb2_check_ldrd_operands): New prototype. > (thumb2_prefer_ldmstm): New prototype. > * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): > Change the ldm/stm patterns with 2 words to ARM only. > > > 2010-10-16 Wei Guozhi <carrot@google.com> > > PR target/45335 > * gcc.target/arm/pr45335.c: New test. > * gcc.target/arm/pr40457-1.c: Changed to load 3 words. > * gcc.target/arm/pr40457-2.c: Changed to store 3 words. > * gcc.target/arm/pr40457-3.c: Changed to store 3 words. 
> > > Index: thumb2.md > =================================================================== > --- thumb2.md (revision 165492) > +++ thumb2.md (working copy) > @@ -1118,3 +1118,228 @@ (define_peephole2 > " > operands[2] = GEN_INT (32 - INTVAL (operands[2])); > ") > + > +(define_insn "*thumb2_ldrd" > + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") > + (mem:SI (plus:SI > + (match_operand:SI 2 "s_register_operand" "rk") > + (match_operand:SI 3 "const_int_operand" "")))) > + (set (match_operand:SI 1 "s_register_operand" "") > + (mem:SI (plus:SI (match_dup 2) > + (match_operand:SI 4 "const_int_operand" ""))))])] > + "TARGET_THUMB2 && arm_arch7 > + && thumb2_check_ldrd_operands (operands[3], operands[4])" > + "* > + { > + HOST_WIDE_INT offset1 = INTVAL (operands[3]); > + HOST_WIDE_INT offset2 = INTVAL (operands[4]); > + if (offset1 > offset2) > + { > + /* Swap the operands so that memory [base+offset1] is loaded into > + operands[0]. */ > + rtx tmp = operands[0]; > + operands[0] = operands[1]; > + operands[1] = tmp; > + tmp = operands[3]; > + operands[3] = operands[4]; > + operands[4] = tmp; > + offset1 = INTVAL (operands[3]); > + offset2 = INTVAL (operands[4]); > + } > + if (thumb2_prefer_ldmstm (operands[0], operands[1], > + operands[2], operands[3], operands[4], true)) > + return \"ldmdb\\t%2, {%0, %1}\"; > + else if (fix_cm3_ldrd && (operands[2] == operands[0])) > + { > + if (offset1 <= -256) > + { > + output_asm_insn (\"sub\\t%2, %2, %n3\", operands); > + output_asm_insn (\"ldr\\t%1, [%2, #4]\", operands); > + output_asm_insn (\"ldr\\t%0, [%2]\", operands); > + } > + else > + { > + output_asm_insn (\"ldr\\t%1, [%2, %4]\", operands); > + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); > + } > + return \"\"; > + } > + else > + return \"ldrd\\t%0, %1, [%2, %3]\"; > + }" > +) > + > +(define_insn "*thumb2_ldrd_reg1" > + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") > + (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))) > + (set 
(match_operand:SI 1 "s_register_operand" "") > + (mem:SI (plus:SI (match_dup 2) > + (match_operand:SI 3 "const_int_operand" ""))))])] > + "TARGET_THUMB2 && arm_arch7 > + && thumb2_check_ldrd_operands (NULL_RTX, operands[3])" > + "* > + { > + HOST_WIDE_INT offset2 = INTVAL (operands[3]); > + if (offset2 == 4) > + { > + if (thumb2_prefer_ldmstm (operands[0], operands[1], > + operands[2], NULL_RTX, operands[3], true)) > + return \"ldmia\\t%2, {%0, %1}\"; > + if (fix_cm3_ldrd && (operands[2] == operands[0])) > + { > + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); > + output_asm_insn (\"ldr\\t%0, [%2]\", operands); > + return \"\"; > + } > + return \"ldrd\\t%0, %1, [%2]\"; > + } > + else > + { > + if (fix_cm3_ldrd && (operands[2] == operands[1])) > + { > + output_asm_insn (\"ldr\\t%0, [%2]\", operands); > + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); > + } > + return \"ldrd\\t%1, %0, [%2, %3]\"; > + } > + }" > +) > + > +(define_insn "*thumb2_ldrd_reg2" > + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") > + (mem:SI (plus:SI > + (match_operand:SI 2 "s_register_operand" "rk") > + (match_operand:SI 3 "const_int_operand" "")))) > + (set (match_operand:SI 1 "s_register_operand" "") > + (mem:SI (match_dup 2)))])] > + "TARGET_THUMB2 && arm_arch7 > + && thumb2_check_ldrd_operands (operands[3], NULL_RTX)" > + "* > + { > + HOST_WIDE_INT offset1 = INTVAL (operands[3]); > + if (offset1 == -4) > + { > + if (fix_cm3_ldrd && (operands[2] == operands[0])) > + { > + output_asm_insn (\"ldr\\t%1, [%2]\", operands); > + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); > + return \"\"; > + } > + return \"ldrd\\t%0, %1, [%2, %3]\"; > + } > + else > + { > + if (thumb2_prefer_ldmstm (operands[0], operands[1], > + operands[2], operands[3], NULL_RTX, true)) > + return \"ldmia\\t%2, {%1, %0}\"; > + if (fix_cm3_ldrd && (operands[2] == operands[1])) > + { > + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); > + output_asm_insn (\"ldr\\t%1, [%2]\", operands); > + 
return \"\"; > + } > + return \"ldrd\\t%1, %0, [%2]\"; > + } > + }" > +) > + > +(define_peephole2 > + [(set (match_operand:SI 0 "s_register_operand" "") > + (match_operand:SI 2 "memory_operand" "")) > + (set (match_operand:SI 1 "s_register_operand" "") > + (match_operand:SI 3 "memory_operand" ""))] > + "TARGET_THUMB2 && arm_arch7 > + && thumb2_legitimate_ldrd_p (operands[0], operands[1], > + operands[2], operands[3], true)" > + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") > + (match_operand:SI 2 "memory_operand" "")) > + (set (match_operand:SI 1 "s_register_operand" "") > + (match_operand:SI 3 "memory_operand" ""))])] > + "" > +) > + > +(define_insn "*thumb2_strd" > + [(parallel [(set (mem:SI > + (plus:SI (match_operand:SI 2 "s_register_operand" "rk") > + (match_operand:SI 3 "const_int_operand" ""))) > + (match_operand:SI 0 "s_register_operand" "")) > + (set (mem:SI (plus:SI (match_dup 2) > + (match_operand:SI 4 "const_int_operand" ""))) > + (match_operand:SI 1 "s_register_operand" ""))])] > + "TARGET_THUMB2 && arm_arch7 > + && thumb2_check_ldrd_operands (operands[3], operands[4])" > + "* > + { > + HOST_WIDE_INT offset1 = INTVAL (operands[3]); > + HOST_WIDE_INT offset2 = INTVAL (operands[4]); > + if (thumb2_prefer_ldmstm (operands[0], operands[1], > + operands[2], operands[3], operands[4], false)) > + return \"stmdb\\t%2, {%0, %1}\"; > + if (offset1 < offset2) > + return \"strd\\t%0, %1, [%2, %3]\"; > + else > + return \"strd\\t%1, %0, [%2, %4]\"; > + }" > +) > + > +(define_insn "*thumb2_strd_reg1" > + [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk")) > + (match_operand:SI 0 "s_register_operand" "")) > + (set (mem:SI (plus:SI (match_dup 2) > + (match_operand:SI 3 "const_int_operand" ""))) > + (match_operand:SI 1 "s_register_operand" ""))])] > + "TARGET_THUMB2 && arm_arch7 > + && thumb2_check_ldrd_operands (NULL_RTX, operands[3])" > + "* > + { > + HOST_WIDE_INT offset2 = INTVAL (operands[3]); > + if (offset2 == 4) > + { > + if 
(thumb2_prefer_ldmstm (operands[0], operands[1], > + operands[2], NULL_RTX, operands[3], false)) > + return \"stmia\\t%2, {%0, %1}\"; > + return \"strd\\t%0, %1, [%2]\"; > + } > + else > + return \"strd\\t%1, %0, [%2, %3]\"; > + }" > +) > + > +(define_insn "*thumb2_strd_reg2" > + [(parallel [(set (mem:SI (plus:SI > + (match_operand:SI 2 "s_register_operand" "rk") > + (match_operand:SI 3 "const_int_operand" ""))) > + (match_operand:SI 0 "s_register_operand" "")) > + (set (mem:SI (match_dup 2)) > + (match_operand:SI 1 "s_register_operand" ""))])] > + "TARGET_THUMB2 && arm_arch7 > + && thumb2_check_ldrd_operands (operands[3], NULL_RTX)" > + "* > + { > + HOST_WIDE_INT offset1 = INTVAL (operands[3]); > + if (offset1 == -4) > + return \"strd\\t%0, %1, [%2, %3]\"; > + else > + { > + if (thumb2_prefer_ldmstm (operands[0], operands[1], > + operands[2], operands[3], NULL_RTX, false)) > + return \"stmia\\t%2, {%1, %0}\"; > + return \"strd\\t%1, %0, [%2]\"; > + } > + }" > +) > + > +(define_peephole2 > + [(set (match_operand:SI 2 "memory_operand" "") > + (match_operand:SI 0 "s_register_operand" "")) > + (set (match_operand:SI 3 "memory_operand" "") > + (match_operand:SI 1 "s_register_operand" ""))] > + "TARGET_THUMB2 && arm_arch7 > + && thumb2_legitimate_ldrd_p (operands[0], operands[1], > + operands[2], operands[3], false)" > + [(parallel [(set (match_operand:SI 2 "memory_operand" "") > + (match_operand:SI 0 "s_register_operand" "")) > + (set (match_operand:SI 3 "memory_operand" "") > + (match_operand:SI 1 "s_register_operand" ""))])] > + "" > +) > Index: arm.c > =================================================================== > --- arm.c (revision 165492) > +++ arm.c (working copy) > @@ -23254,4 +23254,134 @@ arm_builtin_support_vector_misalignment > is_packed); > } > > +/* Check the validity of operands in an ldrd/strd instruction. 
*/ > +bool > +thumb2_check_ldrd_operands (rtx off1, rtx off2) > +{ > + HOST_WIDE_INT offset1 = 0; > + HOST_WIDE_INT offset2 = 0; > + > + if (off1 != NULL_RTX) > + offset1 = INTVAL (off1); > + if (off2 != NULL_RTX) > + offset2 = INTVAL (off2); > + > + /* The offset range of LDRD is [-1020, 1020]. Here we check if both > + offsets lie in the range [-1020, 1024]. If one of the offsets is > + 1024, the following condition ((offset1 + 4) == offset2) will ensure > + offset1 to be 1020, suitable for instruction LDRD. */ > + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) > + return false; > + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) > + return false; > + > + if ((offset1 + 4) == offset2) > + return true; > + if ((offset2 + 4) == offset1) > + return true; > + > + return false; > +} > + > +/* Check if the two memory accesses can be merged to an ldrd/strd instruction. > + That is they use the same base register, and the gap between constant > + offsets should be 4. 
*/ > +bool > +thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd) > +{ > + rtx base1, base2, op1; > + rtx addr1 = XEXP (mem1, 0); > + rtx addr2 = XEXP (mem2, 0); > + HOST_WIDE_INT offset1 = 0; > + HOST_WIDE_INT offset2 = 0; > + > + if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) > + return false; > + > + if (REG_P (addr1)) > + base1 = addr1; > + else if (GET_CODE (addr1) == PLUS) > + { > + base1 = XEXP (addr1, 0); > + op1 = XEXP (addr1, 1); > + if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT)) > + return false; > + offset1 = INTVAL (op1); > + } > + else > + return false; > + > + if (REG_P (addr2)) > + base2 = addr2; > + else if (GET_CODE (addr2) == PLUS) > + { > + base2 = XEXP (addr2, 0); > + op1 = XEXP (addr2, 1); > + if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT)) > + return false; > + offset2 = INTVAL (op1); > + } > + else > + return false; > + > + if (base1 != base2) > + return false; > + > + /* The offset range of LDRD is [-1020, 1020]. Here we check if both > + offsets lie in the range [-1020, 1024]. If one of the offsets is > + 1024, the following condition ((offset1 + 4) == offset2) will ensure > + offset1 to be 1020, suitable for instruction LDRD. */ > + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) > + return false; > + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) > + return false; > + > + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) > + return false; > + > + if ((offset1 + 4) == offset2) > + return true; > + if ((offset2 + 4) == offset1) > + return true; > + > + return false; > +} > + > +/* Check if the insn can be expressed as ldm/stm with less cost. 
*/ > +bool > +thumb2_prefer_ldmstm (rtx reg1, rtx reg2, rtx base, > + rtx off1, rtx off2, bool ldrd) > +{ > + HOST_WIDE_INT offset1 = 0; > + HOST_WIDE_INT offset2 = 0; > + > + if (off1 != NULL_RTX) > + offset1 = INTVAL (off1); > + if (off2 != NULL_RTX) > + offset2 = INTVAL (off2); > + > + if (offset1 > offset2) > + { > + rtx tmp; > + HOST_WIDE_INT t = offset1; > + offset1 = offset2; > + offset2 = t; > + tmp = reg1; > + reg1 = reg2; > + reg2 = tmp; > + } > + > + /* The offset of ldmdb is -8, the offset of ldmia is 0. */ > + if ((offset1 != -8) && (offset1 != 0)) > + return false; > + > + /* Lower register corresponds to lower memory. */ > + if (REGNO (reg1) > REGNO (reg2)) > + return false; > + > + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower > + cost. */ > + return false; > +} > + > #include "gt-arm.h" > Index: arm-protos.h > =================================================================== > --- arm-protos.h (revision 165492) > +++ arm-protos.h (working copy) > @@ -150,6 +150,9 @@ extern void arm_expand_sync (enum machin > extern const char *arm_output_memory_barrier (rtx *); > extern const char *arm_output_sync_insn (rtx, rtx *); > extern unsigned int arm_sync_loop_insns (rtx , rtx *); > +extern bool thumb2_check_ldrd_operands (rtx, rtx); > +extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool); > +extern bool thumb2_prefer_ldmstm (rtx, rtx, rtx, rtx, rtx, bool); > > #if defined TREE_CODE > extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); > Index: ldmstm.md > =================================================================== > --- ldmstm.md (revision 165492) > +++ ldmstm.md (working copy) > @@ -852,7 +852,7 @@ (define_insn "*ldm2_ia" > (set (match_operand:SI 2 "arm_hard_register_operand" "") > (mem:SI (plus:SI (match_dup 3) > (const_int 4))))])] > - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" > + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" > "ldm%(ia%)\t%3, {%1, %2}" > [(set_attr "type" 
"load2") > (set_attr "predicable" "yes")]) > @@ -901,7 +901,7 @@ (define_insn "*stm2_ia" > (match_operand:SI 1 "arm_hard_register_operand" "")) > (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) > (match_operand:SI 2 "arm_hard_register_operand" ""))])] > - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" > + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" > "stm%(ia%)\t%3, {%1, %2}" > [(set_attr "type" "store2") > (set_attr "predicable" "yes")]) > @@ -1041,7 +1041,7 @@ (define_insn "*ldm2_db" > (set (match_operand:SI 2 "arm_hard_register_operand" "") > (mem:SI (plus:SI (match_dup 3) > (const_int -4))))])] > - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" > + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" > "ldm%(db%)\t%3, {%1, %2}" > [(set_attr "type" "load2") > (set_attr "predicable" "yes")]) > @@ -1067,7 +1067,7 @@ (define_insn "*stm2_db" > (match_operand:SI 1 "arm_hard_register_operand" "")) > (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) > (match_operand:SI 2 "arm_hard_register_operand" ""))])] > - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" > + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" > "stm%(db%)\t%3, {%1, %2}" > [(set_attr "type" "store2") > (set_attr "predicable" "yes")]) > > > Index: pr40457-3.c > =================================================================== > --- pr40457-3.c (revision 165492) > +++ pr40457-3.c (working copy) > @@ -5,6 +5,7 @@ void foo(int* p) > { > p[0] = 1; > p[1] = 0; > + p[2] = 2; > } > > /* { dg-final { scan-assembler "stm" } } */ > Index: pr40457-1.c > =================================================================== > --- pr40457-1.c (revision 165492) > +++ pr40457-1.c (working copy) > @@ -1,9 +1,9 @@ > -/* { dg-options "-Os" } */ > +/* { dg-options "-O2" } */ > /* { dg-do compile } */ > > int bar(int* p) > { > - int x = p[0] + p[1]; > + int x = p[0] + p[1] + p[2]; > return x; > } > > Index: pr40457-2.c > =================================================================== > --- pr40457-2.c (revision 165492) 
> +++ pr40457-2.c (working copy) > @@ -5,6 +5,7 @@ void foo(int* p) > { > p[0] = 1; > p[1] = 0; > + p[2] = 2; > } > > /* { dg-final { scan-assembler "stm" } } */ > Index: pr45335.c > =================================================================== > --- pr45335.c (revision 0) > +++ pr45335.c (revision 0) > @@ -0,0 +1,22 @@ > +/* { dg-options "-mthumb -O2" } */ > +/* { dg-require-effective-target arm_thumb2_ok } */ > +/* { dg-final { scan-assembler "ldrd" } } */ > +/* { dg-final { scan-assembler "strd" } } */ > + > +struct S > +{ > + void* p1; > + void* p2; > + void* p3; > + void* p4; > +}; > + > +extern printf(char*, ...); > + > +void foo1(struct S* fp, struct S* otherSaveArea) > +{ > + struct S* saveA = fp - 1; > + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); > + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", > + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); > +} > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2010-10-24 16:59 ` Carrot Wei @ 2010-10-31 17:55 ` Carrot Wei 2010-11-23 0:23 ` Carrot Wei 0 siblings, 1 reply; 46+ messages in thread From: Carrot Wei @ 2010-10-31 17:55 UTC (permalink / raw) To: Paul Brook, Richard Earnshaw, Nick Clifton Cc: gcc-patches, ramana.radhakrishnan Ping On Sun, Oct 24, 2010 at 9:46 PM, Carrot Wei <carrot@google.com> wrote: > Ping > > On Sat, Oct 16, 2010 at 8:27 PM, Carrot Wei <carrot@google.com> wrote: >> On Wed, Oct 13, 2010 at 7:01 PM, Paul Brook <paul@codesourcery.com> wrote: >>>> ChangeLog: >>>> 2010-09-04 Wei Guozhi <carrot@google.com> >>>> >>>> PR target/45335 >>>> * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, >>>> thumb2_ldrd_reg2 and peephole2): New insn pattern and related >>>> peephole2. >>>> (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): >>>> New insn pattern and related peephole2. >>>> * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. >>>> (thumb2_check_ldrd_operands): New function. >>>> (thumb2_prefer_ldmstm): New function. >>>> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New >>>> prototype. (thumb2_check_ldrd_operands): New prototype. >>>> (thumb2_prefer_ldmstm): New prototype. >>>> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): >>>> Change the ldm/stm patterns with 2 words to ARM only. >>>> * gcc/config/arm/constraints.md (Py): New thumb2 constant >>>> constraint suitable to ldrd/strd instructions. >>> >>> Not ok. >>> >>> Why is this restricted to Thumb mode? The ARM variant of ldrd isn't quite as >>> flexible, but still provides a useful improvement over ldm. >>> >> I agree the ARM version is also useful. But it brings much less >> benefit with too much complexity (due to more restriction and insn >> pattern conflict with ldm). So I will leave it as a future >> improvement. >> >>> This transformation is only valid on ARMv7 cores. 
On earlier hardware >>> (depending on system configuration) it may cause undefined behavior or an >>> alignment trap. >>> >> done. >> >>> The range on -1020 to +1024 is used in several places, but without any >>> apparent explanation of why it's different to the range of an ldrd >>> instruction. I figured it out eventually, but it deserves a comment. >>> >> Comments added. >> >>>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>>> + operands[2], 0, operands[3], 1)" >>> >>> Passed operands do not match expected types. Specifically "0" is not an rtx >>> (should be "NULL_RTX"), and "1" is not a boolean value (should be "true"). >>> Many other occurrences. >>> >> Fixed. >> >>>> +(define_constraint "Py" >>>> + "@internal In Thumb-2 state a constant that is a multiple of 4 in the >>>> + range -1020 to 1024" >>> >>> This comment seems particularly pointless. You should mention why this >>> exists/where it is used. >>> >>> I think you're better off enforcing this in the insn condition, and remove >>> this constraint. At least half the uses (the -reg[12] insns) are incorrect, >>> and you already need the condition to enforce the dependency between the >>> operands. >>> >> I removed this constraint and add the check to insn condition. >> >>>> +thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base, >>>>... >>>> + if (ldrd && (reg1 == reg2)) >>>> + return false; >>> >>> This function is part of the instruction condition. Instruction conditions >>> must not be used to enforce register allocation. >>> >> removed. >> >>>> +thumb2_legitimate_ldrd_p ( >>>>... >>>> + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) >>>> + return false; >>> >>> You're incorrectly assuming offset1 < offset2, which might not be true at this >>> point. >>> >> The following check assumes offset1 < offset2 >> + if ((offset1 + 4) == offset2) >> + return true; >> >> And another check assumes offset2 < offset1, so both cases are covered. 
>> + if ((offset2 + 4) == offset1) >> + return true; >> >>>> + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower >>>> + cost. */ >>>> + return false; >>> >>> Code clearly doesn't match the comment. In fact this function always returns >>> false. >>> >> Richard mentioned that in some cases (specifically cortex A9) ldm has >> a lower cost than ldrd and we should model this in the insn pattern. This >> function is used for this. But I don't know the cortex A9 architecture >> details, so it should be filled in by somebody with more knowledge about >> it in the future. >> >> Wei Guozhi >> >> >> ChangeLog: >> 2010-10-16 Wei Guozhi <carrot@google.com> >> >> PR target/45335 >> * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, >> thumb2_ldrd_reg2 and peephole2): New insn pattern and related >> peephole2. >> (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): >> New insn pattern and related peephole2. >> * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. >> (thumb2_check_ldrd_operands): New function. >> (thumb2_prefer_ldmstm): New function. >> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype. >> (thumb2_check_ldrd_operands): New prototype. >> (thumb2_prefer_ldmstm): New prototype. >> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): >> Change the ldm/stm patterns with 2 words to ARM only. >> >> >> 2010-10-16 Wei Guozhi <carrot@google.com> >> >> PR target/45335 >> * gcc.target/arm/pr45335.c: New test. >> * gcc.target/arm/pr40457-1.c: Changed to load 3 words. >> * gcc.target/arm/pr40457-2.c: Changed to store 3 words. >> * gcc.target/arm/pr40457-3.c: Changed to store 3 words. 
>> >> >> Index: thumb2.md >> =================================================================== >> --- thumb2.md (revision 165492) >> +++ thumb2.md (working copy) >> @@ -1118,3 +1118,228 @@ (define_peephole2 >> " >> operands[2] = GEN_INT (32 - INTVAL (operands[2])); >> ") >> + >> +(define_insn "*thumb2_ldrd" >> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >> + (mem:SI (plus:SI >> + (match_operand:SI 2 "s_register_operand" "rk") >> + (match_operand:SI 3 "const_int_operand" "")))) >> + (set (match_operand:SI 1 "s_register_operand" "") >> + (mem:SI (plus:SI (match_dup 2) >> + (match_operand:SI 4 "const_int_operand" ""))))])] >> + "TARGET_THUMB2 && arm_arch7 >> + && thumb2_check_ldrd_operands (operands[3], operands[4])" >> + "* >> + { >> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >> + HOST_WIDE_INT offset2 = INTVAL (operands[4]); >> + if (offset1 > offset2) >> + { >> + /* Swap the operands so that memory [base+offset1] is loaded into >> + operands[0]. */ >> + rtx tmp = operands[0]; >> + operands[0] = operands[1]; >> + operands[1] = tmp; >> + tmp = operands[3]; >> + operands[3] = operands[4]; >> + operands[4] = tmp; >> + offset1 = INTVAL (operands[3]); >> + offset2 = INTVAL (operands[4]); >> + } >> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >> + operands[2], operands[3], operands[4], true)) >> + return \"ldmdb\\t%2, {%0, %1}\"; >> + else if (fix_cm3_ldrd && (operands[2] == operands[0])) >> + { >> + if (offset1 <= -256) >> + { >> + output_asm_insn (\"sub\\t%2, %2, %n3\", operands); >> + output_asm_insn (\"ldr\\t%1, [%2, #4]\", operands); >> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >> + } >> + else >> + { >> + output_asm_insn (\"ldr\\t%1, [%2, %4]\", operands); >> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >> + } >> + return \"\"; >> + } >> + else >> + return \"ldrd\\t%0, %1, [%2, %3]\"; >> + }" >> +) >> + >> +(define_insn "*thumb2_ldrd_reg1" >> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >> + 
(mem:SI (match_operand:SI 2 "s_register_operand" "rk"))) >> + (set (match_operand:SI 1 "s_register_operand" "") >> + (mem:SI (plus:SI (match_dup 2) >> + (match_operand:SI 3 "const_int_operand" ""))))])] >> + "TARGET_THUMB2 && arm_arch7 >> + && thumb2_check_ldrd_operands (NULL_RTX, operands[3])" >> + "* >> + { >> + HOST_WIDE_INT offset2 = INTVAL (operands[3]); >> + if (offset2 == 4) >> + { >> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >> + operands[2], NULL_RTX, operands[3], true)) >> + return \"ldmia\\t%2, {%0, %1}\"; >> + if (fix_cm3_ldrd && (operands[2] == operands[0])) >> + { >> + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); >> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >> + return \"\"; >> + } >> + return \"ldrd\\t%0, %1, [%2]\"; >> + } >> + else >> + { >> + if (fix_cm3_ldrd && (operands[2] == operands[1])) >> + { >> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >> + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); >> + } >> + return \"ldrd\\t%1, %0, [%2, %3]\"; >> + } >> + }" >> +) >> + >> +(define_insn "*thumb2_ldrd_reg2" >> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >> + (mem:SI (plus:SI >> + (match_operand:SI 2 "s_register_operand" "rk") >> + (match_operand:SI 3 "const_int_operand" "")))) >> + (set (match_operand:SI 1 "s_register_operand" "") >> + (mem:SI (match_dup 2)))])] >> + "TARGET_THUMB2 && arm_arch7 >> + && thumb2_check_ldrd_operands (operands[3], NULL_RTX)" >> + "* >> + { >> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >> + if (offset1 == -4) >> + { >> + if (fix_cm3_ldrd && (operands[2] == operands[0])) >> + { >> + output_asm_insn (\"ldr\\t%1, [%2]\", operands); >> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >> + return \"\"; >> + } >> + return \"ldrd\\t%0, %1, [%2, %3]\"; >> + } >> + else >> + { >> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >> + operands[2], operands[3], NULL_RTX, true)) >> + return \"ldmia\\t%2, {%1, %0}\"; >> + if (fix_cm3_ldrd && (operands[2] == 
operands[1])) >> + { >> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >> + output_asm_insn (\"ldr\\t%1, [%2]\", operands); >> + return \"\"; >> + } >> + return \"ldrd\\t%1, %0, [%2]\"; >> + } >> + }" >> +) >> + >> +(define_peephole2 >> + [(set (match_operand:SI 0 "s_register_operand" "") >> + (match_operand:SI 2 "memory_operand" "")) >> + (set (match_operand:SI 1 "s_register_operand" "") >> + (match_operand:SI 3 "memory_operand" ""))] >> + "TARGET_THUMB2 && arm_arch7 >> + && thumb2_legitimate_ldrd_p (operands[0], operands[1], >> + operands[2], operands[3], true)" >> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >> + (match_operand:SI 2 "memory_operand" "")) >> + (set (match_operand:SI 1 "s_register_operand" "") >> + (match_operand:SI 3 "memory_operand" ""))])] >> + "" >> +) >> + >> +(define_insn "*thumb2_strd" >> + [(parallel [(set (mem:SI >> + (plus:SI (match_operand:SI 2 "s_register_operand" "rk") >> + (match_operand:SI 3 "const_int_operand" ""))) >> + (match_operand:SI 0 "s_register_operand" "")) >> + (set (mem:SI (plus:SI (match_dup 2) >> + (match_operand:SI 4 "const_int_operand" ""))) >> + (match_operand:SI 1 "s_register_operand" ""))])] >> + "TARGET_THUMB2 && arm_arch7 >> + && thumb2_check_ldrd_operands (operands[3], operands[4])" >> + "* >> + { >> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >> + HOST_WIDE_INT offset2 = INTVAL (operands[4]); >> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >> + operands[2], operands[3], operands[4], false)) >> + return \"stmdb\\t%2, {%0, %1}\"; >> + if (offset1 < offset2) >> + return \"strd\\t%0, %1, [%2, %3]\"; >> + else >> + return \"strd\\t%1, %0, [%2, %4]\"; >> + }" >> +) >> + >> +(define_insn "*thumb2_strd_reg1" >> + [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk")) >> + (match_operand:SI 0 "s_register_operand" "")) >> + (set (mem:SI (plus:SI (match_dup 2) >> + (match_operand:SI 3 "const_int_operand" ""))) >> + (match_operand:SI 1 "s_register_operand" ""))])] 
>> + "TARGET_THUMB2 && arm_arch7 >> + && thumb2_check_ldrd_operands (NULL_RTX, operands[3])" >> + "* >> + { >> + HOST_WIDE_INT offset2 = INTVAL (operands[3]); >> + if (offset2 == 4) >> + { >> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >> + operands[2], NULL_RTX, operands[3], false)) >> + return \"stmia\\t%2, {%0, %1}\"; >> + return \"strd\\t%0, %1, [%2]\"; >> + } >> + else >> + return \"strd\\t%1, %0, [%2, %3]\"; >> + }" >> +) >> + >> +(define_insn "*thumb2_strd_reg2" >> + [(parallel [(set (mem:SI (plus:SI >> + (match_operand:SI 2 "s_register_operand" "rk") >> + (match_operand:SI 3 "const_int_operand" ""))) >> + (match_operand:SI 0 "s_register_operand" "")) >> + (set (mem:SI (match_dup 2)) >> + (match_operand:SI 1 "s_register_operand" ""))])] >> + "TARGET_THUMB2 && arm_arch7 >> + && thumb2_check_ldrd_operands (operands[3], NULL_RTX)" >> + "* >> + { >> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >> + if (offset1 == -4) >> + return \"strd\\t%0, %1, [%2, %3]\"; >> + else >> + { >> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >> + operands[2], operands[3], NULL_RTX, false)) >> + return \"stmia\\t%2, {%1, %0}\"; >> + return \"strd\\t%1, %0, [%2]\"; >> + } >> + }" >> +) >> + >> +(define_peephole2 >> + [(set (match_operand:SI 2 "memory_operand" "") >> + (match_operand:SI 0 "s_register_operand" "")) >> + (set (match_operand:SI 3 "memory_operand" "") >> + (match_operand:SI 1 "s_register_operand" ""))] >> + "TARGET_THUMB2 && arm_arch7 >> + && thumb2_legitimate_ldrd_p (operands[0], operands[1], >> + operands[2], operands[3], false)" >> + [(parallel [(set (match_operand:SI 2 "memory_operand" "") >> + (match_operand:SI 0 "s_register_operand" "")) >> + (set (match_operand:SI 3 "memory_operand" "") >> + (match_operand:SI 1 "s_register_operand" ""))])] >> + "" >> +) >> Index: arm.c >> =================================================================== >> --- arm.c (revision 165492) >> +++ arm.c (working copy) >> @@ -23254,4 +23254,134 @@ 
arm_builtin_support_vector_misalignment >> is_packed); >> } >> >> +/* Check the validity of operands in an ldrd/strd instruction. */ >> +bool >> +thumb2_check_ldrd_operands (rtx off1, rtx off2) >> +{ >> + HOST_WIDE_INT offset1 = 0; >> + HOST_WIDE_INT offset2 = 0; >> + >> + if (off1 != NULL_RTX) >> + offset1 = INTVAL (off1); >> + if (off2 != NULL_RTX) >> + offset2 = INTVAL (off2); >> + >> + /* The offset range of LDRD is [-1020, 1020]. Here we check if both >> + offsets lie in the range [-1020, 1024]. If one of the offsets is >> + 1024, the following condition ((offset1 + 4) == offset2) will ensure >> + offset1 to be 1020, suitable for instruction LDRD. */ >> + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) >> + return false; >> + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) >> + return false; >> + >> + if ((offset1 + 4) == offset2) >> + return true; >> + if ((offset2 + 4) == offset1) >> + return true; >> + >> + return false; >> +} >> + >> +/* Check if the two memory accesses can be merged to an ldrd/strd instruction. >> + That is they use the same base register, and the gap between constant >> + offsets should be 4. 
*/ >> +bool >> +thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd) >> +{ >> + rtx base1, base2, op1; >> + rtx addr1 = XEXP (mem1, 0); >> + rtx addr2 = XEXP (mem2, 0); >> + HOST_WIDE_INT offset1 = 0; >> + HOST_WIDE_INT offset2 = 0; >> + >> + if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) >> + return false; >> + >> + if (REG_P (addr1)) >> + base1 = addr1; >> + else if (GET_CODE (addr1) == PLUS) >> + { >> + base1 = XEXP (addr1, 0); >> + op1 = XEXP (addr1, 1); >> + if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT)) >> + return false; >> + offset1 = INTVAL (op1); >> + } >> + else >> + return false; >> + >> + if (REG_P (addr2)) >> + base2 = addr2; >> + else if (GET_CODE (addr2) == PLUS) >> + { >> + base2 = XEXP (addr2, 0); >> + op1 = XEXP (addr2, 1); >> + if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT)) >> + return false; >> + offset2 = INTVAL (op1); >> + } >> + else >> + return false; >> + >> + if (base1 != base2) >> + return false; >> + >> + /* The offset range of LDRD is [-1020, 1020]. Here we check if both >> + offsets lie in the range [-1020, 1024]. If one of the offsets is >> + 1024, the following condition ((offset1 + 4) == offset2) will ensure >> + offset1 to be 1020, suitable for instruction LDRD. */ >> + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) >> + return false; >> + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) >> + return false; >> + >> + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) >> + return false; >> + >> + if ((offset1 + 4) == offset2) >> + return true; >> + if ((offset2 + 4) == offset1) >> + return true; >> + >> + return false; >> +} >> + >> +/* Check if the insn can be expressed as ldm/stm with less cost. 
*/ >> +bool >> +thumb2_prefer_ldmstm (rtx reg1, rtx reg2, rtx base, >> + rtx off1, rtx off2, bool ldrd) >> +{ >> + HOST_WIDE_INT offset1 = 0; >> + HOST_WIDE_INT offset2 = 0; >> + >> + if (off1 != NULL_RTX) >> + offset1 = INTVAL (off1); >> + if (off2 != NULL_RTX) >> + offset2 = INTVAL (off2); >> + >> + if (offset1 > offset2) >> + { >> + rtx tmp; >> + HOST_WIDE_INT t = offset1; >> + offset1 = offset2; >> + offset2 = t; >> + tmp = reg1; >> + reg1 = reg2; >> + reg2 = tmp; >> + } >> + >> + /* The offset of ldmdb is -8, the offset of ldmia is 0. */ >> + if ((offset1 != -8) && (offset1 != 0)) >> + return false; >> + >> + /* Lower register corresponds to lower memory. */ >> + if (REGNO (reg1) > REGNO (reg2)) >> + return false; >> + >> + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower >> + cost. */ >> + return false; >> +} >> + >> #include "gt-arm.h" >> Index: arm-protos.h >> =================================================================== >> --- arm-protos.h (revision 165492) >> +++ arm-protos.h (working copy) >> @@ -150,6 +150,9 @@ extern void arm_expand_sync (enum machin >> extern const char *arm_output_memory_barrier (rtx *); >> extern const char *arm_output_sync_insn (rtx, rtx *); >> extern unsigned int arm_sync_loop_insns (rtx , rtx *); >> +extern bool thumb2_check_ldrd_operands (rtx, rtx); >> +extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool); >> +extern bool thumb2_prefer_ldmstm (rtx, rtx, rtx, rtx, rtx, bool); >> >> #if defined TREE_CODE >> extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); >> Index: ldmstm.md >> =================================================================== >> --- ldmstm.md (revision 165492) >> +++ ldmstm.md (working copy) >> @@ -852,7 +852,7 @@ (define_insn "*ldm2_ia" >> (set (match_operand:SI 2 "arm_hard_register_operand" "") >> (mem:SI (plus:SI (match_dup 3) >> (const_int 4))))])] >> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >> + "TARGET_ARM && XVECLEN (operands[0], 0) 
== 2" >> "ldm%(ia%)\t%3, {%1, %2}" >> [(set_attr "type" "load2") >> (set_attr "predicable" "yes")]) >> @@ -901,7 +901,7 @@ (define_insn "*stm2_ia" >> (match_operand:SI 1 "arm_hard_register_operand" "")) >> (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) >> (match_operand:SI 2 "arm_hard_register_operand" ""))])] >> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >> "stm%(ia%)\t%3, {%1, %2}" >> [(set_attr "type" "store2") >> (set_attr "predicable" "yes")]) >> @@ -1041,7 +1041,7 @@ (define_insn "*ldm2_db" >> (set (match_operand:SI 2 "arm_hard_register_operand" "") >> (mem:SI (plus:SI (match_dup 3) >> (const_int -4))))])] >> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >> "ldm%(db%)\t%3, {%1, %2}" >> [(set_attr "type" "load2") >> (set_attr "predicable" "yes")]) >> @@ -1067,7 +1067,7 @@ (define_insn "*stm2_db" >> (match_operand:SI 1 "arm_hard_register_operand" "")) >> (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) >> (match_operand:SI 2 "arm_hard_register_operand" ""))])] >> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >> "stm%(db%)\t%3, {%1, %2}" >> [(set_attr "type" "store2") >> (set_attr "predicable" "yes")]) >> >> >> Index: pr40457-3.c >> =================================================================== >> --- pr40457-3.c (revision 165492) >> +++ pr40457-3.c (working copy) >> @@ -5,6 +5,7 @@ void foo(int* p) >> { >> p[0] = 1; >> p[1] = 0; >> + p[2] = 2; >> } >> >> /* { dg-final { scan-assembler "stm" } } */ >> Index: pr40457-1.c >> =================================================================== >> --- pr40457-1.c (revision 165492) >> +++ pr40457-1.c (working copy) >> @@ -1,9 +1,9 @@ >> -/* { dg-options "-Os" } */ >> +/* { dg-options "-O2" } */ >> /* { dg-do compile } */ >> >> int bar(int* p) >> { >> - int x = p[0] + p[1]; >> + int x = p[0] + p[1] + p[2]; >> return x; >> } >> >> Index: 
pr40457-2.c >> =================================================================== >> --- pr40457-2.c (revision 165492) >> +++ pr40457-2.c (working copy) >> @@ -5,6 +5,7 @@ void foo(int* p) >> { >> p[0] = 1; >> p[1] = 0; >> + p[2] = 2; >> } >> >> /* { dg-final { scan-assembler "stm" } } */ >> Index: pr45335.c >> =================================================================== >> --- pr45335.c (revision 0) >> +++ pr45335.c (revision 0) >> @@ -0,0 +1,22 @@ >> +/* { dg-options "-mthumb -O2" } */ >> +/* { dg-require-effective-target arm_thumb2_ok } */ >> +/* { dg-final { scan-assembler "ldrd" } } */ >> +/* { dg-final { scan-assembler "strd" } } */ >> + >> +struct S >> +{ >> + void* p1; >> + void* p2; >> + void* p3; >> + void* p4; >> +}; >> + >> +extern printf(char*, ...); >> + >> +void foo1(struct S* fp, struct S* otherSaveArea) >> +{ >> + struct S* saveA = fp - 1; >> + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); >> + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", >> + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); >> +} >> > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2010-10-31 17:55 ` Carrot Wei @ 2010-11-23 0:23 ` Carrot Wei 2010-11-30 0:01 ` Carrot Wei 0 siblings, 1 reply; 46+ messages in thread From: Carrot Wei @ 2010-11-23 0:23 UTC (permalink / raw) To: Paul Brook, Richard Earnshaw, Nick Clifton Cc: gcc-patches, ramana.radhakrishnan ping On Sun, Oct 31, 2010 at 2:22 AM, Carrot Wei <carrot@google.com> wrote: > Ping > > On Sun, Oct 24, 2010 at 9:46 PM, Carrot Wei <carrot@google.com> wrote: >> Ping >> >> On Sat, Oct 16, 2010 at 8:27 PM, Carrot Wei <carrot@google.com> wrote: >>> On Wed, Oct 13, 2010 at 7:01 PM, Paul Brook <paul@codesourcery.com> wrote: >>>>> ChangeLog: >>>>> 2010-09-04 Wei Guozhi <carrot@google.com> >>>>> >>>>> PR target/45335 >>>>> * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, >>>>> thumb2_ldrd_reg2 and peephole2): New insn pattern and related >>>>> peephole2. >>>>> (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): >>>>> New insn pattern and related peephole2. >>>>> * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. >>>>> (thumb2_check_ldrd_operands): New function. >>>>> (thumb2_prefer_ldmstm): New function. >>>>> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New >>>>> prototype. (thumb2_check_ldrd_operands): New prototype. >>>>> (thumb2_prefer_ldmstm): New prototype. >>>>> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): >>>>> Change the ldm/stm patterns with 2 words to ARM only. >>>>> * gcc/config/arm/constraints.md (Py): New thumb2 constant >>>>> constraint suitable to ldrd/strd instructions. >>>> >>>> Not ok. >>>> >>>> Why is this restricted to Thumb mode? The ARM variant of ldrd isn't quite as >>>> flexible, but still provides a useful improvement over ldm. >>>> >>> I agree the ARM version is also useful. But it brings much less >>> benefit with too much complexity (due to more restriction and insn >>> pattern conflict with ldm). 
So I will leave it as a future >>> improvement. >>> >>>> This transformation is only valid on ARMv7 cores. On earlier hardware >>>> (depending on system configuration) it may cause undefined behavior or an >>>> alignment trap. >>>> >>> done. >>> >>>> The range on -1020 to +1024 is used in several places, but without any >>>> apparent explanation of why it's different to the range of an ldrd >>>> instruction. I figured it out eventually, but it deserves a comment. >>>> >>> Comments added. >>> >>>>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>>>> + operands[2], 0, operands[3], 1)" >>>> >>>> Passed operands do not match expected types. Specifically "0" is not an rtx >>>> (should be "NULL_RTX"), and "1" is not a boolean value (should be "true"). >>>> Many other occurrences. >>>> >>> Fixed. >>> >>>>> +(define_constraint "Py" >>>>> + "@internal In Thumb-2 state a constant that is a multiple of 4 in the >>>>> + range -1020 to 1024" >>>> >>>> This comment seems particularly pointless. You should mention why this >>>> exists/where it is used. >>>> >>>> I think you're better off enforcing this in the insn condition, and remove >>>> this constraint. At least half the uses (the -reg[12] insns) are incorrect, >>>> and you already need the condition to enforce the dependency between the >>>> operands. >>>> >>> I removed this constraint and add the check to insn condition. >>> >>>>> +thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base, >>>>>... >>>>> + if (ldrd && (reg1 == reg2)) >>>>> + return false; >>>> >>>> This function is part of the instruction condition. Instruction conditions >>>> must not be used to enforce register allocation. >>>> >>> removed. >>> >>>>> +thumb2_legitimate_ldrd_p ( >>>>>... >>>>> + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) >>>>> + return false; >>>> >>>> You're incorrectly assuming offset1 < offset2, which might not be true at this >>>> point. 
>>>> >>> The following check assumes offset1 < offset2 >>> + if ((offset1 + 4) == offset2) >>> + return true; >>> >>> And another check assumes offset2 < offset1, so both cases are covered. >>> + if ((offset2 + 4) == offset1) >>> + return true; >>> >>>>> + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower >>>>> + cost. */ >>>>> + return false; >>>> >>>> Code clearly doesn't match the comment. In fact this function always returns >>>> false. >>>> >>> Richard mentioned that in some cases (specifically cortex A9) ldm has >>> a lower cost than ldrd and we should model this in the insn pattern. This >>> function is used for this. But I don't know the cortex A9 architecture >>> details, so it should be filled in by somebody with more knowledge about >>> it in the future. >>> >>> Wei Guozhi >>> >>> >>> ChangeLog: >>> 2010-10-16 Wei Guozhi <carrot@google.com> >>> >>> PR target/45335 >>> * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, >>> thumb2_ldrd_reg2 and peephole2): New insn pattern and related >>> peephole2. >>> (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): >>> New insn pattern and related peephole2. >>> * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. >>> (thumb2_check_ldrd_operands): New function. >>> (thumb2_prefer_ldmstm): New function. >>> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype. >>> (thumb2_check_ldrd_operands): New prototype. >>> (thumb2_prefer_ldmstm): New prototype. >>> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): >>> Change the ldm/stm patterns with 2 words to ARM only. >>> >>> >>> 2010-10-16 Wei Guozhi <carrot@google.com> >>> >>> PR target/45335 >>> * gcc.target/arm/pr45335.c: New test. >>> * gcc.target/arm/pr40457-1.c: Changed to load 3 words. >>> * gcc.target/arm/pr40457-2.c: Changed to store 3 words. >>> * gcc.target/arm/pr40457-3.c: Changed to store 3 words. 
>>> >>> >>> Index: thumb2.md >>> =================================================================== >>> --- thumb2.md (revision 165492) >>> +++ thumb2.md (working copy) >>> @@ -1118,3 +1118,228 @@ (define_peephole2 >>> " >>> operands[2] = GEN_INT (32 - INTVAL (operands[2])); >>> ") >>> + >>> +(define_insn "*thumb2_ldrd" >>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>> + (mem:SI (plus:SI >>> + (match_operand:SI 2 "s_register_operand" "rk") >>> + (match_operand:SI 3 "const_int_operand" "")))) >>> + (set (match_operand:SI 1 "s_register_operand" "") >>> + (mem:SI (plus:SI (match_dup 2) >>> + (match_operand:SI 4 "const_int_operand" ""))))])] >>> + "TARGET_THUMB2 && arm_arch7 >>> + && thumb2_check_ldrd_operands (operands[3], operands[4])" >>> + "* >>> + { >>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>> + HOST_WIDE_INT offset2 = INTVAL (operands[4]); >>> + if (offset1 > offset2) >>> + { >>> + /* Swap the operands so that memory [base+offset1] is loaded into >>> + operands[0]. 
*/ >>> + rtx tmp = operands[0]; >>> + operands[0] = operands[1]; >>> + operands[1] = tmp; >>> + tmp = operands[3]; >>> + operands[3] = operands[4]; >>> + operands[4] = tmp; >>> + offset1 = INTVAL (operands[3]); >>> + offset2 = INTVAL (operands[4]); >>> + } >>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>> + operands[2], operands[3], operands[4], true)) >>> + return \"ldmdb\\t%2, {%0, %1}\"; >>> + else if (fix_cm3_ldrd && (operands[2] == operands[0])) >>> + { >>> + if (offset1 <= -256) >>> + { >>> + output_asm_insn (\"sub\\t%2, %2, %n3\", operands); >>> + output_asm_insn (\"ldr\\t%1, [%2, #4]\", operands); >>> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>> + } >>> + else >>> + { >>> + output_asm_insn (\"ldr\\t%1, [%2, %4]\", operands); >>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>> + } >>> + return \"\"; >>> + } >>> + else >>> + return \"ldrd\\t%0, %1, [%2, %3]\"; >>> + }" >>> +) >>> + >>> +(define_insn "*thumb2_ldrd_reg1" >>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>> + (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))) >>> + (set (match_operand:SI 1 "s_register_operand" "") >>> + (mem:SI (plus:SI (match_dup 2) >>> + (match_operand:SI 3 "const_int_operand" ""))))])] >>> + "TARGET_THUMB2 && arm_arch7 >>> + && thumb2_check_ldrd_operands (NULL_RTX, operands[3])" >>> + "* >>> + { >>> + HOST_WIDE_INT offset2 = INTVAL (operands[3]); >>> + if (offset2 == 4) >>> + { >>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>> + operands[2], NULL_RTX, operands[3], true)) >>> + return \"ldmia\\t%2, {%0, %1}\"; >>> + if (fix_cm3_ldrd && (operands[2] == operands[0])) >>> + { >>> + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); >>> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>> + return \"\"; >>> + } >>> + return \"ldrd\\t%0, %1, [%2]\"; >>> + } >>> + else >>> + { >>> + if (fix_cm3_ldrd && (operands[2] == operands[1])) >>> + { >>> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>> + output_asm_insn 
(\"ldr\\t%1, [%2, %3]\", operands); >>> + } >>> + return \"ldrd\\t%1, %0, [%2, %3]\"; >>> + } >>> + }" >>> +) >>> + >>> +(define_insn "*thumb2_ldrd_reg2" >>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>> + (mem:SI (plus:SI >>> + (match_operand:SI 2 "s_register_operand" "rk") >>> + (match_operand:SI 3 "const_int_operand" "")))) >>> + (set (match_operand:SI 1 "s_register_operand" "") >>> + (mem:SI (match_dup 2)))])] >>> + "TARGET_THUMB2 && arm_arch7 >>> + && thumb2_check_ldrd_operands (operands[3], NULL_RTX)" >>> + "* >>> + { >>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>> + if (offset1 == -4) >>> + { >>> + if (fix_cm3_ldrd && (operands[2] == operands[0])) >>> + { >>> + output_asm_insn (\"ldr\\t%1, [%2]\", operands); >>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>> + return \"\"; >>> + } >>> + return \"ldrd\\t%0, %1, [%2, %3]\"; >>> + } >>> + else >>> + { >>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>> + operands[2], operands[3], NULL_RTX, true)) >>> + return \"ldmia\\t%2, {%1, %0}\"; >>> + if (fix_cm3_ldrd && (operands[2] == operands[1])) >>> + { >>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>> + output_asm_insn (\"ldr\\t%1, [%2]\", operands); >>> + return \"\"; >>> + } >>> + return \"ldrd\\t%1, %0, [%2]\"; >>> + } >>> + }" >>> +) >>> + >>> +(define_peephole2 >>> + [(set (match_operand:SI 0 "s_register_operand" "") >>> + (match_operand:SI 2 "memory_operand" "")) >>> + (set (match_operand:SI 1 "s_register_operand" "") >>> + (match_operand:SI 3 "memory_operand" ""))] >>> + "TARGET_THUMB2 && arm_arch7 >>> + && thumb2_legitimate_ldrd_p (operands[0], operands[1], >>> + operands[2], operands[3], true)" >>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>> + (match_operand:SI 2 "memory_operand" "")) >>> + (set (match_operand:SI 1 "s_register_operand" "") >>> + (match_operand:SI 3 "memory_operand" ""))])] >>> + "" >>> +) >>> + >>> +(define_insn "*thumb2_strd" >>> + [(parallel [(set 
(mem:SI >>> + (plus:SI (match_operand:SI 2 "s_register_operand" "rk") >>> + (match_operand:SI 3 "const_int_operand" ""))) >>> + (match_operand:SI 0 "s_register_operand" "")) >>> + (set (mem:SI (plus:SI (match_dup 2) >>> + (match_operand:SI 4 "const_int_operand" ""))) >>> + (match_operand:SI 1 "s_register_operand" ""))])] >>> + "TARGET_THUMB2 && arm_arch7 >>> + && thumb2_check_ldrd_operands (operands[3], operands[4])" >>> + "* >>> + { >>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>> + HOST_WIDE_INT offset2 = INTVAL (operands[4]); >>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>> + operands[2], operands[3], operands[4], false)) >>> + return \"stmdb\\t%2, {%0, %1}\"; >>> + if (offset1 < offset2) >>> + return \"strd\\t%0, %1, [%2, %3]\"; >>> + else >>> + return \"strd\\t%1, %0, [%2, %4]\"; >>> + }" >>> +) >>> + >>> +(define_insn "*thumb2_strd_reg1" >>> + [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk")) >>> + (match_operand:SI 0 "s_register_operand" "")) >>> + (set (mem:SI (plus:SI (match_dup 2) >>> + (match_operand:SI 3 "const_int_operand" ""))) >>> + (match_operand:SI 1 "s_register_operand" ""))])] >>> + "TARGET_THUMB2 && arm_arch7 >>> + && thumb2_check_ldrd_operands (NULL_RTX, operands[3])" >>> + "* >>> + { >>> + HOST_WIDE_INT offset2 = INTVAL (operands[3]); >>> + if (offset2 == 4) >>> + { >>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>> + operands[2], NULL_RTX, operands[3], false)) >>> + return \"stmia\\t%2, {%0, %1}\"; >>> + return \"strd\\t%0, %1, [%2]\"; >>> + } >>> + else >>> + return \"strd\\t%1, %0, [%2, %3]\"; >>> + }" >>> +) >>> + >>> +(define_insn "*thumb2_strd_reg2" >>> + [(parallel [(set (mem:SI (plus:SI >>> + (match_operand:SI 2 "s_register_operand" "rk") >>> + (match_operand:SI 3 "const_int_operand" ""))) >>> + (match_operand:SI 0 "s_register_operand" "")) >>> + (set (mem:SI (match_dup 2)) >>> + (match_operand:SI 1 "s_register_operand" ""))])] >>> + "TARGET_THUMB2 && arm_arch7 >>> + && 
thumb2_check_ldrd_operands (operands[3], NULL_RTX)" >>> + "* >>> + { >>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>> + if (offset1 == -4) >>> + return \"strd\\t%0, %1, [%2, %3]\"; >>> + else >>> + { >>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>> + operands[2], operands[3], NULL_RTX, false)) >>> + return \"stmia\\t%2, {%1, %0}\"; >>> + return \"strd\\t%1, %0, [%2]\"; >>> + } >>> + }" >>> +) >>> + >>> +(define_peephole2 >>> + [(set (match_operand:SI 2 "memory_operand" "") >>> + (match_operand:SI 0 "s_register_operand" "")) >>> + (set (match_operand:SI 3 "memory_operand" "") >>> + (match_operand:SI 1 "s_register_operand" ""))] >>> + "TARGET_THUMB2 && arm_arch7 >>> + && thumb2_legitimate_ldrd_p (operands[0], operands[1], >>> + operands[2], operands[3], false)" >>> + [(parallel [(set (match_operand:SI 2 "memory_operand" "") >>> + (match_operand:SI 0 "s_register_operand" "")) >>> + (set (match_operand:SI 3 "memory_operand" "") >>> + (match_operand:SI 1 "s_register_operand" ""))])] >>> + "" >>> +) >>> Index: arm.c >>> =================================================================== >>> --- arm.c (revision 165492) >>> +++ arm.c (working copy) >>> @@ -23254,4 +23254,134 @@ arm_builtin_support_vector_misalignment >>> is_packed); >>> } >>> >>> +/* Check the validity of operands in an ldrd/strd instruction. */ >>> +bool >>> +thumb2_check_ldrd_operands (rtx off1, rtx off2) >>> +{ >>> + HOST_WIDE_INT offset1 = 0; >>> + HOST_WIDE_INT offset2 = 0; >>> + >>> + if (off1 != NULL_RTX) >>> + offset1 = INTVAL (off1); >>> + if (off2 != NULL_RTX) >>> + offset2 = INTVAL (off2); >>> + >>> + /* The offset range of LDRD is [-1020, 1020]. Here we check if both >>> + offsets lie in the range [-1020, 1024]. If one of the offsets is >>> + 1024, the following condition ((offset1 + 4) == offset2) will ensure >>> + offset1 to be 1020, suitable for instruction LDRD. 
*/ >>> + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) >>> + return false; >>> + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) >>> + return false; >>> + >>> + if ((offset1 + 4) == offset2) >>> + return true; >>> + if ((offset2 + 4) == offset1) >>> + return true; >>> + >>> + return false; >>> +} >>> + >>> +/* Check if the two memory accesses can be merged to an ldrd/strd instruction. >>> + That is they use the same base register, and the gap between constant >>> + offsets should be 4. */ >>> +bool >>> +thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd) >>> +{ >>> + rtx base1, base2, op1; >>> + rtx addr1 = XEXP (mem1, 0); >>> + rtx addr2 = XEXP (mem2, 0); >>> + HOST_WIDE_INT offset1 = 0; >>> + HOST_WIDE_INT offset2 = 0; >>> + >>> + if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) >>> + return false; >>> + >>> + if (REG_P (addr1)) >>> + base1 = addr1; >>> + else if (GET_CODE (addr1) == PLUS) >>> + { >>> + base1 = XEXP (addr1, 0); >>> + op1 = XEXP (addr1, 1); >>> + if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT)) >>> + return false; >>> + offset1 = INTVAL (op1); >>> + } >>> + else >>> + return false; >>> + >>> + if (REG_P (addr2)) >>> + base2 = addr2; >>> + else if (GET_CODE (addr2) == PLUS) >>> + { >>> + base2 = XEXP (addr2, 0); >>> + op1 = XEXP (addr2, 1); >>> + if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT)) >>> + return false; >>> + offset2 = INTVAL (op1); >>> + } >>> + else >>> + return false; >>> + >>> + if (base1 != base2) >>> + return false; >>> + >>> + /* The offset range of LDRD is [-1020, 1020]. Here we check if both >>> + offsets lie in the range [-1020, 1024]. If one of the offsets is >>> + 1024, the following condition ((offset1 + 4) == offset2) will ensure >>> + offset1 to be 1020, suitable for instruction LDRD. 
*/ >>> + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) >>> + return false; >>> + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) >>> + return false; >>> + >>> + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) >>> + return false; >>> + >>> + if ((offset1 + 4) == offset2) >>> + return true; >>> + if ((offset2 + 4) == offset1) >>> + return true; >>> + >>> + return false; >>> +} >>> + >>> +/* Check if the insn can be expressed as ldm/stm with less cost. */ >>> +bool >>> +thumb2_prefer_ldmstm (rtx reg1, rtx reg2, rtx base, >>> + rtx off1, rtx off2, bool ldrd) >>> +{ >>> + HOST_WIDE_INT offset1 = 0; >>> + HOST_WIDE_INT offset2 = 0; >>> + >>> + if (off1 != NULL_RTX) >>> + offset1 = INTVAL (off1); >>> + if (off2 != NULL_RTX) >>> + offset2 = INTVAL (off2); >>> + >>> + if (offset1 > offset2) >>> + { >>> + rtx tmp; >>> + HOST_WIDE_INT t = offset1; >>> + offset1 = offset2; >>> + offset2 = t; >>> + tmp = reg1; >>> + reg1 = reg2; >>> + reg2 = tmp; >>> + } >>> + >>> + /* The offset of ldmdb is -8, the offset of ldmia is 0. */ >>> + if ((offset1 != -8) && (offset1 != 0)) >>> + return false; >>> + >>> + /* Lower register corresponds to lower memory. */ >>> + if (REGNO (reg1) > REGNO (reg2)) >>> + return false; >>> + >>> + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower >>> + cost. 
*/ >>> + return false; >>> +} >>> + >>> #include "gt-arm.h" >>> Index: arm-protos.h >>> =================================================================== >>> --- arm-protos.h (revision 165492) >>> +++ arm-protos.h (working copy) >>> @@ -150,6 +150,9 @@ extern void arm_expand_sync (enum machin >>> extern const char *arm_output_memory_barrier (rtx *); >>> extern const char *arm_output_sync_insn (rtx, rtx *); >>> extern unsigned int arm_sync_loop_insns (rtx , rtx *); >>> +extern bool thumb2_check_ldrd_operands (rtx, rtx); >>> +extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool); >>> +extern bool thumb2_prefer_ldmstm (rtx, rtx, rtx, rtx, rtx, bool); >>> >>> #if defined TREE_CODE >>> extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); >>> Index: ldmstm.md >>> =================================================================== >>> --- ldmstm.md (revision 165492) >>> +++ ldmstm.md (working copy) >>> @@ -852,7 +852,7 @@ (define_insn "*ldm2_ia" >>> (set (match_operand:SI 2 "arm_hard_register_operand" "") >>> (mem:SI (plus:SI (match_dup 3) >>> (const_int 4))))])] >>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>> "ldm%(ia%)\t%3, {%1, %2}" >>> [(set_attr "type" "load2") >>> (set_attr "predicable" "yes")]) >>> @@ -901,7 +901,7 @@ (define_insn "*stm2_ia" >>> (match_operand:SI 1 "arm_hard_register_operand" "")) >>> (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) >>> (match_operand:SI 2 "arm_hard_register_operand" ""))])] >>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>> "stm%(ia%)\t%3, {%1, %2}" >>> [(set_attr "type" "store2") >>> (set_attr "predicable" "yes")]) >>> @@ -1041,7 +1041,7 @@ (define_insn "*ldm2_db" >>> (set (match_operand:SI 2 "arm_hard_register_operand" "") >>> (mem:SI (plus:SI (match_dup 3) >>> (const_int -4))))])] >>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>> + "TARGET_ARM && XVECLEN 
(operands[0], 0) == 2" >>> "ldm%(db%)\t%3, {%1, %2}" >>> [(set_attr "type" "load2") >>> (set_attr "predicable" "yes")]) >>> @@ -1067,7 +1067,7 @@ (define_insn "*stm2_db" >>> (match_operand:SI 1 "arm_hard_register_operand" "")) >>> (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) >>> (match_operand:SI 2 "arm_hard_register_operand" ""))])] >>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>> "stm%(db%)\t%3, {%1, %2}" >>> [(set_attr "type" "store2") >>> (set_attr "predicable" "yes")]) >>> >>> >>> Index: pr40457-3.c >>> =================================================================== >>> --- pr40457-3.c (revision 165492) >>> +++ pr40457-3.c (working copy) >>> @@ -5,6 +5,7 @@ void foo(int* p) >>> { >>> p[0] = 1; >>> p[1] = 0; >>> + p[2] = 2; >>> } >>> >>> /* { dg-final { scan-assembler "stm" } } */ >>> Index: pr40457-1.c >>> =================================================================== >>> --- pr40457-1.c (revision 165492) >>> +++ pr40457-1.c (working copy) >>> @@ -1,9 +1,9 @@ >>> -/* { dg-options "-Os" } */ >>> +/* { dg-options "-O2" } */ >>> /* { dg-do compile } */ >>> >>> int bar(int* p) >>> { >>> - int x = p[0] + p[1]; >>> + int x = p[0] + p[1] + p[2]; >>> return x; >>> } >>> >>> Index: pr40457-2.c >>> =================================================================== >>> --- pr40457-2.c (revision 165492) >>> +++ pr40457-2.c (working copy) >>> @@ -5,6 +5,7 @@ void foo(int* p) >>> { >>> p[0] = 1; >>> p[1] = 0; >>> + p[2] = 2; >>> } >>> >>> /* { dg-final { scan-assembler "stm" } } */ >>> Index: pr45335.c >>> =================================================================== >>> --- pr45335.c (revision 0) >>> +++ pr45335.c (revision 0) >>> @@ -0,0 +1,22 @@ >>> +/* { dg-options "-mthumb -O2" } */ >>> +/* { dg-require-effective-target arm_thumb2_ok } */ >>> +/* { dg-final { scan-assembler "ldrd" } } */ >>> +/* { dg-final { scan-assembler "strd" } } */ >>> + >>> +struct S >>> +{ >>> + void* p1; >>> + 
void* p2; >>> + void* p3; >>> + void* p4; >>> +}; >>> + >>> +extern printf(char*, ...); >>> + >>> +void foo1(struct S* fp, struct S* otherSaveArea) >>> +{ >>> + struct S* saveA = fp - 1; >>> + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); >>> + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", >>> + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); >>> +} >>> >> > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2010-11-23 0:23 ` Carrot Wei @ 2010-11-30 0:01 ` Carrot Wei 2010-12-14 22:58 ` Carrot Wei 0 siblings, 1 reply; 46+ messages in thread From: Carrot Wei @ 2010-11-30 0:01 UTC (permalink / raw) To: Paul Brook, Richard Earnshaw, Nick Clifton Cc: gcc-patches, ramana.radhakrishnan ping On Mon, Nov 22, 2010 at 3:16 PM, Carrot Wei <carrot@google.com> wrote: > ping > > On Sun, Oct 31, 2010 at 2:22 AM, Carrot Wei <carrot@google.com> wrote: >> Ping >> >> On Sun, Oct 24, 2010 at 9:46 PM, Carrot Wei <carrot@google.com> wrote: >>> Ping >>> >>> On Sat, Oct 16, 2010 at 8:27 PM, Carrot Wei <carrot@google.com> wrote: >>>> On Wed, Oct 13, 2010 at 7:01 PM, Paul Brook <paul@codesourcery.com> wrote: >>>>>> ChangeLog: >>>>>> 2010-09-04 Wei Guozhi <carrot@google.com> >>>>>> >>>>>> PR target/45335 >>>>>> * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, >>>>>> thumb2_ldrd_reg2 and peephole2): New insn pattern and related >>>>>> peephole2. >>>>>> (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): >>>>>> New insn pattern and related peephole2. >>>>>> * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. >>>>>> (thumb2_check_ldrd_operands): New function. >>>>>> (thumb2_prefer_ldmstm): New function. >>>>>> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New >>>>>> prototype. (thumb2_check_ldrd_operands): New prototype. >>>>>> (thumb2_prefer_ldmstm): New prototype. >>>>>> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): >>>>>> Change the ldm/stm patterns with 2 words to ARM only. >>>>>> * gcc/config/arm/constraints.md (Py): New thumb2 constant >>>>>> constraint suitable to ldrd/strd instructions. >>>>> >>>>> Not ok. >>>>> >>>>> Why is this restricted to Thumb mode? The ARM variant of ldrd isn't quite as >>>>> flexible, but still provides a useful improvement over ldm. >>>>> >>>> I agree the ARM version is also useful. 
But it brings much less >>>> benefit at the cost of much more complexity (due to more restrictions and insn >>>> pattern conflicts with ldm). So I will leave it as a future >>>> improvement. >>>> >>>>> This transformation is only valid on ARMv7 cores. On earlier hardware >>>>> (depending on system configuration) it may cause undefined behavior or an >>>>> alignment trap. >>>>> >>>> Done. >>>> >>>>> The range of -1020 to +1024 is used in several places, but without any >>>>> apparent explanation of why it's different to the range of an ldrd >>>>> instruction. I figured it out eventually, but it deserves a comment. >>>>> >>>> Comments added. >>>> >>>>>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>>>>> + operands[2], 0, operands[3], 1)" >>>>> >>>>> Passed operands do not match expected types. Specifically "0" is not an rtx >>>>> (should be "NULL_RTX"), and "1" is not a boolean value (should be "true"). >>>>> Many other occurrences. >>>>> >>>> Fixed. >>>> >>>>>> +(define_constraint "Py" >>>>>> + "@internal In Thumb-2 state a constant that is a multiple of 4 in the >>>>>> + range -1020 to 1024" >>>>> >>>>> This comment seems particularly pointless. You should mention why this >>>>> exists/where it is used. >>>>> >>>>> I think you're better off enforcing this in the insn condition, and removing >>>>> this constraint. At least half the uses (the -reg[12] insns) are incorrect, >>>>> and you already need the condition to enforce the dependency between the >>>>> operands. >>>>> >>>> I removed this constraint and added the check to the insn condition. >>>> >>>>>> +thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base, >>>>>>... >>>>>> + if (ldrd && (reg1 == reg2)) >>>>>> + return false; >>>>> >>>>> This function is part of the instruction condition. Instruction conditions >>>>> must not be used to enforce register allocation. >>>>> >>>> Removed. >>>> >>>>>> +thumb2_legitimate_ldrd_p ( >>>>>>... 
>>>>>> + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) >>>>>> + return false; >>>>> >>>>> You're incorrectly assuming offset1 < offset2, which might not be true at this >>>>> point. >>>>> >>>> The following check assumes offset1 < offset2 >>>> + if ((offset1 + 4) == offset2) >>>> + return true; >>>> >>>> And another check assumes offset2 < offset1, so both cases are covered. >>>> + if ((offset2 + 4) == offset1) >>>> + return true; >>>> >>>>>> + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower >>>>>> + cost. */ >>>>>> + return false; >>>>> >>>>> Code clearly doesn't match the comment. In fact this function always returns >>>>> false. >>>>> >>>> Richard mentioned that in some cases (specifically Cortex-A9) ldm has >>>> a lower cost than ldrd and we should model this in the insn pattern. This >>>> function is used for this. But I don't know the Cortex-A9 architecture >>>> in detail, so it should be filled in by somebody with more knowledge about >>>> it in the future. >>>> >>>> Wei Guozhi >>>> >>>> >>>> ChangeLog: >>>> 2010-10-16 Wei Guozhi <carrot@google.com> >>>> >>>> PR target/45335 >>>> * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, >>>> thumb2_ldrd_reg2 and peephole2): New insn pattern and related >>>> peephole2. >>>> (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): >>>> New insn pattern and related peephole2. >>>> * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. >>>> (thumb2_check_ldrd_operands): New function. >>>> (thumb2_prefer_ldmstm): New function. >>>> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype. >>>> (thumb2_check_ldrd_operands): New prototype. >>>> (thumb2_prefer_ldmstm): New prototype. >>>> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): >>>> Change the ldm/stm patterns with 2 words to ARM only. >>>> >>>> >>>> 2010-10-16 Wei Guozhi <carrot@google.com> >>>> >>>> PR target/45335 >>>> * gcc.target/arm/pr45335.c: New test. 
>>>> * gcc.target/arm/pr40457-1.c: Changed to load 3 words. >>>> * gcc.target/arm/pr40457-2.c: Changed to store 3 words. >>>> * gcc.target/arm/pr40457-3.c: Changed to store 3 words. >>>> >>>> >>>> Index: thumb2.md >>>> =================================================================== >>>> --- thumb2.md (revision 165492) >>>> +++ thumb2.md (working copy) >>>> @@ -1118,3 +1118,228 @@ (define_peephole2 >>>> " >>>> operands[2] = GEN_INT (32 - INTVAL (operands[2])); >>>> ") >>>> + >>>> +(define_insn "*thumb2_ldrd" >>>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>>> + (mem:SI (plus:SI >>>> + (match_operand:SI 2 "s_register_operand" "rk") >>>> + (match_operand:SI 3 "const_int_operand" "")))) >>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>> + (mem:SI (plus:SI (match_dup 2) >>>> + (match_operand:SI 4 "const_int_operand" ""))))])] >>>> + "TARGET_THUMB2 && arm_arch7 >>>> + && thumb2_check_ldrd_operands (operands[3], operands[4])" >>>> + "* >>>> + { >>>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>>> + HOST_WIDE_INT offset2 = INTVAL (operands[4]); >>>> + if (offset1 > offset2) >>>> + { >>>> + /* Swap the operands so that memory [base+offset1] is loaded into >>>> + operands[0]. 
*/ >>>> + rtx tmp = operands[0]; >>>> + operands[0] = operands[1]; >>>> + operands[1] = tmp; >>>> + tmp = operands[3]; >>>> + operands[3] = operands[4]; >>>> + operands[4] = tmp; >>>> + offset1 = INTVAL (operands[3]); >>>> + offset2 = INTVAL (operands[4]); >>>> + } >>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>> + operands[2], operands[3], operands[4], true)) >>>> + return \"ldmdb\\t%2, {%0, %1}\"; >>>> + else if (fix_cm3_ldrd && (operands[2] == operands[0])) >>>> + { >>>> + if (offset1 <= -256) >>>> + { >>>> + output_asm_insn (\"sub\\t%2, %2, %n3\", operands); >>>> + output_asm_insn (\"ldr\\t%1, [%2, #4]\", operands); >>>> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>>> + } >>>> + else >>>> + { >>>> + output_asm_insn (\"ldr\\t%1, [%2, %4]\", operands); >>>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>>> + } >>>> + return \"\"; >>>> + } >>>> + else >>>> + return \"ldrd\\t%0, %1, [%2, %3]\"; >>>> + }" >>>> +) >>>> + >>>> +(define_insn "*thumb2_ldrd_reg1" >>>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>>> + (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))) >>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>> + (mem:SI (plus:SI (match_dup 2) >>>> + (match_operand:SI 3 "const_int_operand" ""))))])] >>>> + "TARGET_THUMB2 && arm_arch7 >>>> + && thumb2_check_ldrd_operands (NULL_RTX, operands[3])" >>>> + "* >>>> + { >>>> + HOST_WIDE_INT offset2 = INTVAL (operands[3]); >>>> + if (offset2 == 4) >>>> + { >>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>> + operands[2], NULL_RTX, operands[3], true)) >>>> + return \"ldmia\\t%2, {%0, %1}\"; >>>> + if (fix_cm3_ldrd && (operands[2] == operands[0])) >>>> + { >>>> + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); >>>> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>>> + return \"\"; >>>> + } >>>> + return \"ldrd\\t%0, %1, [%2]\"; >>>> + } >>>> + else >>>> + { >>>> + if (fix_cm3_ldrd && (operands[2] == operands[1])) >>>> + { >>>> + 
output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>>> + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); >>>> + } >>>> + return \"ldrd\\t%1, %0, [%2, %3]\"; >>>> + } >>>> + }" >>>> +) >>>> + >>>> +(define_insn "*thumb2_ldrd_reg2" >>>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>>> + (mem:SI (plus:SI >>>> + (match_operand:SI 2 "s_register_operand" "rk") >>>> + (match_operand:SI 3 "const_int_operand" "")))) >>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>> + (mem:SI (match_dup 2)))])] >>>> + "TARGET_THUMB2 && arm_arch7 >>>> + && thumb2_check_ldrd_operands (operands[3], NULL_RTX)" >>>> + "* >>>> + { >>>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>>> + if (offset1 == -4) >>>> + { >>>> + if (fix_cm3_ldrd && (operands[2] == operands[0])) >>>> + { >>>> + output_asm_insn (\"ldr\\t%1, [%2]\", operands); >>>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>>> + return \"\"; >>>> + } >>>> + return \"ldrd\\t%0, %1, [%2, %3]\"; >>>> + } >>>> + else >>>> + { >>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>> + operands[2], operands[3], NULL_RTX, true)) >>>> + return \"ldmia\\t%2, {%1, %0}\"; >>>> + if (fix_cm3_ldrd && (operands[2] == operands[1])) >>>> + { >>>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>>> + output_asm_insn (\"ldr\\t%1, [%2]\", operands); >>>> + return \"\"; >>>> + } >>>> + return \"ldrd\\t%1, %0, [%2]\"; >>>> + } >>>> + }" >>>> +) >>>> + >>>> +(define_peephole2 >>>> + [(set (match_operand:SI 0 "s_register_operand" "") >>>> + (match_operand:SI 2 "memory_operand" "")) >>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>> + (match_operand:SI 3 "memory_operand" ""))] >>>> + "TARGET_THUMB2 && arm_arch7 >>>> + && thumb2_legitimate_ldrd_p (operands[0], operands[1], >>>> + operands[2], operands[3], true)" >>>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>>> + (match_operand:SI 2 "memory_operand" "")) >>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>> + 
(match_operand:SI 3 "memory_operand" ""))])] >>>> + "" >>>> +) >>>> + >>>> +(define_insn "*thumb2_strd" >>>> + [(parallel [(set (mem:SI >>>> + (plus:SI (match_operand:SI 2 "s_register_operand" "rk") >>>> + (match_operand:SI 3 "const_int_operand" ""))) >>>> + (match_operand:SI 0 "s_register_operand" "")) >>>> + (set (mem:SI (plus:SI (match_dup 2) >>>> + (match_operand:SI 4 "const_int_operand" ""))) >>>> + (match_operand:SI 1 "s_register_operand" ""))])] >>>> + "TARGET_THUMB2 && arm_arch7 >>>> + && thumb2_check_ldrd_operands (operands[3], operands[4])" >>>> + "* >>>> + { >>>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>>> + HOST_WIDE_INT offset2 = INTVAL (operands[4]); >>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>> + operands[2], operands[3], operands[4], false)) >>>> + return \"stmdb\\t%2, {%0, %1}\"; >>>> + if (offset1 < offset2) >>>> + return \"strd\\t%0, %1, [%2, %3]\"; >>>> + else >>>> + return \"strd\\t%1, %0, [%2, %4]\"; >>>> + }" >>>> +) >>>> + >>>> +(define_insn "*thumb2_strd_reg1" >>>> + [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk")) >>>> + (match_operand:SI 0 "s_register_operand" "")) >>>> + (set (mem:SI (plus:SI (match_dup 2) >>>> + (match_operand:SI 3 "const_int_operand" ""))) >>>> + (match_operand:SI 1 "s_register_operand" ""))])] >>>> + "TARGET_THUMB2 && arm_arch7 >>>> + && thumb2_check_ldrd_operands (NULL_RTX, operands[3])" >>>> + "* >>>> + { >>>> + HOST_WIDE_INT offset2 = INTVAL (operands[3]); >>>> + if (offset2 == 4) >>>> + { >>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>> + operands[2], NULL_RTX, operands[3], false)) >>>> + return \"stmia\\t%2, {%0, %1}\"; >>>> + return \"strd\\t%0, %1, [%2]\"; >>>> + } >>>> + else >>>> + return \"strd\\t%1, %0, [%2, %3]\"; >>>> + }" >>>> +) >>>> + >>>> +(define_insn "*thumb2_strd_reg2" >>>> + [(parallel [(set (mem:SI (plus:SI >>>> + (match_operand:SI 2 "s_register_operand" "rk") >>>> + (match_operand:SI 3 "const_int_operand" ""))) >>>> + 
(match_operand:SI 0 "s_register_operand" "")) >>>> + (set (mem:SI (match_dup 2)) >>>> + (match_operand:SI 1 "s_register_operand" ""))])] >>>> + "TARGET_THUMB2 && arm_arch7 >>>> + && thumb2_check_ldrd_operands (operands[3], NULL_RTX)" >>>> + "* >>>> + { >>>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>>> + if (offset1 == -4) >>>> + return \"strd\\t%0, %1, [%2, %3]\"; >>>> + else >>>> + { >>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>> + operands[2], operands[3], NULL_RTX, false)) >>>> + return \"stmia\\t%2, {%1, %0}\"; >>>> + return \"strd\\t%1, %0, [%2]\"; >>>> + } >>>> + }" >>>> +) >>>> + >>>> +(define_peephole2 >>>> + [(set (match_operand:SI 2 "memory_operand" "") >>>> + (match_operand:SI 0 "s_register_operand" "")) >>>> + (set (match_operand:SI 3 "memory_operand" "") >>>> + (match_operand:SI 1 "s_register_operand" ""))] >>>> + "TARGET_THUMB2 && arm_arch7 >>>> + && thumb2_legitimate_ldrd_p (operands[0], operands[1], >>>> + operands[2], operands[3], false)" >>>> + [(parallel [(set (match_operand:SI 2 "memory_operand" "") >>>> + (match_operand:SI 0 "s_register_operand" "")) >>>> + (set (match_operand:SI 3 "memory_operand" "") >>>> + (match_operand:SI 1 "s_register_operand" ""))])] >>>> + "" >>>> +) >>>> Index: arm.c >>>> =================================================================== >>>> --- arm.c (revision 165492) >>>> +++ arm.c (working copy) >>>> @@ -23254,4 +23254,134 @@ arm_builtin_support_vector_misalignment >>>> is_packed); >>>> } >>>> >>>> +/* Check the validity of operands in an ldrd/strd instruction. */ >>>> +bool >>>> +thumb2_check_ldrd_operands (rtx off1, rtx off2) >>>> +{ >>>> + HOST_WIDE_INT offset1 = 0; >>>> + HOST_WIDE_INT offset2 = 0; >>>> + >>>> + if (off1 != NULL_RTX) >>>> + offset1 = INTVAL (off1); >>>> + if (off2 != NULL_RTX) >>>> + offset2 = INTVAL (off2); >>>> + >>>> + /* The offset range of LDRD is [-1020, 1020]. Here we check if both >>>> + offsets lie in the range [-1020, 1024]. 
If one of the offsets is >>>> + 1024, the following condition ((offset1 + 4) == offset2) will ensure >>>> + offset1 to be 1020, suitable for instruction LDRD. */ >>>> + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) >>>> + return false; >>>> + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) >>>> + return false; >>>> + >>>> + if ((offset1 + 4) == offset2) >>>> + return true; >>>> + if ((offset2 + 4) == offset1) >>>> + return true; >>>> + >>>> + return false; >>>> +} >>>> + >>>> +/* Check if the two memory accesses can be merged to an ldrd/strd instruction. >>>> + That is they use the same base register, and the gap between constant >>>> + offsets should be 4. */ >>>> +bool >>>> +thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd) >>>> +{ >>>> + rtx base1, base2, op1; >>>> + rtx addr1 = XEXP (mem1, 0); >>>> + rtx addr2 = XEXP (mem2, 0); >>>> + HOST_WIDE_INT offset1 = 0; >>>> + HOST_WIDE_INT offset2 = 0; >>>> + >>>> + if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) >>>> + return false; >>>> + >>>> + if (REG_P (addr1)) >>>> + base1 = addr1; >>>> + else if (GET_CODE (addr1) == PLUS) >>>> + { >>>> + base1 = XEXP (addr1, 0); >>>> + op1 = XEXP (addr1, 1); >>>> + if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT)) >>>> + return false; >>>> + offset1 = INTVAL (op1); >>>> + } >>>> + else >>>> + return false; >>>> + >>>> + if (REG_P (addr2)) >>>> + base2 = addr2; >>>> + else if (GET_CODE (addr2) == PLUS) >>>> + { >>>> + base2 = XEXP (addr2, 0); >>>> + op1 = XEXP (addr2, 1); >>>> + if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT)) >>>> + return false; >>>> + offset2 = INTVAL (op1); >>>> + } >>>> + else >>>> + return false; >>>> + >>>> + if (base1 != base2) >>>> + return false; >>>> + >>>> + /* The offset range of LDRD is [-1020, 1020]. Here we check if both >>>> + offsets lie in the range [-1020, 1024]. 
If one of the offsets is >>>> + 1024, the following condition ((offset1 + 4) == offset2) will ensure >>>> + offset1 to be 1020, suitable for instruction LDRD. */ >>>> + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) >>>> + return false; >>>> + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) >>>> + return false; >>>> + >>>> + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) >>>> + return false; >>>> + >>>> + if ((offset1 + 4) == offset2) >>>> + return true; >>>> + if ((offset2 + 4) == offset1) >>>> + return true; >>>> + >>>> + return false; >>>> +} >>>> + >>>> +/* Check if the insn can be expressed as ldm/stm with less cost. */ >>>> +bool >>>> +thumb2_prefer_ldmstm (rtx reg1, rtx reg2, rtx base, >>>> + rtx off1, rtx off2, bool ldrd) >>>> +{ >>>> + HOST_WIDE_INT offset1 = 0; >>>> + HOST_WIDE_INT offset2 = 0; >>>> + >>>> + if (off1 != NULL_RTX) >>>> + offset1 = INTVAL (off1); >>>> + if (off2 != NULL_RTX) >>>> + offset2 = INTVAL (off2); >>>> + >>>> + if (offset1 > offset2) >>>> + { >>>> + rtx tmp; >>>> + HOST_WIDE_INT t = offset1; >>>> + offset1 = offset2; >>>> + offset2 = t; >>>> + tmp = reg1; >>>> + reg1 = reg2; >>>> + reg2 = tmp; >>>> + } >>>> + >>>> + /* The offset of ldmdb is -8, the offset of ldmia is 0. */ >>>> + if ((offset1 != -8) && (offset1 != 0)) >>>> + return false; >>>> + >>>> + /* Lower register corresponds to lower memory. */ >>>> + if (REGNO (reg1) > REGNO (reg2)) >>>> + return false; >>>> + >>>> + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower >>>> + cost. 
*/ >>>> + return false; >>>> +} >>>> + >>>> #include "gt-arm.h" >>>> Index: arm-protos.h >>>> =================================================================== >>>> --- arm-protos.h (revision 165492) >>>> +++ arm-protos.h (working copy) >>>> @@ -150,6 +150,9 @@ extern void arm_expand_sync (enum machin >>>> extern const char *arm_output_memory_barrier (rtx *); >>>> extern const char *arm_output_sync_insn (rtx, rtx *); >>>> extern unsigned int arm_sync_loop_insns (rtx , rtx *); >>>> +extern bool thumb2_check_ldrd_operands (rtx, rtx); >>>> +extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool); >>>> +extern bool thumb2_prefer_ldmstm (rtx, rtx, rtx, rtx, rtx, bool); >>>> >>>> #if defined TREE_CODE >>>> extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); >>>> Index: ldmstm.md >>>> =================================================================== >>>> --- ldmstm.md (revision 165492) >>>> +++ ldmstm.md (working copy) >>>> @@ -852,7 +852,7 @@ (define_insn "*ldm2_ia" >>>> (set (match_operand:SI 2 "arm_hard_register_operand" "") >>>> (mem:SI (plus:SI (match_dup 3) >>>> (const_int 4))))])] >>>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>>> "ldm%(ia%)\t%3, {%1, %2}" >>>> [(set_attr "type" "load2") >>>> (set_attr "predicable" "yes")]) >>>> @@ -901,7 +901,7 @@ (define_insn "*stm2_ia" >>>> (match_operand:SI 1 "arm_hard_register_operand" "")) >>>> (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) >>>> (match_operand:SI 2 "arm_hard_register_operand" ""))])] >>>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>>> "stm%(ia%)\t%3, {%1, %2}" >>>> [(set_attr "type" "store2") >>>> (set_attr "predicable" "yes")]) >>>> @@ -1041,7 +1041,7 @@ (define_insn "*ldm2_db" >>>> (set (match_operand:SI 2 "arm_hard_register_operand" "") >>>> (mem:SI (plus:SI (match_dup 3) >>>> (const_int -4))))])] >>>> - "TARGET_32BIT && XVECLEN (operands[0], 0) 
== 2" >>>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>>> "ldm%(db%)\t%3, {%1, %2}" >>>> [(set_attr "type" "load2") >>>> (set_attr "predicable" "yes")]) >>>> @@ -1067,7 +1067,7 @@ (define_insn "*stm2_db" >>>> (match_operand:SI 1 "arm_hard_register_operand" "")) >>>> (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) >>>> (match_operand:SI 2 "arm_hard_register_operand" ""))])] >>>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>>> "stm%(db%)\t%3, {%1, %2}" >>>> [(set_attr "type" "store2") >>>> (set_attr "predicable" "yes")]) >>>> >>>> >>>> Index: pr40457-3.c >>>> =================================================================== >>>> --- pr40457-3.c (revision 165492) >>>> +++ pr40457-3.c (working copy) >>>> @@ -5,6 +5,7 @@ void foo(int* p) >>>> { >>>> p[0] = 1; >>>> p[1] = 0; >>>> + p[2] = 2; >>>> } >>>> >>>> /* { dg-final { scan-assembler "stm" } } */ >>>> Index: pr40457-1.c >>>> =================================================================== >>>> --- pr40457-1.c (revision 165492) >>>> +++ pr40457-1.c (working copy) >>>> @@ -1,9 +1,9 @@ >>>> -/* { dg-options "-Os" } */ >>>> +/* { dg-options "-O2" } */ >>>> /* { dg-do compile } */ >>>> >>>> int bar(int* p) >>>> { >>>> - int x = p[0] + p[1]; >>>> + int x = p[0] + p[1] + p[2]; >>>> return x; >>>> } >>>> >>>> Index: pr40457-2.c >>>> =================================================================== >>>> --- pr40457-2.c (revision 165492) >>>> +++ pr40457-2.c (working copy) >>>> @@ -5,6 +5,7 @@ void foo(int* p) >>>> { >>>> p[0] = 1; >>>> p[1] = 0; >>>> + p[2] = 2; >>>> } >>>> >>>> /* { dg-final { scan-assembler "stm" } } */ >>>> Index: pr45335.c >>>> =================================================================== >>>> --- pr45335.c (revision 0) >>>> +++ pr45335.c (revision 0) >>>> @@ -0,0 +1,22 @@ >>>> +/* { dg-options "-mthumb -O2" } */ >>>> +/* { dg-require-effective-target arm_thumb2_ok } */ >>>> +/* { dg-final { scan-assembler "ldrd" } } */ 
>>>> +/* { dg-final { scan-assembler "strd" } } */ >>>> + >>>> +struct S >>>> +{ >>>> + void* p1; >>>> + void* p2; >>>> + void* p3; >>>> + void* p4; >>>> +}; >>>> + >>>> +extern printf(char*, ...); >>>> + >>>> +void foo1(struct S* fp, struct S* otherSaveArea) >>>> +{ >>>> + struct S* saveA = fp - 1; >>>> + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); >>>> + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", >>>> + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); >>>> +} >>>> >>> >> > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2010-11-30 0:01 ` Carrot Wei @ 2010-12-14 22:58 ` Carrot Wei 2011-01-04 8:57 ` Carrot Wei 0 siblings, 1 reply; 46+ messages in thread From: Carrot Wei @ 2010-12-14 22:58 UTC (permalink / raw) To: Paul Brook, Richard Earnshaw, Nick Clifton Cc: gcc-patches, ramana.radhakrishnan ping On Mon, Nov 29, 2010 at 2:32 PM, Carrot Wei <carrot@google.com> wrote: > ping > > On Mon, Nov 22, 2010 at 3:16 PM, Carrot Wei <carrot@google.com> wrote: >> ping >> >> On Sun, Oct 31, 2010 at 2:22 AM, Carrot Wei <carrot@google.com> wrote: >>> Ping >>> >>> On Sun, Oct 24, 2010 at 9:46 PM, Carrot Wei <carrot@google.com> wrote: >>>> Ping >>>> >>>> On Sat, Oct 16, 2010 at 8:27 PM, Carrot Wei <carrot@google.com> wrote: >>>>> On Wed, Oct 13, 2010 at 7:01 PM, Paul Brook <paul@codesourcery.com> wrote: >>>>>>> ChangeLog: >>>>>>> 2010-09-04 Wei Guozhi <carrot@google.com> >>>>>>> >>>>>>> PR target/45335 >>>>>>> * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, >>>>>>> thumb2_ldrd_reg2 and peephole2): New insn pattern and related >>>>>>> peephole2. >>>>>>> (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): >>>>>>> New insn pattern and related peephole2. >>>>>>> * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. >>>>>>> (thumb2_check_ldrd_operands): New function. >>>>>>> (thumb2_prefer_ldmstm): New function. >>>>>>> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New >>>>>>> prototype. (thumb2_check_ldrd_operands): New prototype. >>>>>>> (thumb2_prefer_ldmstm): New prototype. >>>>>>> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): >>>>>>> Change the ldm/stm patterns with 2 words to ARM only. >>>>>>> * gcc/config/arm/constraints.md (Py): New thumb2 constant >>>>>>> constraint suitable to ldrd/strd instructions. >>>>>> >>>>>> Not ok. >>>>>> >>>>>> Why is this restricted to Thumb mode? 
The ARM variant of ldrd isn't quite as >>>>>> flexible, but still provides a useful improvement over ldm. >>>>>> >>>>> I agree the ARM version is also useful. But it brings much less >>>>> benefit with too much complexity (due to more restriction and insn >>>>> pattern conflict with ldm). So I will leave it as a future >>>>> improvement. >>>>> >>>>>> This transformation is only valid on ARMv7 cores. On earlier hardware >>>>>> (depending on system configuration) it may cause undefined behavior or an >>>>>> alignment trap. >>>>>> >>>>> done. >>>>> >>>>>> The range on -1020 to +1024 is used in several places, but without any >>>>>> apparent explanation of why it's different to the range of an ldrd >>>>>> instruction. I figured it out eventually, but it deserves a comment. >>>>>> >>>>> Comments added. >>>>> >>>>>>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>>>>>> + operands[2], 0, operands[3], 1)" >>>>>> >>>>>> Passed operands do not match expected types. Specifically "0" is not an rtx >>>>>> (should be "NULL_RTX"), and "1" is not a boolean value (should be "true"). >>>>>> Many other occurrences. >>>>>> >>>>> Fixed. >>>>> >>>>>>> +(define_constraint "Py" >>>>>>> + "@internal In Thumb-2 state a constant that is a multiple of 4 in the >>>>>>> + range -1020 to 1024" >>>>>> >>>>>> This comment seems particularly pointless. You should mention why this >>>>>> exists/where it is used. >>>>>> >>>>>> I think you're better off enforcing this in the insn condition, and remove >>>>>> this constraint. At least half the uses (the -reg[12] insns) are incorrect, >>>>>> and you already need the condition to enforce the dependency between the >>>>>> operands. >>>>>> >>>>> I removed this constraint and add the check to insn condition. >>>>> >>>>>>> +thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base, >>>>>>>... >>>>>>> + if (ldrd && (reg1 == reg2)) >>>>>>> + return false; >>>>>> >>>>>> This function is part of the instruction condition. 
Instruction conditions >>>>>> must not be used to enforce register allocation. >>>>>> >>>>> removed. >>>>> >>>>>>> +thumb2_legitimate_ldrd_p ( >>>>>>>... >>>>>>> + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) >>>>>>> + return false; >>>>>> >>>>>> You're incorrectly assuming offset1 < offset2, which might not be true at this >>>>>> point. >>>>>> >>>>> The following check assumes offset1 < offset2 >>>>> + if ((offset1 + 4) == offset2) >>>>> + return true; >>>>> >>>>> And another check assumes offset2 < offset1, so both cases are covered. >>>>> + if ((offset2 + 4) == offset1) >>>>> + return true; >>>>> >>>>>>> + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower >>>>>>> + cost. */ >>>>>>> + return false; >>>>>> >>>>>> Code clearly doesn't match the comment. In fact this function always returns >>>>>> false. >>>>>> >>>>> Richard mentioned that in some cases (specifically cortex A9) ldm has >>>>> less cost than ldrd and we should model this in the insn pattern. This >>>>> function is used for this. But I don't know the cortex A9 architecture >>>>> detail, so it should be filled by somebody with more knowledge about >>>>> it in future. >>>>> >>>>> Wei Guozhi >>>>> >>>>> >>>>> ChangeLog: >>>>> 2010-10-16 Wei Guozhi <carrot@google.com> >>>>> >>>>> PR target/45335 >>>>> * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, >>>>> thumb2_ldrd_reg2 and peephole2): New insn pattern and related >>>>> peephole2. >>>>> (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): >>>>> New insn pattern and related peephole2. >>>>> * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. >>>>> (thumb2_check_ldrd_operands): New function. >>>>> (thumb2_prefer_ldmstm): New function. >>>>> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype. >>>>> (thumb2_check_ldrd_operands): New prototype. >>>>> (thumb2_prefer_ldmstm): New prototype. 
>>>>> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): >>>>> Change the ldm/stm patterns with 2 words to ARM only. >>>>> >>>>> >>>>> 2010-10-16 Wei Guozhi <carrot@google.com> >>>>> >>>>> PR target/45335 >>>>> * gcc.target/arm/pr45335.c: New test. >>>>> * gcc.target/arm/pr40457-1.c: Changed to load 3 words. >>>>> * gcc.target/arm/pr40457-2.c: Changed to store 3 words. >>>>> * gcc.target/arm/pr40457-3.c: Changed to store 3 words. >>>>> >>>>> >>>>> Index: thumb2.md >>>>> =================================================================== >>>>> --- thumb2.md (revision 165492) >>>>> +++ thumb2.md (working copy) >>>>> @@ -1118,3 +1118,228 @@ (define_peephole2 >>>>> " >>>>> operands[2] = GEN_INT (32 - INTVAL (operands[2])); >>>>> ") >>>>> + >>>>> +(define_insn "*thumb2_ldrd" >>>>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>>>> + (mem:SI (plus:SI >>>>> + (match_operand:SI 2 "s_register_operand" "rk") >>>>> + (match_operand:SI 3 "const_int_operand" "")))) >>>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>>> + (mem:SI (plus:SI (match_dup 2) >>>>> + (match_operand:SI 4 "const_int_operand" ""))))])] >>>>> + "TARGET_THUMB2 && arm_arch7 >>>>> + && thumb2_check_ldrd_operands (operands[3], operands[4])" >>>>> + "* >>>>> + { >>>>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>>>> + HOST_WIDE_INT offset2 = INTVAL (operands[4]); >>>>> + if (offset1 > offset2) >>>>> + { >>>>> + /* Swap the operands so that memory [base+offset1] is loaded into >>>>> + operands[0]. 
*/ >>>>> + rtx tmp = operands[0]; >>>>> + operands[0] = operands[1]; >>>>> + operands[1] = tmp; >>>>> + tmp = operands[3]; >>>>> + operands[3] = operands[4]; >>>>> + operands[4] = tmp; >>>>> + offset1 = INTVAL (operands[3]); >>>>> + offset2 = INTVAL (operands[4]); >>>>> + } >>>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>>> + operands[2], operands[3], operands[4], true)) >>>>> + return \"ldmdb\\t%2, {%0, %1}\"; >>>>> + else if (fix_cm3_ldrd && (operands[2] == operands[0])) >>>>> + { >>>>> + if (offset1 <= -256) >>>>> + { >>>>> + output_asm_insn (\"sub\\t%2, %2, %n3\", operands); >>>>> + output_asm_insn (\"ldr\\t%1, [%2, #4]\", operands); >>>>> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>>>> + } >>>>> + else >>>>> + { >>>>> + output_asm_insn (\"ldr\\t%1, [%2, %4]\", operands); >>>>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>>>> + } >>>>> + return \"\"; >>>>> + } >>>>> + else >>>>> + return \"ldrd\\t%0, %1, [%2, %3]\"; >>>>> + }" >>>>> +) >>>>> + >>>>> +(define_insn "*thumb2_ldrd_reg1" >>>>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>>>> + (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))) >>>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>>> + (mem:SI (plus:SI (match_dup 2) >>>>> + (match_operand:SI 3 "const_int_operand" ""))))])] >>>>> + "TARGET_THUMB2 && arm_arch7 >>>>> + && thumb2_check_ldrd_operands (NULL_RTX, operands[3])" >>>>> + "* >>>>> + { >>>>> + HOST_WIDE_INT offset2 = INTVAL (operands[3]); >>>>> + if (offset2 == 4) >>>>> + { >>>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>>> + operands[2], NULL_RTX, operands[3], true)) >>>>> + return \"ldmia\\t%2, {%0, %1}\"; >>>>> + if (fix_cm3_ldrd && (operands[2] == operands[0])) >>>>> + { >>>>> + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); >>>>> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>>>> + return \"\"; >>>>> + } >>>>> + return \"ldrd\\t%0, %1, [%2]\"; >>>>> + } >>>>> + else >>>>> + { >>>>> + if 
(fix_cm3_ldrd && (operands[2] == operands[1])) >>>>> + { >>>>> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>>>> + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); >>>>> + } >>>>> + return \"ldrd\\t%1, %0, [%2, %3]\"; >>>>> + } >>>>> + }" >>>>> +) >>>>> + >>>>> +(define_insn "*thumb2_ldrd_reg2" >>>>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>>>> + (mem:SI (plus:SI >>>>> + (match_operand:SI 2 "s_register_operand" "rk") >>>>> + (match_operand:SI 3 "const_int_operand" "")))) >>>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>>> + (mem:SI (match_dup 2)))])] >>>>> + "TARGET_THUMB2 && arm_arch7 >>>>> + && thumb2_check_ldrd_operands (operands[3], NULL_RTX)" >>>>> + "* >>>>> + { >>>>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>>>> + if (offset1 == -4) >>>>> + { >>>>> + if (fix_cm3_ldrd && (operands[2] == operands[0])) >>>>> + { >>>>> + output_asm_insn (\"ldr\\t%1, [%2]\", operands); >>>>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>>>> + return \"\"; >>>>> + } >>>>> + return \"ldrd\\t%0, %1, [%2, %3]\"; >>>>> + } >>>>> + else >>>>> + { >>>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>>> + operands[2], operands[3], NULL_RTX, true)) >>>>> + return \"ldmia\\t%2, {%1, %0}\"; >>>>> + if (fix_cm3_ldrd && (operands[2] == operands[1])) >>>>> + { >>>>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>>>> + output_asm_insn (\"ldr\\t%1, [%2]\", operands); >>>>> + return \"\"; >>>>> + } >>>>> + return \"ldrd\\t%1, %0, [%2]\"; >>>>> + } >>>>> + }" >>>>> +) >>>>> + >>>>> +(define_peephole2 >>>>> + [(set (match_operand:SI 0 "s_register_operand" "") >>>>> + (match_operand:SI 2 "memory_operand" "")) >>>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>>> + (match_operand:SI 3 "memory_operand" ""))] >>>>> + "TARGET_THUMB2 && arm_arch7 >>>>> + && thumb2_legitimate_ldrd_p (operands[0], operands[1], >>>>> + operands[2], operands[3], true)" >>>>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" 
"") >>>>> + (match_operand:SI 2 "memory_operand" "")) >>>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>>> + (match_operand:SI 3 "memory_operand" ""))])] >>>>> + "" >>>>> +) >>>>> + >>>>> +(define_insn "*thumb2_strd" >>>>> + [(parallel [(set (mem:SI >>>>> + (plus:SI (match_operand:SI 2 "s_register_operand" "rk") >>>>> + (match_operand:SI 3 "const_int_operand" ""))) >>>>> + (match_operand:SI 0 "s_register_operand" "")) >>>>> + (set (mem:SI (plus:SI (match_dup 2) >>>>> + (match_operand:SI 4 "const_int_operand" ""))) >>>>> + (match_operand:SI 1 "s_register_operand" ""))])] >>>>> + "TARGET_THUMB2 && arm_arch7 >>>>> + && thumb2_check_ldrd_operands (operands[3], operands[4])" >>>>> + "* >>>>> + { >>>>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>>>> + HOST_WIDE_INT offset2 = INTVAL (operands[4]); >>>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>>> + operands[2], operands[3], operands[4], false)) >>>>> + return \"stmdb\\t%2, {%0, %1}\"; >>>>> + if (offset1 < offset2) >>>>> + return \"strd\\t%0, %1, [%2, %3]\"; >>>>> + else >>>>> + return \"strd\\t%1, %0, [%2, %4]\"; >>>>> + }" >>>>> +) >>>>> + >>>>> +(define_insn "*thumb2_strd_reg1" >>>>> + [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk")) >>>>> + (match_operand:SI 0 "s_register_operand" "")) >>>>> + (set (mem:SI (plus:SI (match_dup 2) >>>>> + (match_operand:SI 3 "const_int_operand" ""))) >>>>> + (match_operand:SI 1 "s_register_operand" ""))])] >>>>> + "TARGET_THUMB2 && arm_arch7 >>>>> + && thumb2_check_ldrd_operands (NULL_RTX, operands[3])" >>>>> + "* >>>>> + { >>>>> + HOST_WIDE_INT offset2 = INTVAL (operands[3]); >>>>> + if (offset2 == 4) >>>>> + { >>>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>>> + operands[2], NULL_RTX, operands[3], false)) >>>>> + return \"stmia\\t%2, {%0, %1}\"; >>>>> + return \"strd\\t%0, %1, [%2]\"; >>>>> + } >>>>> + else >>>>> + return \"strd\\t%1, %0, [%2, %3]\"; >>>>> + }" >>>>> +) >>>>> + >>>>> +(define_insn 
"*thumb2_strd_reg2" >>>>> + [(parallel [(set (mem:SI (plus:SI >>>>> + (match_operand:SI 2 "s_register_operand" "rk") >>>>> + (match_operand:SI 3 "const_int_operand" ""))) >>>>> + (match_operand:SI 0 "s_register_operand" "")) >>>>> + (set (mem:SI (match_dup 2)) >>>>> + (match_operand:SI 1 "s_register_operand" ""))])] >>>>> + "TARGET_THUMB2 && arm_arch7 >>>>> + && thumb2_check_ldrd_operands (operands[3], NULL_RTX)" >>>>> + "* >>>>> + { >>>>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>>>> + if (offset1 == -4) >>>>> + return \"strd\\t%0, %1, [%2, %3]\"; >>>>> + else >>>>> + { >>>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>>> + operands[2], operands[3], NULL_RTX, false)) >>>>> + return \"stmia\\t%2, {%1, %0}\"; >>>>> + return \"strd\\t%1, %0, [%2]\"; >>>>> + } >>>>> + }" >>>>> +) >>>>> + >>>>> +(define_peephole2 >>>>> + [(set (match_operand:SI 2 "memory_operand" "") >>>>> + (match_operand:SI 0 "s_register_operand" "")) >>>>> + (set (match_operand:SI 3 "memory_operand" "") >>>>> + (match_operand:SI 1 "s_register_operand" ""))] >>>>> + "TARGET_THUMB2 && arm_arch7 >>>>> + && thumb2_legitimate_ldrd_p (operands[0], operands[1], >>>>> + operands[2], operands[3], false)" >>>>> + [(parallel [(set (match_operand:SI 2 "memory_operand" "") >>>>> + (match_operand:SI 0 "s_register_operand" "")) >>>>> + (set (match_operand:SI 3 "memory_operand" "") >>>>> + (match_operand:SI 1 "s_register_operand" ""))])] >>>>> + "" >>>>> +) >>>>> Index: arm.c >>>>> =================================================================== >>>>> --- arm.c (revision 165492) >>>>> +++ arm.c (working copy) >>>>> @@ -23254,4 +23254,134 @@ arm_builtin_support_vector_misalignment >>>>> is_packed); >>>>> } >>>>> >>>>> +/* Check the validity of operands in an ldrd/strd instruction. 
*/ >>>>> +bool >>>>> +thumb2_check_ldrd_operands (rtx off1, rtx off2) >>>>> +{ >>>>> + HOST_WIDE_INT offset1 = 0; >>>>> + HOST_WIDE_INT offset2 = 0; >>>>> + >>>>> + if (off1 != NULL_RTX) >>>>> + offset1 = INTVAL (off1); >>>>> + if (off2 != NULL_RTX) >>>>> + offset2 = INTVAL (off2); >>>>> + >>>>> + /* The offset range of LDRD is [-1020, 1020]. Here we check if both >>>>> + offsets lie in the range [-1020, 1024]. If one of the offsets is >>>>> + 1024, the following condition ((offset1 + 4) == offset2) will ensure >>>>> + offset1 to be 1020, suitable for instruction LDRD. */ >>>>> + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) >>>>> + return false; >>>>> + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) >>>>> + return false; >>>>> + >>>>> + if ((offset1 + 4) == offset2) >>>>> + return true; >>>>> + if ((offset2 + 4) == offset1) >>>>> + return true; >>>>> + >>>>> + return false; >>>>> +} >>>>> + >>>>> +/* Check if the two memory accesses can be merged to an ldrd/strd instruction. >>>>> + That is they use the same base register, and the gap between constant >>>>> + offsets should be 4. 
*/ >>>>> +bool >>>>> +thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd) >>>>> +{ >>>>> + rtx base1, base2, op1; >>>>> + rtx addr1 = XEXP (mem1, 0); >>>>> + rtx addr2 = XEXP (mem2, 0); >>>>> + HOST_WIDE_INT offset1 = 0; >>>>> + HOST_WIDE_INT offset2 = 0; >>>>> + >>>>> + if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) >>>>> + return false; >>>>> + >>>>> + if (REG_P (addr1)) >>>>> + base1 = addr1; >>>>> + else if (GET_CODE (addr1) == PLUS) >>>>> + { >>>>> + base1 = XEXP (addr1, 0); >>>>> + op1 = XEXP (addr1, 1); >>>>> + if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT)) >>>>> + return false; >>>>> + offset1 = INTVAL (op1); >>>>> + } >>>>> + else >>>>> + return false; >>>>> + >>>>> + if (REG_P (addr2)) >>>>> + base2 = addr2; >>>>> + else if (GET_CODE (addr2) == PLUS) >>>>> + { >>>>> + base2 = XEXP (addr2, 0); >>>>> + op1 = XEXP (addr2, 1); >>>>> + if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT)) >>>>> + return false; >>>>> + offset2 = INTVAL (op1); >>>>> + } >>>>> + else >>>>> + return false; >>>>> + >>>>> + if (base1 != base2) >>>>> + return false; >>>>> + >>>>> + /* The offset range of LDRD is [-1020, 1020]. Here we check if both >>>>> + offsets lie in the range [-1020, 1024]. If one of the offsets is >>>>> + 1024, the following condition ((offset1 + 4) == offset2) will ensure >>>>> + offset1 to be 1020, suitable for instruction LDRD. */ >>>>> + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) >>>>> + return false; >>>>> + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) >>>>> + return false; >>>>> + >>>>> + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) >>>>> + return false; >>>>> + >>>>> + if ((offset1 + 4) == offset2) >>>>> + return true; >>>>> + if ((offset2 + 4) == offset1) >>>>> + return true; >>>>> + >>>>> + return false; >>>>> +} >>>>> + >>>>> +/* Check if the insn can be expressed as ldm/stm with less cost. 
*/ >>>>> +bool >>>>> +thumb2_prefer_ldmstm (rtx reg1, rtx reg2, rtx base, >>>>> + rtx off1, rtx off2, bool ldrd) >>>>> +{ >>>>> + HOST_WIDE_INT offset1 = 0; >>>>> + HOST_WIDE_INT offset2 = 0; >>>>> + >>>>> + if (off1 != NULL_RTX) >>>>> + offset1 = INTVAL (off1); >>>>> + if (off2 != NULL_RTX) >>>>> + offset2 = INTVAL (off2); >>>>> + >>>>> + if (offset1 > offset2) >>>>> + { >>>>> + rtx tmp; >>>>> + HOST_WIDE_INT t = offset1; >>>>> + offset1 = offset2; >>>>> + offset2 = t; >>>>> + tmp = reg1; >>>>> + reg1 = reg2; >>>>> + reg2 = tmp; >>>>> + } >>>>> + >>>>> + /* The offset of ldmdb is -8, the offset of ldmia is 0. */ >>>>> + if ((offset1 != -8) && (offset1 != 0)) >>>>> + return false; >>>>> + >>>>> + /* Lower register corresponds to lower memory. */ >>>>> + if (REGNO (reg1) > REGNO (reg2)) >>>>> + return false; >>>>> + >>>>> + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower >>>>> + cost. */ >>>>> + return false; >>>>> +} >>>>> + >>>>> #include "gt-arm.h" >>>>> Index: arm-protos.h >>>>> =================================================================== >>>>> --- arm-protos.h (revision 165492) >>>>> +++ arm-protos.h (working copy) >>>>> @@ -150,6 +150,9 @@ extern void arm_expand_sync (enum machin >>>>> extern const char *arm_output_memory_barrier (rtx *); >>>>> extern const char *arm_output_sync_insn (rtx, rtx *); >>>>> extern unsigned int arm_sync_loop_insns (rtx , rtx *); >>>>> +extern bool thumb2_check_ldrd_operands (rtx, rtx); >>>>> +extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool); >>>>> +extern bool thumb2_prefer_ldmstm (rtx, rtx, rtx, rtx, rtx, bool); >>>>> >>>>> #if defined TREE_CODE >>>>> extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); >>>>> Index: ldmstm.md >>>>> =================================================================== >>>>> --- ldmstm.md (revision 165492) >>>>> +++ ldmstm.md (working copy) >>>>> @@ -852,7 +852,7 @@ (define_insn "*ldm2_ia" >>>>> (set (match_operand:SI 2 
"arm_hard_register_operand" "") >>>>> (mem:SI (plus:SI (match_dup 3) >>>>> (const_int 4))))])] >>>>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>>>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>>>> "ldm%(ia%)\t%3, {%1, %2}" >>>>> [(set_attr "type" "load2") >>>>> (set_attr "predicable" "yes")]) >>>>> @@ -901,7 +901,7 @@ (define_insn "*stm2_ia" >>>>> (match_operand:SI 1 "arm_hard_register_operand" "")) >>>>> (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) >>>>> (match_operand:SI 2 "arm_hard_register_operand" ""))])] >>>>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>>>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>>>> "stm%(ia%)\t%3, {%1, %2}" >>>>> [(set_attr "type" "store2") >>>>> (set_attr "predicable" "yes")]) >>>>> @@ -1041,7 +1041,7 @@ (define_insn "*ldm2_db" >>>>> (set (match_operand:SI 2 "arm_hard_register_operand" "") >>>>> (mem:SI (plus:SI (match_dup 3) >>>>> (const_int -4))))])] >>>>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>>>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>>>> "ldm%(db%)\t%3, {%1, %2}" >>>>> [(set_attr "type" "load2") >>>>> (set_attr "predicable" "yes")]) >>>>> @@ -1067,7 +1067,7 @@ (define_insn "*stm2_db" >>>>> (match_operand:SI 1 "arm_hard_register_operand" "")) >>>>> (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) >>>>> (match_operand:SI 2 "arm_hard_register_operand" ""))])] >>>>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>>>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>>>> "stm%(db%)\t%3, {%1, %2}" >>>>> [(set_attr "type" "store2") >>>>> (set_attr "predicable" "yes")]) >>>>> >>>>> >>>>> Index: pr40457-3.c >>>>> =================================================================== >>>>> --- pr40457-3.c (revision 165492) >>>>> +++ pr40457-3.c (working copy) >>>>> @@ -5,6 +5,7 @@ void foo(int* p) >>>>> { >>>>> p[0] = 1; >>>>> p[1] = 0; >>>>> + p[2] = 2; >>>>> } >>>>> >>>>> /* { dg-final { scan-assembler "stm" } } */ >>>>> Index: pr40457-1.c >>>>> 
=================================================================== >>>>> --- pr40457-1.c (revision 165492) >>>>> +++ pr40457-1.c (working copy) >>>>> @@ -1,9 +1,9 @@ >>>>> -/* { dg-options "-Os" } */ >>>>> +/* { dg-options "-O2" } */ >>>>> /* { dg-do compile } */ >>>>> >>>>> int bar(int* p) >>>>> { >>>>> - int x = p[0] + p[1]; >>>>> + int x = p[0] + p[1] + p[2]; >>>>> return x; >>>>> } >>>>> >>>>> Index: pr40457-2.c >>>>> =================================================================== >>>>> --- pr40457-2.c (revision 165492) >>>>> +++ pr40457-2.c (working copy) >>>>> @@ -5,6 +5,7 @@ void foo(int* p) >>>>> { >>>>> p[0] = 1; >>>>> p[1] = 0; >>>>> + p[2] = 2; >>>>> } >>>>> >>>>> /* { dg-final { scan-assembler "stm" } } */ >>>>> Index: pr45335.c >>>>> =================================================================== >>>>> --- pr45335.c (revision 0) >>>>> +++ pr45335.c (revision 0) >>>>> @@ -0,0 +1,22 @@ >>>>> +/* { dg-options "-mthumb -O2" } */ >>>>> +/* { dg-require-effective-target arm_thumb2_ok } */ >>>>> +/* { dg-final { scan-assembler "ldrd" } } */ >>>>> +/* { dg-final { scan-assembler "strd" } } */ >>>>> + >>>>> +struct S >>>>> +{ >>>>> + void* p1; >>>>> + void* p2; >>>>> + void* p3; >>>>> + void* p4; >>>>> +}; >>>>> + >>>>> +extern printf(char*, ...); >>>>> + >>>>> +void foo1(struct S* fp, struct S* otherSaveArea) >>>>> +{ >>>>> + struct S* saveA = fp - 1; >>>>> + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); >>>>> + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", >>>>> + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); >>>>> +} >>>>> >>>> >>> >> > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2010-12-14 22:58 ` Carrot Wei @ 2011-01-04 8:57 ` Carrot Wei 2011-01-11 14:43 ` Nick Clifton 0 siblings, 1 reply; 46+ messages in thread From: Carrot Wei @ 2011-01-04 8:57 UTC (permalink / raw) To: Paul Brook, Richard Earnshaw, Nick Clifton Cc: gcc-patches, ramana.radhakrishnan Happy new year! Hope I can check in this patch in 2011 On Wed, Dec 15, 2010 at 6:00 AM, Carrot Wei <carrot@google.com> wrote: > ping > > On Mon, Nov 29, 2010 at 2:32 PM, Carrot Wei <carrot@google.com> wrote: >> ping >> >> On Mon, Nov 22, 2010 at 3:16 PM, Carrot Wei <carrot@google.com> wrote: >>> ping >>> >>> On Sun, Oct 31, 2010 at 2:22 AM, Carrot Wei <carrot@google.com> wrote: >>>> Ping >>>> >>>> On Sun, Oct 24, 2010 at 9:46 PM, Carrot Wei <carrot@google.com> wrote: >>>>> Ping >>>>> >>>>> On Sat, Oct 16, 2010 at 8:27 PM, Carrot Wei <carrot@google.com> wrote: >>>>>> On Wed, Oct 13, 2010 at 7:01 PM, Paul Brook <paul@codesourcery.com> wrote: >>>>>>>> ChangeLog: >>>>>>>> 2010-09-04 Wei Guozhi <carrot@google.com> >>>>>>>> >>>>>>>> PR target/45335 >>>>>>>> * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, >>>>>>>> thumb2_ldrd_reg2 and peephole2): New insn pattern and related >>>>>>>> peephole2. >>>>>>>> (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): >>>>>>>> New insn pattern and related peephole2. >>>>>>>> * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. >>>>>>>> (thumb2_check_ldrd_operands): New function. >>>>>>>> (thumb2_prefer_ldmstm): New function. >>>>>>>> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New >>>>>>>> prototype. (thumb2_check_ldrd_operands): New prototype. >>>>>>>> (thumb2_prefer_ldmstm): New prototype. >>>>>>>> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): >>>>>>>> Change the ldm/stm patterns with 2 words to ARM only. 
>>>>>>>> * gcc/config/arm/constraints.md (Py): New thumb2 constant >>>>>>>> constraint suitable to ldrd/strd instructions. >>>>>>> >>>>>>> Not ok. >>>>>>> >>>>>>> Why is this restricted to Thumb mode? The ARM variant of ldrd isn't quite as >>>>>>> flexible, but still provides a useful improvement over ldm. >>>>>>> >>>>>> I agree the ARM version is also useful. But it brings much less >>>>>> benefit with too much complexity (due to more restriction and insn >>>>>> pattern conflict with ldm). So I will leave it as a future >>>>>> improvement. >>>>>> >>>>>>> This transformation is only valid on ARMv7 cores. On earlier hardware >>>>>>> (depending on system configuration) it may cause undefined behavior or an >>>>>>> alignment trap. >>>>>>> >>>>>> done. >>>>>> >>>>>>> The range on -1020 to +1024 is used in several places, but without any >>>>>>> apparent explanation of why it's different to the range of an ldrd >>>>>>> instruction. I figured it out eventually, but it deserves a comment. >>>>>>> >>>>>> Comments added. >>>>>> >>>>>>>> + "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1], >>>>>>>> + operands[2], 0, operands[3], 1)" >>>>>>> >>>>>>> Passed operands do not match expected types. Specifically "0" is not an rtx >>>>>>> (should be "NULL_RTX"), and "1" is not a boolean value (should be "true"). >>>>>>> Many other occurrences. >>>>>>> >>>>>> Fixed. >>>>>> >>>>>>>> +(define_constraint "Py" >>>>>>>> + "@internal In Thumb-2 state a constant that is a multiple of 4 in the >>>>>>>> + range -1020 to 1024" >>>>>>> >>>>>>> This comment seems particularly pointless. You should mention why this >>>>>>> exists/where it is used. >>>>>>> >>>>>>> I think you're better off enforcing this in the insn condition, and remove >>>>>>> this constraint. At least half the uses (the -reg[12] insns) are incorrect, >>>>>>> and you already need the condition to enforce the dependency between the >>>>>>> operands. 
>>>>>>> >>>>>> I removed this constraint and add the check to insn condition. >>>>>> >>>>>>>> +thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base, >>>>>>>>... >>>>>>>> + if (ldrd && (reg1 == reg2)) >>>>>>>> + return false; >>>>>>> >>>>>>> This function is part of the instruction condition. Instruction conditions >>>>>>> must not be used to enforce register allocation. >>>>>>> >>>>>> removed. >>>>>> >>>>>>>> +thumb2_legitimate_ldrd_p ( >>>>>>>>... >>>>>>>> + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) >>>>>>>> + return false; >>>>>>> >>>>>>> You're incorrectly assuming offset1 < offset2, which might not be true at this >>>>>>> point. >>>>>>> >>>>>> The following check assumes offset1 < offset2 >>>>>> + if ((offset1 + 4) == offset2) >>>>>> + return true; >>>>>> >>>>>> And another check assumes offset2 < offset1, so both cases are covered. >>>>>> + if ((offset2 + 4) == offset1) >>>>>> + return true; >>>>>> >>>>>>>> + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower >>>>>>>> + cost. */ >>>>>>>> + return false; >>>>>>> >>>>>>> Code clearly doesn't match the comment. In fact this function always returns >>>>>>> false. >>>>>>> >>>>>> Richard mentioned that in some cases (specifically cortex A9) ldm has >>>>>> less cost than ldrd and we should model this in the insn pattern. This >>>>>> function is used for this. But I don't know the cortex A9 architecture >>>>>> detail, so it should be filled by somebody with more knowledge about >>>>>> it in future. >>>>>> >>>>>> Wei Guozhi >>>>>> >>>>>> >>>>>> ChangeLog: >>>>>> 2010-10-16 Wei Guozhi <carrot@google.com> >>>>>> >>>>>> PR target/45335 >>>>>> * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, >>>>>> thumb2_ldrd_reg2 and peephole2): New insn pattern and related >>>>>> peephole2. >>>>>> (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): >>>>>> New insn pattern and related peephole2. >>>>>> * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. 
>>>>>> (thumb2_check_ldrd_operands): New function. >>>>>> (thumb2_prefer_ldmstm): New function. >>>>>> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype. >>>>>> (thumb2_check_ldrd_operands): New prototype. >>>>>> (thumb2_prefer_ldmstm): New prototype. >>>>>> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): >>>>>> Change the ldm/stm patterns with 2 words to ARM only. >>>>>> >>>>>> >>>>>> 2010-10-16 Wei Guozhi <carrot@google.com> >>>>>> >>>>>> PR target/45335 >>>>>> * gcc.target/arm/pr45335.c: New test. >>>>>> * gcc.target/arm/pr40457-1.c: Changed to load 3 words. >>>>>> * gcc.target/arm/pr40457-2.c: Changed to store 3 words. >>>>>> * gcc.target/arm/pr40457-3.c: Changed to store 3 words. >>>>>> >>>>>> >>>>>> Index: thumb2.md >>>>>> =================================================================== >>>>>> --- thumb2.md (revision 165492) >>>>>> +++ thumb2.md (working copy) >>>>>> @@ -1118,3 +1118,228 @@ (define_peephole2 >>>>>> " >>>>>> operands[2] = GEN_INT (32 - INTVAL (operands[2])); >>>>>> ") >>>>>> + >>>>>> +(define_insn "*thumb2_ldrd" >>>>>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>>>>> + (mem:SI (plus:SI >>>>>> + (match_operand:SI 2 "s_register_operand" "rk") >>>>>> + (match_operand:SI 3 "const_int_operand" "")))) >>>>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>>>> + (mem:SI (plus:SI (match_dup 2) >>>>>> + (match_operand:SI 4 "const_int_operand" ""))))])] >>>>>> + "TARGET_THUMB2 && arm_arch7 >>>>>> + && thumb2_check_ldrd_operands (operands[3], operands[4])" >>>>>> + "* >>>>>> + { >>>>>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>>>>> + HOST_WIDE_INT offset2 = INTVAL (operands[4]); >>>>>> + if (offset1 > offset2) >>>>>> + { >>>>>> + /* Swap the operands so that memory [base+offset1] is loaded into >>>>>> + operands[0]. 
*/ >>>>>> + rtx tmp = operands[0]; >>>>>> + operands[0] = operands[1]; >>>>>> + operands[1] = tmp; >>>>>> + tmp = operands[3]; >>>>>> + operands[3] = operands[4]; >>>>>> + operands[4] = tmp; >>>>>> + offset1 = INTVAL (operands[3]); >>>>>> + offset2 = INTVAL (operands[4]); >>>>>> + } >>>>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>>>> + operands[2], operands[3], operands[4], true)) >>>>>> + return \"ldmdb\\t%2, {%0, %1}\"; >>>>>> + else if (fix_cm3_ldrd && (operands[2] == operands[0])) >>>>>> + { >>>>>> + if (offset1 <= -256) >>>>>> + { >>>>>> + output_asm_insn (\"sub\\t%2, %2, %n3\", operands); >>>>>> + output_asm_insn (\"ldr\\t%1, [%2, #4]\", operands); >>>>>> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>>>>> + } >>>>>> + else >>>>>> + { >>>>>> + output_asm_insn (\"ldr\\t%1, [%2, %4]\", operands); >>>>>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>>>>> + } >>>>>> + return \"\"; >>>>>> + } >>>>>> + else >>>>>> + return \"ldrd\\t%0, %1, [%2, %3]\"; >>>>>> + }" >>>>>> +) >>>>>> + >>>>>> +(define_insn "*thumb2_ldrd_reg1" >>>>>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>>>>> + (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))) >>>>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>>>> + (mem:SI (plus:SI (match_dup 2) >>>>>> + (match_operand:SI 3 "const_int_operand" ""))))])] >>>>>> + "TARGET_THUMB2 && arm_arch7 >>>>>> + && thumb2_check_ldrd_operands (NULL_RTX, operands[3])" >>>>>> + "* >>>>>> + { >>>>>> + HOST_WIDE_INT offset2 = INTVAL (operands[3]); >>>>>> + if (offset2 == 4) >>>>>> + { >>>>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>>>> + operands[2], NULL_RTX, operands[3], true)) >>>>>> + return \"ldmia\\t%2, {%0, %1}\"; >>>>>> + if (fix_cm3_ldrd && (operands[2] == operands[0])) >>>>>> + { >>>>>> + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); >>>>>> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>>>>> + return \"\"; >>>>>> + } >>>>>> + return \"ldrd\\t%0, %1, [%2]\"; 
>>>>>> + } >>>>>> + else >>>>>> + { >>>>>> + if (fix_cm3_ldrd && (operands[2] == operands[1])) >>>>>> + { >>>>>> + output_asm_insn (\"ldr\\t%0, [%2]\", operands); >>>>>> + output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands); >>>>>> + } >>>>>> + return \"ldrd\\t%1, %0, [%2, %3]\"; >>>>>> + } >>>>>> + }" >>>>>> +) >>>>>> + >>>>>> +(define_insn "*thumb2_ldrd_reg2" >>>>>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>>>>> + (mem:SI (plus:SI >>>>>> + (match_operand:SI 2 "s_register_operand" "rk") >>>>>> + (match_operand:SI 3 "const_int_operand" "")))) >>>>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>>>> + (mem:SI (match_dup 2)))])] >>>>>> + "TARGET_THUMB2 && arm_arch7 >>>>>> + && thumb2_check_ldrd_operands (operands[3], NULL_RTX)" >>>>>> + "* >>>>>> + { >>>>>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>>>>> + if (offset1 == -4) >>>>>> + { >>>>>> + if (fix_cm3_ldrd && (operands[2] == operands[0])) >>>>>> + { >>>>>> + output_asm_insn (\"ldr\\t%1, [%2]\", operands); >>>>>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>>>>> + return \"\"; >>>>>> + } >>>>>> + return \"ldrd\\t%0, %1, [%2, %3]\"; >>>>>> + } >>>>>> + else >>>>>> + { >>>>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>>>> + operands[2], operands[3], NULL_RTX, true)) >>>>>> + return \"ldmia\\t%2, {%1, %0}\"; >>>>>> + if (fix_cm3_ldrd && (operands[2] == operands[1])) >>>>>> + { >>>>>> + output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands); >>>>>> + output_asm_insn (\"ldr\\t%1, [%2]\", operands); >>>>>> + return \"\"; >>>>>> + } >>>>>> + return \"ldrd\\t%1, %0, [%2]\"; >>>>>> + } >>>>>> + }" >>>>>> +) >>>>>> + >>>>>> +(define_peephole2 >>>>>> + [(set (match_operand:SI 0 "s_register_operand" "") >>>>>> + (match_operand:SI 2 "memory_operand" "")) >>>>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>>>> + (match_operand:SI 3 "memory_operand" ""))] >>>>>> + "TARGET_THUMB2 && arm_arch7 >>>>>> + && thumb2_legitimate_ldrd_p (operands[0], operands[1], 
>>>>>> + operands[2], operands[3], true)" >>>>>> + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") >>>>>> + (match_operand:SI 2 "memory_operand" "")) >>>>>> + (set (match_operand:SI 1 "s_register_operand" "") >>>>>> + (match_operand:SI 3 "memory_operand" ""))])] >>>>>> + "" >>>>>> +) >>>>>> + >>>>>> +(define_insn "*thumb2_strd" >>>>>> + [(parallel [(set (mem:SI >>>>>> + (plus:SI (match_operand:SI 2 "s_register_operand" "rk") >>>>>> + (match_operand:SI 3 "const_int_operand" ""))) >>>>>> + (match_operand:SI 0 "s_register_operand" "")) >>>>>> + (set (mem:SI (plus:SI (match_dup 2) >>>>>> + (match_operand:SI 4 "const_int_operand" ""))) >>>>>> + (match_operand:SI 1 "s_register_operand" ""))])] >>>>>> + "TARGET_THUMB2 && arm_arch7 >>>>>> + && thumb2_check_ldrd_operands (operands[3], operands[4])" >>>>>> + "* >>>>>> + { >>>>>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>>>>> + HOST_WIDE_INT offset2 = INTVAL (operands[4]); >>>>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>>>> + operands[2], operands[3], operands[4], false)) >>>>>> + return \"stmdb\\t%2, {%0, %1}\"; >>>>>> + if (offset1 < offset2) >>>>>> + return \"strd\\t%0, %1, [%2, %3]\"; >>>>>> + else >>>>>> + return \"strd\\t%1, %0, [%2, %4]\"; >>>>>> + }" >>>>>> +) >>>>>> + >>>>>> +(define_insn "*thumb2_strd_reg1" >>>>>> + [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk")) >>>>>> + (match_operand:SI 0 "s_register_operand" "")) >>>>>> + (set (mem:SI (plus:SI (match_dup 2) >>>>>> + (match_operand:SI 3 "const_int_operand" ""))) >>>>>> + (match_operand:SI 1 "s_register_operand" ""))])] >>>>>> + "TARGET_THUMB2 && arm_arch7 >>>>>> + && thumb2_check_ldrd_operands (NULL_RTX, operands[3])" >>>>>> + "* >>>>>> + { >>>>>> + HOST_WIDE_INT offset2 = INTVAL (operands[3]); >>>>>> + if (offset2 == 4) >>>>>> + { >>>>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>>>> + operands[2], NULL_RTX, operands[3], false)) >>>>>> + return \"stmia\\t%2, {%0, %1}\"; >>>>>> + return 
\"strd\\t%0, %1, [%2]\"; >>>>>> + } >>>>>> + else >>>>>> + return \"strd\\t%1, %0, [%2, %3]\"; >>>>>> + }" >>>>>> +) >>>>>> + >>>>>> +(define_insn "*thumb2_strd_reg2" >>>>>> + [(parallel [(set (mem:SI (plus:SI >>>>>> + (match_operand:SI 2 "s_register_operand" "rk") >>>>>> + (match_operand:SI 3 "const_int_operand" ""))) >>>>>> + (match_operand:SI 0 "s_register_operand" "")) >>>>>> + (set (mem:SI (match_dup 2)) >>>>>> + (match_operand:SI 1 "s_register_operand" ""))])] >>>>>> + "TARGET_THUMB2 && arm_arch7 >>>>>> + && thumb2_check_ldrd_operands (operands[3], NULL_RTX)" >>>>>> + "* >>>>>> + { >>>>>> + HOST_WIDE_INT offset1 = INTVAL (operands[3]); >>>>>> + if (offset1 == -4) >>>>>> + return \"strd\\t%0, %1, [%2, %3]\"; >>>>>> + else >>>>>> + { >>>>>> + if (thumb2_prefer_ldmstm (operands[0], operands[1], >>>>>> + operands[2], operands[3], NULL_RTX, false)) >>>>>> + return \"stmia\\t%2, {%1, %0}\"; >>>>>> + return \"strd\\t%1, %0, [%2]\"; >>>>>> + } >>>>>> + }" >>>>>> +) >>>>>> + >>>>>> +(define_peephole2 >>>>>> + [(set (match_operand:SI 2 "memory_operand" "") >>>>>> + (match_operand:SI 0 "s_register_operand" "")) >>>>>> + (set (match_operand:SI 3 "memory_operand" "") >>>>>> + (match_operand:SI 1 "s_register_operand" ""))] >>>>>> + "TARGET_THUMB2 && arm_arch7 >>>>>> + && thumb2_legitimate_ldrd_p (operands[0], operands[1], >>>>>> + operands[2], operands[3], false)" >>>>>> + [(parallel [(set (match_operand:SI 2 "memory_operand" "") >>>>>> + (match_operand:SI 0 "s_register_operand" "")) >>>>>> + (set (match_operand:SI 3 "memory_operand" "") >>>>>> + (match_operand:SI 1 "s_register_operand" ""))])] >>>>>> + "" >>>>>> +) >>>>>> Index: arm.c >>>>>> =================================================================== >>>>>> --- arm.c (revision 165492) >>>>>> +++ arm.c (working copy) >>>>>> @@ -23254,4 +23254,134 @@ arm_builtin_support_vector_misalignment >>>>>> is_packed); >>>>>> } >>>>>> >>>>>> +/* Check the validity of operands in an ldrd/strd instruction. 
*/ >>>>>> +bool >>>>>> +thumb2_check_ldrd_operands (rtx off1, rtx off2) >>>>>> +{ >>>>>> + HOST_WIDE_INT offset1 = 0; >>>>>> + HOST_WIDE_INT offset2 = 0; >>>>>> + >>>>>> + if (off1 != NULL_RTX) >>>>>> + offset1 = INTVAL (off1); >>>>>> + if (off2 != NULL_RTX) >>>>>> + offset2 = INTVAL (off2); >>>>>> + >>>>>> + /* The offset range of LDRD is [-1020, 1020]. Here we check if both >>>>>> + offsets lie in the range [-1020, 1024]. If one of the offsets is >>>>>> + 1024, the following condition ((offset1 + 4) == offset2) will ensure >>>>>> + offset1 to be 1020, suitable for instruction LDRD. */ >>>>>> + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) >>>>>> + return false; >>>>>> + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) >>>>>> + return false; >>>>>> + >>>>>> + if ((offset1 + 4) == offset2) >>>>>> + return true; >>>>>> + if ((offset2 + 4) == offset1) >>>>>> + return true; >>>>>> + >>>>>> + return false; >>>>>> +} >>>>>> + >>>>>> +/* Check if the two memory accesses can be merged to an ldrd/strd instruction. >>>>>> + That is they use the same base register, and the gap between constant >>>>>> + offsets should be 4. 
*/ >>>>>> +bool >>>>>> +thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd) >>>>>> +{ >>>>>> + rtx base1, base2, op1; >>>>>> + rtx addr1 = XEXP (mem1, 0); >>>>>> + rtx addr2 = XEXP (mem2, 0); >>>>>> + HOST_WIDE_INT offset1 = 0; >>>>>> + HOST_WIDE_INT offset2 = 0; >>>>>> + >>>>>> + if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) >>>>>> + return false; >>>>>> + >>>>>> + if (REG_P (addr1)) >>>>>> + base1 = addr1; >>>>>> + else if (GET_CODE (addr1) == PLUS) >>>>>> + { >>>>>> + base1 = XEXP (addr1, 0); >>>>>> + op1 = XEXP (addr1, 1); >>>>>> + if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT)) >>>>>> + return false; >>>>>> + offset1 = INTVAL (op1); >>>>>> + } >>>>>> + else >>>>>> + return false; >>>>>> + >>>>>> + if (REG_P (addr2)) >>>>>> + base2 = addr2; >>>>>> + else if (GET_CODE (addr2) == PLUS) >>>>>> + { >>>>>> + base2 = XEXP (addr2, 0); >>>>>> + op1 = XEXP (addr2, 1); >>>>>> + if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT)) >>>>>> + return false; >>>>>> + offset2 = INTVAL (op1); >>>>>> + } >>>>>> + else >>>>>> + return false; >>>>>> + >>>>>> + if (base1 != base2) >>>>>> + return false; >>>>>> + >>>>>> + /* The offset range of LDRD is [-1020, 1020]. Here we check if both >>>>>> + offsets lie in the range [-1020, 1024]. If one of the offsets is >>>>>> + 1024, the following condition ((offset1 + 4) == offset2) will ensure >>>>>> + offset1 to be 1020, suitable for instruction LDRD. */ >>>>>> + if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0)) >>>>>> + return false; >>>>>> + if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0)) >>>>>> + return false; >>>>>> + >>>>>> + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) >>>>>> + return false; >>>>>> + >>>>>> + if ((offset1 + 4) == offset2) >>>>>> + return true; >>>>>> + if ((offset2 + 4) == offset1) >>>>>> + return true; >>>>>> + >>>>>> + return false; >>>>>> +} >>>>>> + >>>>>> +/* Check if the insn can be expressed as ldm/stm with less cost. 
*/ >>>>>> +bool >>>>>> +thumb2_prefer_ldmstm (rtx reg1, rtx reg2, rtx base, >>>>>> + rtx off1, rtx off2, bool ldrd) >>>>>> +{ >>>>>> + HOST_WIDE_INT offset1 = 0; >>>>>> + HOST_WIDE_INT offset2 = 0; >>>>>> + >>>>>> + if (off1 != NULL_RTX) >>>>>> + offset1 = INTVAL (off1); >>>>>> + if (off2 != NULL_RTX) >>>>>> + offset2 = INTVAL (off2); >>>>>> + >>>>>> + if (offset1 > offset2) >>>>>> + { >>>>>> + rtx tmp; >>>>>> + HOST_WIDE_INT t = offset1; >>>>>> + offset1 = offset2; >>>>>> + offset2 = t; >>>>>> + tmp = reg1; >>>>>> + reg1 = reg2; >>>>>> + reg2 = tmp; >>>>>> + } >>>>>> + >>>>>> + /* The offset of ldmdb is -8, the offset of ldmia is 0. */ >>>>>> + if ((offset1 != -8) && (offset1 != 0)) >>>>>> + return false; >>>>>> + >>>>>> + /* Lower register corresponds to lower memory. */ >>>>>> + if (REGNO (reg1) > REGNO (reg2)) >>>>>> + return false; >>>>>> + >>>>>> + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower >>>>>> + cost. */ >>>>>> + return false; >>>>>> +} >>>>>> + >>>>>> #include "gt-arm.h" >>>>>> Index: arm-protos.h >>>>>> =================================================================== >>>>>> --- arm-protos.h (revision 165492) >>>>>> +++ arm-protos.h (working copy) >>>>>> @@ -150,6 +150,9 @@ extern void arm_expand_sync (enum machin >>>>>> extern const char *arm_output_memory_barrier (rtx *); >>>>>> extern const char *arm_output_sync_insn (rtx, rtx *); >>>>>> extern unsigned int arm_sync_loop_insns (rtx , rtx *); >>>>>> +extern bool thumb2_check_ldrd_operands (rtx, rtx); >>>>>> +extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool); >>>>>> +extern bool thumb2_prefer_ldmstm (rtx, rtx, rtx, rtx, rtx, bool); >>>>>> >>>>>> #if defined TREE_CODE >>>>>> extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); >>>>>> Index: ldmstm.md >>>>>> =================================================================== >>>>>> --- ldmstm.md (revision 165492) >>>>>> +++ ldmstm.md (working copy) >>>>>> @@ -852,7 +852,7 @@ (define_insn 
"*ldm2_ia" >>>>>> (set (match_operand:SI 2 "arm_hard_register_operand" "") >>>>>> (mem:SI (plus:SI (match_dup 3) >>>>>> (const_int 4))))])] >>>>>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>>>>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>>>>> "ldm%(ia%)\t%3, {%1, %2}" >>>>>> [(set_attr "type" "load2") >>>>>> (set_attr "predicable" "yes")]) >>>>>> @@ -901,7 +901,7 @@ (define_insn "*stm2_ia" >>>>>> (match_operand:SI 1 "arm_hard_register_operand" "")) >>>>>> (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) >>>>>> (match_operand:SI 2 "arm_hard_register_operand" ""))])] >>>>>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>>>>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>>>>> "stm%(ia%)\t%3, {%1, %2}" >>>>>> [(set_attr "type" "store2") >>>>>> (set_attr "predicable" "yes")]) >>>>>> @@ -1041,7 +1041,7 @@ (define_insn "*ldm2_db" >>>>>> (set (match_operand:SI 2 "arm_hard_register_operand" "") >>>>>> (mem:SI (plus:SI (match_dup 3) >>>>>> (const_int -4))))])] >>>>>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>>>>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>>>>> "ldm%(db%)\t%3, {%1, %2}" >>>>>> [(set_attr "type" "load2") >>>>>> (set_attr "predicable" "yes")]) >>>>>> @@ -1067,7 +1067,7 @@ (define_insn "*stm2_db" >>>>>> (match_operand:SI 1 "arm_hard_register_operand" "")) >>>>>> (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) >>>>>> (match_operand:SI 2 "arm_hard_register_operand" ""))])] >>>>>> - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" >>>>>> + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" >>>>>> "stm%(db%)\t%3, {%1, %2}" >>>>>> [(set_attr "type" "store2") >>>>>> (set_attr "predicable" "yes")]) >>>>>> >>>>>> >>>>>> Index: pr40457-3.c >>>>>> =================================================================== >>>>>> --- pr40457-3.c (revision 165492) >>>>>> +++ pr40457-3.c (working copy) >>>>>> @@ -5,6 +5,7 @@ void foo(int* p) >>>>>> { >>>>>> p[0] = 1; >>>>>> p[1] = 0; >>>>>> + p[2] = 2; >>>>>> } >>>>>> >>>>>> /* { dg-final 
{ scan-assembler "stm" } } */ >>>>>> Index: pr40457-1.c >>>>>> =================================================================== >>>>>> --- pr40457-1.c (revision 165492) >>>>>> +++ pr40457-1.c (working copy) >>>>>> @@ -1,9 +1,9 @@ >>>>>> -/* { dg-options "-Os" } */ >>>>>> +/* { dg-options "-O2" } */ >>>>>> /* { dg-do compile } */ >>>>>> >>>>>> int bar(int* p) >>>>>> { >>>>>> - int x = p[0] + p[1]; >>>>>> + int x = p[0] + p[1] + p[2]; >>>>>> return x; >>>>>> } >>>>>> >>>>>> Index: pr40457-2.c >>>>>> =================================================================== >>>>>> --- pr40457-2.c (revision 165492) >>>>>> +++ pr40457-2.c (working copy) >>>>>> @@ -5,6 +5,7 @@ void foo(int* p) >>>>>> { >>>>>> p[0] = 1; >>>>>> p[1] = 0; >>>>>> + p[2] = 2; >>>>>> } >>>>>> >>>>>> /* { dg-final { scan-assembler "stm" } } */ >>>>>> Index: pr45335.c >>>>>> =================================================================== >>>>>> --- pr45335.c (revision 0) >>>>>> +++ pr45335.c (revision 0) >>>>>> @@ -0,0 +1,22 @@ >>>>>> +/* { dg-options "-mthumb -O2" } */ >>>>>> +/* { dg-require-effective-target arm_thumb2_ok } */ >>>>>> +/* { dg-final { scan-assembler "ldrd" } } */ >>>>>> +/* { dg-final { scan-assembler "strd" } } */ >>>>>> + >>>>>> +struct S >>>>>> +{ >>>>>> + void* p1; >>>>>> + void* p2; >>>>>> + void* p3; >>>>>> + void* p4; >>>>>> +}; >>>>>> + >>>>>> +extern printf(char*, ...); >>>>>> + >>>>>> +void foo1(struct S* fp, struct S* otherSaveArea) >>>>>> +{ >>>>>> + struct S* saveA = fp - 1; >>>>>> + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); >>>>>> + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", >>>>>> + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); >>>>>> +} >>>>>> >>>>> >>>> >>> >> > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-04 8:57 ` Carrot Wei @ 2011-01-11 14:43 ` Nick Clifton 2011-01-11 14:51 ` Richard Earnshaw 2011-01-12 13:49 ` Paul Brook 0 siblings, 2 replies; 46+ messages in thread From: Nick Clifton @ 2011-01-11 14:43 UTC (permalink / raw) To: Carrot Wei Cc: Paul Brook, Richard Earnshaw, gcc-patches, ramana.radhakrishnan Hi Carrot, >>>>>>> ChangeLog: >>>>>>> 2010-10-16 Wei Guozhi<carrot@google.com> >>>>>>> >>>>>>> PR target/45335 >>>>>>> * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, >>>>>>> thumb2_ldrd_reg2 and peephole2): New insn pattern and related >>>>>>> peephole2. >>>>>>> (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): >>>>>>> New insn pattern and related peephole2. >>>>>>> * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. >>>>>>> (thumb2_check_ldrd_operands): New function. >>>>>>> (thumb2_prefer_ldmstm): New function. >>>>>>> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype. >>>>>>> (thumb2_check_ldrd_operands): New prototype. >>>>>>> (thumb2_prefer_ldmstm): New prototype. >>>>>>> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): >>>>>>> Change the ldm/stm patterns with 2 words to ARM only. Approved - please apply. Happy New Year! Cheers Nick ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-11 14:43 ` Nick Clifton @ 2011-01-11 14:51 ` Richard Earnshaw 2011-01-11 15:10 ` Nathan Froyd ` (2 more replies) 2011-01-12 13:49 ` Paul Brook 1 sibling, 3 replies; 46+ messages in thread From: Richard Earnshaw @ 2011-01-11 14:51 UTC (permalink / raw) To: Nick Clifton; +Cc: Carrot Wei, Paul Brook, gcc-patches, ramana.radhakrishnan On Tue, 2011-01-11 at 14:19 +0000, Nick Clifton wrote: > Hi Carrot, > > >>>>>>> ChangeLog: > >>>>>>> 2010-10-16 Wei Guozhi<carrot@google.com> > >>>>>>> > >>>>>>> PR target/45335 > >>>>>>> * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, > >>>>>>> thumb2_ldrd_reg2 and peephole2): New insn pattern and related > >>>>>>> peephole2. > >>>>>>> (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): > >>>>>>> New insn pattern and related peephole2. > >>>>>>> * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. > >>>>>>> (thumb2_check_ldrd_operands): New function. > >>>>>>> (thumb2_prefer_ldmstm): New function. > >>>>>>> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype. > >>>>>>> (thumb2_check_ldrd_operands): New prototype. > >>>>>>> (thumb2_prefer_ldmstm): New prototype. > >>>>>>> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): > >>>>>>> Change the ldm/stm patterns with 2 words to ARM only. > > Approved - please apply. > No, this is not yet ready. It certainly isn't ready to go in this late in gcc-4.6, which is now in regression-only fixes stage. Sorry, I still think this needs more work, but I'm dashing off to yet another meeting right now. R. > Happy New Year! > > Cheers > Nick > > > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-11 14:51 ` Richard Earnshaw @ 2011-01-11 15:10 ` Nathan Froyd 2011-01-12 6:34 ` Ian Lance Taylor 2011-01-12 14:01 ` Diego Novillo 2 siblings, 0 replies; 46+ messages in thread From: Nathan Froyd @ 2011-01-11 15:10 UTC (permalink / raw) To: Richard Earnshaw Cc: Nick Clifton, Carrot Wei, Paul Brook, gcc-patches, ramana.radhakrishnan On Tue, Jan 11, 2011 at 02:49:06PM +0000, Richard Earnshaw wrote: > On Tue, 2011-01-11 at 14:19 +0000, Nick Clifton wrote: > > Hi Carrot, > > > > >>>>>>> ChangeLog: > > >>>>>>> 2010-10-16 Wei Guozhi<carrot@google.com> > > > > Approved - please apply. > > > > No, this is not yet ready. It certainly isn't ready to go in this late > in gcc-4.6, which is now in regression-only fixes stage. > > Sorry, I still think this needs more work, but I'm dashing off to yet > another meeting right now. Carrot has been pinging *this* patch for almost three months (and needed to ping a previous patch for a month, maybe more). And only just now, when it's been approved, you come forward and wave your hands and say "needs more work"? That doesn't seem very sporting. -Nathan ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-11 14:51 ` Richard Earnshaw 2011-01-11 15:10 ` Nathan Froyd @ 2011-01-12 6:34 ` Ian Lance Taylor 2011-01-12 10:29 ` Richard Guenther 2011-01-12 14:01 ` Diego Novillo 2 siblings, 1 reply; 46+ messages in thread From: Ian Lance Taylor @ 2011-01-12 6:34 UTC (permalink / raw) To: Richard Earnshaw Cc: Nick Clifton, Carrot Wei, Paul Brook, gcc-patches, ramana.radhakrishnan On Tue, Jan 11, 2011 at 6:49 AM, Richard Earnshaw <rearnsha@arm.com> wrote: > > On Tue, 2011-01-11 at 14:19 +0000, Nick Clifton wrote: >> Hi Carrot, >> >> >>>>>>> ChangeLog: >> >>>>>>> 2010-10-16 Wei Guozhi<carrot@google.com> >> >>>>>>> >> >>>>>>> PR target/45335 >> >>>>>>> * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, >> >>>>>>> thumb2_ldrd_reg2 and peephole2): New insn pattern and related >> >>>>>>> peephole2. >> >>>>>>> (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): >> >>>>>>> New insn pattern and related peephole2. >> >>>>>>> * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. >> >>>>>>> (thumb2_check_ldrd_operands): New function. >> >>>>>>> (thumb2_prefer_ldmstm): New function. >> >>>>>>> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype. >> >>>>>>> (thumb2_check_ldrd_operands): New prototype. >> >>>>>>> (thumb2_prefer_ldmstm): New prototype. >> >>>>>>> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): >> >>>>>>> Change the ldm/stm patterns with 2 words to ARM only. >> >> Approved - please apply. >> > > No, this is not yet ready. It certainly isn't ready to go in this late > in gcc-4.6, which is now in regression-only fixes stage. > > Sorry, I still think this needs more work, but I'm dashing off to yet > another meeting right now. Richard, I really have to object to this. 
Carrot proposed this patch before gcc went into stage 3, and has pinged it regularly, which by the rules as I understand them means that it can be accepted during stage 3 if the target maintainers agree. This really strikes a chord for me as I've been hearing recently from people both inside and outside of Google that it is incredibly hard to get any patches into gcc, and people are naturally looking for alternatives. I think that we as a community need to take this seriously. I don't think you have to accept this patch. But it is just not OK to ignore multiple pings and then reject it after it has been accepted by another ARM maintainer. If you're going to reject a patch, reject it fast. If you are going to reject this patch now, please, please, take the time to do a proper review and suggest how the patch can be improved. Please do not say that a patch that was first proposed in August (!), and has been regularly pinged and updated quickly in response to all substantive comments, has to wait another three months for stage 1 before it can be accepted. Ian ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-12 6:34 ` Ian Lance Taylor @ 2011-01-12 10:29 ` Richard Guenther 0 siblings, 0 replies; 46+ messages in thread From: Richard Guenther @ 2011-01-12 10:29 UTC (permalink / raw) To: Ian Lance Taylor Cc: Richard Earnshaw, Nick Clifton, Carrot Wei, Paul Brook, gcc-patches, ramana.radhakrishnan On Wed, Jan 12, 2011 at 6:23 AM, Ian Lance Taylor <iant@google.com> wrote: > On Tue, Jan 11, 2011 at 6:49 AM, Richard Earnshaw <rearnsha@arm.com> wrote: >> >> On Tue, 2011-01-11 at 14:19 +0000, Nick Clifton wrote: >>> Hi Carrot, >>> >>> >>>>>>> ChangeLog: >>> >>>>>>> 2010-10-16 Wei Guozhi<carrot@google.com> >>> >>>>>>> >>> >>>>>>> PR target/45335 >>> >>>>>>> * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1, >>> >>>>>>> thumb2_ldrd_reg2 and peephole2): New insn pattern and related >>> >>>>>>> peephole2. >>> >>>>>>> (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2): >>> >>>>>>> New insn pattern and related peephole2. >>> >>>>>>> * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function. >>> >>>>>>> (thumb2_check_ldrd_operands): New function. >>> >>>>>>> (thumb2_prefer_ldmstm): New function. >>> >>>>>>> * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype. >>> >>>>>>> (thumb2_check_ldrd_operands): New prototype. >>> >>>>>>> (thumb2_prefer_ldmstm): New prototype. >>> >>>>>>> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db): >>> >>>>>>> Change the ldm/stm patterns with 2 words to ARM only. >>> >>> Approved - please apply. >>> >> >> No, this is not yet ready. It certainly isn't ready to go in this late >> in gcc-4.6, which is now in regression-only fixes stage. >> >> Sorry, I still think this needs more work, but I'm dashing off to yet >> another meeting right now. > > Richard, I really have to object to this. 
Carrot proposed this patch > before gcc went into stage 3, and has pinged it regularly, which by > the rules as I understand them means that it can be accepted during > stage 3 if the target maintainers agree. Not specific to this issue, but we are in stage 4 now, which means that only fixes for regressions and documentation are allowed. For patches that only affect non-primary/secondary targets, target maintainers may have more freedom. We have also regularly accepted non-regression wrong-code and rejects-valid patches at this stage. That's 2 cents from your release manager(s) (i.e., "it was posted during stageN < 3" isn't on its own a good enough reason to not honor restrictions of stageN >= 3). Richard. ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-11 14:51 ` Richard Earnshaw 2011-01-11 15:10 ` Nathan Froyd 2011-01-12 6:34 ` Ian Lance Taylor @ 2011-01-12 14:01 ` Diego Novillo 2011-01-12 21:56 ` Mike Stump 2 siblings, 1 reply; 46+ messages in thread From: Diego Novillo @ 2011-01-12 14:01 UTC (permalink / raw) To: Richard Earnshaw Cc: Nick Clifton, Carrot Wei, Paul Brook, gcc-patches, ramana.radhakrishnan On Tue, Jan 11, 2011 at 09:49, Richard Earnshaw <rearnsha@arm.com> wrote: > No, this is not yet ready. It certainly isn't ready to go in this late > in gcc-4.6, which is now in regression-only fixes stage. > > Sorry, I still think this needs more work, but I'm dashing off to yet > another meeting right now. Richard, This is simply unacceptable. Carrot has been carrying this patch for more than 4 months now, diligently pinging the patch and incorporating the very scarce reviews he got from the ARM maintainers. Now that it has finally been approved, you summarily reject it with no explanation? Please provide an explanation, at least. Our ARM developers at Google have been having a particularly hard time trying to get their patches noticed by maintainers. Patches are pinged many times and ignored for long stretches of time. Perhaps we need more ARM maintainers? Is there anything we could do to help the current situation? Diego. ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-12 14:01 ` Diego Novillo @ 2011-01-12 21:56 ` Mike Stump 0 siblings, 0 replies; 46+ messages in thread From: Mike Stump @ 2011-01-12 21:56 UTC (permalink / raw) To: Diego Novillo Cc: Richard Earnshaw, Nick Clifton, Carrot Wei, Paul Brook, gcc-patches, ramana.radhakrishnan On Jan 12, 2011, at 5:49 AM, Diego Novillo wrote: > Patches are pinged many times and ignored for long stretches of time. Perhaps we > need more ARM maintainers? 3 months, yeah, I'd think it would be beneficial to have additional maintainers as well. I don't think gcc is benefitted by review times that take more than 48 hours. If the gcc web site had a poll button (a la /.), we could run random polls to help answer interesting questions. ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-11 14:43 ` Nick Clifton 2011-01-11 14:51 ` Richard Earnshaw @ 2011-01-12 13:49 ` Paul Brook 2011-01-12 15:20 ` Richard Earnshaw 1 sibling, 1 reply; 46+ messages in thread From: Paul Brook @ 2011-01-12 13:49 UTC (permalink / raw) To: Nick Clifton Cc: Carrot Wei, Richard Earnshaw, gcc-patches, ramana.radhakrishnan > > Why is this restricted to Thumb mode? The ARM variant of ldrd isn't quite > > as flexible, but still provides a useful improvement over ldm. > > I agree the ARM version is also useful. But it brings much less > benefit with too much complexity (due to more restriction and insn > pattern conflict with ldm). So I will leave it as a future > improvement. I'm still not convinced. Surely there's no more complexity than the current ldm fallback bits. > >> + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower > >> + cost. */ > >> + return false; > > > > Code clearly doesn't match the comment. In fact this function always > > returns false. > > Richard mentioned that in some cases (specifically cortex A9) ldm has > less cost than ldrd and we should model this in the insn pattern. This > function is used for this. But I don't know the cortex A9 architecture > detail, so it should be filled by somebody with more knowledge about > it in future. This is trivially dead code. As such it should be removed. I consider this sort of thing to be actively harmful. At best it's likely to bitrot and need rewriting when you implement your "future" changes. At worst it triggers incorrectly and breaks something. > --- pr40457-1.c (revision 165492) > +++ pr40457-1.c (working copy) > @@ -1,9 +1,9 @@ > -/* { dg-options "-Os" } */ > +/* { dg-options "-O2" } */ > > /* { dg-do compile } */ > > int bar(int* p) This looks wrong. ldm is always smaller but is only faster on some cores, so I'd expect compiling with -O2 to make this test less reliable. 
Paul ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-12 13:49 ` Paul Brook @ 2011-01-12 15:20 ` Richard Earnshaw 2011-01-13 10:45 ` Carrot Wei 0 siblings, 1 reply; 46+ messages in thread From: Richard Earnshaw @ 2011-01-12 15:20 UTC (permalink / raw) To: Paul Brook; +Cc: Nick Clifton, Carrot Wei, gcc-patches, ramana.radhakrishnan On Wed, 2011-01-12 at 13:19 +0000, Paul Brook wrote: > > > Why is this restricted to Thumb mode? The ARM variant of ldrd isn't quite > > > as flexible, but still provides a useful improvement over ldm. > > > > I agree the ARM version is also useful. But it brings much less > > benefit with too much complexity (due to more restriction and insn > > pattern conflict with ldm). So I will leave it as a future > > improvement. > > I'm still not convinced. Surely there's no more complexity than the current > ldm fallback bits. > > > >> + /* Now ldm/stm is possible. Check for special cases ldm/stm has lower > > >> + cost. */ > > >> + return false; > > > > > > Code clearly doesn't match the comment. In fact this function always > > > returns false. > > > > Richard mentioned that in some cases (specifically cortex A9) ldm has > > less cost than ldrd and we should model this in the insn pattern. This > > function is used for this. But I don't know the cortex A9 architecture > > detail, so it should be filled by somebody with more knowledge about > > it in future. > > This is trivially dead code. As such it should be removed. > I consider this sort of thing to be actively harmful. At best it's likely to > bitrot and need rewriting when you implement your "future" changes. At worst > it triggers incorrectly and breaks something. > > > --- pr40457-1.c (revision 165492) > > +++ pr40457-1.c (working copy) > > @@ -1,9 +1,9 @@ > > -/* { dg-options "-Os" } */ > > +/* { dg-options "-O2" } */ > > > > /* { dg-do compile } */ > > > > int bar(int* p) > > This looks wrong. 
ldm is always smaller but is only faster on some cores, so > I'd expect compiling with -O2 to make this test less reliable. > > Paul > Additionally, all the define_insn patterns in the patch can generate more than one instruction. They MUST have a length attribute that specifies the number of bytes that can be generated when the default (4 bytes) is insufficient; otherwise the constant placement code will fail. R. ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-12 15:20 ` Richard Earnshaw @ 2011-01-13 10:45 ` Carrot Wei 2011-01-13 11:12 ` Richard Earnshaw ` (2 more replies) 0 siblings, 3 replies; 46+ messages in thread From: Carrot Wei @ 2011-01-13 10:45 UTC (permalink / raw) To: Richard Earnshaw Cc: Paul Brook, Nick Clifton, gcc-patches, ramana.radhakrishnan One question about the attribute length. It looks like the attribute expression is not very powerful according to http://gcc.gnu.org/onlinedocs/gccint/Expressions.html#Expressions, then how can I express the following expressions: if (fix_cm3_ldrd && (operands[2] == operands[0])) if (offset1 <= -256) thanks Carrot > > Additionally, all the define_insn patterns in the patch can generate > more than one instruction. They MUST have a length attribute that > specifies the number of bytes that can be generated when the default (4 > bytes) is insufficient; otherwise the constant placement code will fail. > > R. > > > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-13 10:45 ` Carrot Wei @ 2011-01-13 11:12 ` Richard Earnshaw 2011-01-13 11:15 ` Ramana Radhakrishnan 2011-01-13 17:19 ` Mike Stump 2 siblings, 0 replies; 46+ messages in thread From: Richard Earnshaw @ 2011-01-13 11:12 UTC (permalink / raw) To: Carrot Wei; +Cc: Paul Brook, Nick Clifton, gcc-patches, ramana.radhakrishnan On Thu, 2011-01-13 at 17:27 +0800, Carrot Wei wrote: > One question about the attribute length. It looks the attribute > expression is not very powerful according to > http://gcc.gnu.org/onlinedocs/gccint/Expressions.html#Expressions, > then how can I express following expressions: > > if (fix_cm3_ldrd && (operands[2] == operands[0])) > > if (offset1 <= -256) > > thanks > Carrot > > > > > Additionally, all the define_insn patterns in the patch can generate > > more than one instruction. They MUST have a length attribute that > > specifies the number of bytes that can be generated when the default (4 > > bytes) is insufficient; otherwise the constant placement code will fail. > > > > R. > > > > > > > In the worst case you have to make the attribute express the longest sequence that the pattern can generate. If that's going to be significantly longer than the common case then really you should think about how you might restructure the code to avoid over accounting too often (maybe by creating separate constraint alternatives). Attribute expressions are not totally inflexible and with some care you can often express the length quite precisely. R. ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-13 10:45 ` Carrot Wei 2011-01-13 11:12 ` Richard Earnshaw @ 2011-01-13 11:15 ` Ramana Radhakrishnan 2011-01-14 9:25 ` Carrot Wei 2011-01-13 17:19 ` Mike Stump 2 siblings, 1 reply; 46+ messages in thread From: Ramana Radhakrishnan @ 2011-01-13 11:15 UTC (permalink / raw) To: Carrot Wei Cc: Richard Earnshaw, Paul Brook, Nick Clifton, gcc-patches, ramana.radhakrishnan On Thu, Jan 13, 2011 at 9:27 AM, Carrot Wei <carrot@google.com> wrote: > One question about the attribute length. It looks the attribute > expression is not very powerful according to > http://gcc.gnu.org/onlinedocs/gccint/Expressions.html#Expressions, > then how can I express following expressions: > > if (fix_cm3_ldrd && (operands[2] == operands[0])) Can't you express this as? (set_attr "length" (and (ne (symbol_ref ("fix_cm3_ldrd") (const_int 0)) (eq (match_dup 2) (match_dup 0))) (const_int <length of insns>) It's too early in the day and I haven't yet had my coffee but you probably get the picture. If you can't get separate constraints as Richard says or the logic as I mention above becomes too complicated which I suspect it might, it would be worth factoring out the logic into a common C function parameterised on counting vs emitting. Then you could just call the same function with and without the "emit/count" flag from the place where you set the length attribute and the place where you want to emit the assembler. Then the logic is in one place and more maintainable than having 2 implementations of the same logic. HTH Ramana ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-13 11:15 ` Ramana Radhakrishnan @ 2011-01-14 9:25 ` Carrot Wei 2011-01-14 10:17 ` Richard Earnshaw 0 siblings, 1 reply; 46+ messages in thread From: Carrot Wei @ 2011-01-14 9:25 UTC (permalink / raw) To: Ramana Radhakrishnan Cc: Richard Earnshaw, Paul Brook, Nick Clifton, gcc-patches, ramana.radhakrishnan On Thu, Jan 13, 2011 at 6:25 PM, Ramana Radhakrishnan <ramana.gcc@googlemail.com> wrote: > On Thu, Jan 13, 2011 at 9:27 AM, Carrot Wei <carrot@google.com> wrote: >> One question about the attribute length. It looks the attribute >> expression is not very powerful according to >> http://gcc.gnu.org/onlinedocs/gccint/Expressions.html#Expressions, >> then how can I express following expressions: >> >> if (fix_cm3_ldrd && (operands[2] == operands[0])) > > > Can't you express this as ? > > (set_attr "length" > (and (ne (symbol_ref ("fix_cm3_ldrd") (const_int 0)) > (eq (match_dup 2) (match_dup 0))) (const_int <length of insns>) > According to http://gcc.gnu.org/onlinedocs/gccint/Insn-Lengths.html#Insn-Lengths, (match_dup n) can only be used with a label_ref operand. > > It's too early in the day and I haven't yet had my coffee but you > probably get the picture. > > If you can't get separate constraints as Richard says or the logic as > I mention above becomes too complicated which I suspect it might, it > would be worth factoring out the logic into a common C function > parameterised on counting vs emiting. Then you could just call the > same function with and without the "emit/count" flag from the place > where you set the length attribute and the place where you want to > emit the assembler. > > Then the logic is in one place and more maintainable than having 2 > implementatons of the same logic. > This is a good idea! thanks Carrot ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-14 9:25 ` Carrot Wei @ 2011-01-14 10:17 ` Richard Earnshaw 2011-01-18 15:46 ` Carrot Wei 0 siblings, 1 reply; 46+ messages in thread From: Richard Earnshaw @ 2011-01-14 10:17 UTC (permalink / raw) To: Carrot Wei Cc: Ramana Radhakrishnan, Paul Brook, Nick Clifton, gcc-patches, ramana.radhakrishnan On Fri, 2011-01-14 at 17:18 +0800, Carrot Wei wrote: > On Thu, Jan 13, 2011 at 6:25 PM, Ramana Radhakrishnan > <ramana.gcc@googlemail.com> wrote: > > On Thu, Jan 13, 2011 at 9:27 AM, Carrot Wei <carrot@google.com> wrote: > >> One question about the attribute length. It looks the attribute > >> expression is not very powerful according to > >> http://gcc.gnu.org/onlinedocs/gccint/Expressions.html#Expressions, > >> then how can I express following expressions: > >> > >> if (fix_cm3_ldrd && (operands[2] == operands[0])) > > > > > > Can't you express this as ? > > > > (set_attr "length" > > (and (ne (symbol_ref ("fix_cm3_ldrd") (const_int 0)) > > (eq (match_dup 2) (match_dup 0))) (const_int <length of insns>) > > > According to http://gcc.gnu.org/onlinedocs/gccint/Insn-Lengths.html#Insn-Lengths, > (match_dup n) can only be used with a label_ref operand. > > > > > It's too early in the day and I haven't yet had my coffee but you > > probably get the picture. > > > > If you can't get separate constraints as Richard says or the logic as > > I mention above becomes too complicated which I suspect it might, it > > would be worth factoring out the logic into a common C function > > parameterised on counting vs emiting. Then you could just call the > > same function with and without the "emit/count" flag from the place > > where you set the length attribute and the place where you want to > > emit the assembler. > > > > Then the logic is in one place and more maintainable than having 2 > > implementatons of the same logic. > > > This is a good idea! 
> It can sometimes be done that way, but beware: separating the length calculations from the insns it relates to is a long-term maintenance nightmare, because now the code is in two separate places. R. > thanks > Carrot > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-14 10:17 ` Richard Earnshaw @ 2011-01-18 15:46 ` Carrot Wei 2011-01-27 5:42 ` Jie Zhang ` (2 more replies) 0 siblings, 3 replies; 46+ messages in thread From: Carrot Wei @ 2011-01-18 15:46 UTC (permalink / raw) To: Richard Earnshaw Cc: Ramana Radhakrishnan, Paul Brook, Nick Clifton, gcc-patches, ramana.radhakrishnan On Fri, Jan 14, 2011 at 5:51 PM, Richard Earnshaw <rearnsha@arm.com> wrote: > > On Fri, 2011-01-14 at 17:18 +0800, Carrot Wei wrote: >> On Thu, Jan 13, 2011 at 6:25 PM, Ramana Radhakrishnan >> <ramana.gcc@googlemail.com> wrote: >> > On Thu, Jan 13, 2011 at 9:27 AM, Carrot Wei <carrot@google.com> wrote: >> >> One question about the attribute length. It looks the attribute >> >> expression is not very powerful according to >> >> http://gcc.gnu.org/onlinedocs/gccint/Expressions.html#Expressions, >> >> then how can I express following expressions: >> >> >> >> if (fix_cm3_ldrd && (operands[2] == operands[0])) >> > >> > >> > Can't you express this as ? >> > >> > (set_attr "length" >> > (and (ne (symbol_ref ("fix_cm3_ldrd") (const_int 0)) >> > (eq (match_dup 2) (match_dup 0))) (const_int <length of insns>) >> > >> According to http://gcc.gnu.org/onlinedocs/gccint/Insn-Lengths.html#Insn-Lengths, >> (match_dup n) can only be used with a label_ref operand. >> >> > >> > It's too early in the day and I haven't yet had my coffee but you >> > probably get the picture. >> > >> > If you can't get separate constraints as Richard says or the logic as >> > I mention above becomes too complicated which I suspect it might, it >> > would be worth factoring out the logic into a common C function >> > parameterised on counting vs emiting. Then you could just call the >> > same function with and without the "emit/count" flag from the place >> > where you set the length attribute and the place where you want to >> > emit the assembler. 
>> > >> > Then the logic is in one place and more maintainable than having 2 >> > implementatons of the same logic. >> > >> This is a good idea! >> > > It can sometimes be done that way, but beware: separating the length > calculations from the insns it relates to is a long-term maintenance > nightmare, because now the code is in two separate places. > > R. Ramana's method is to put the instruction output and counting in on place. So it's easy to keep them synchronized. My latest version of patch did the following modifications compared to the earlier version: Added support of arm ldrd/strd instructions. Added length attribute to insn patterns. Moved the insn patterns to file ldmstm.md. It has passed the dejagnu testing on arm qemu. thanks Carrot ChangeLog: 2010-01-18 Wei Guozhi <carrot@google.com> PR target/45335 * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_ib, stm2_ib, ldm2_da, stm2_da, ldm2_db, stm2_db): Add condition !arm_arch7 to these insns. (ldrd, ldrd_reg1, ldrd_reg2 and peephole2): New insn patterns and related peephole2. (strd, strd_reg1, strd_reg2 and peephole2): New insn patterns and related peephole2. * gcc/config/arm/arm-protos.h (arm_check_ldrd_operands): New prototype. (arm_legitimate_ldrd_p): New prototype. (arm_output_ldrd): New prototype. * gcc/config/arm/arm.c (arm_check_ldrd_operands): New function. (arm_legitimate_ldrd_p): New function. (arm_output_ldrd): New function. 2010-01-18 Wei Guozhi <carrot@google.com> PR target/45335 * gcc.target/arm/pr45335.c: New test. * gcc.target/arm/pr45335-2.c: New test. * gcc.target/arm/pr45335-3.c: New test. * gcc.target/arm/pr40457-1.c: Add another possible output "ldrd". * gcc.target/arm/pr40457-2.c: Changed to store 3 words. * gcc.target/arm/pr40457-3.c: Changed to store 3 words. 
Index: arm.c =================================================================== --- arm.c (revision 168737) +++ arm.c (working copy) @@ -23574,4 +23574,234 @@ arm_preferred_rename_class (reg_class_t return NO_REGS; } +/* Check the validity of operands in an ldrd/strd instruction. */ +bool +arm_check_ldrd_operands (rtx reg1, rtx reg2, rtx off1, rtx off2) +{ + HOST_WIDE_INT offset1 = 0; + HOST_WIDE_INT offset2 = 0; + int regno1 = REGNO (reg1); + int regno2 = REGNO (reg2); + HOST_WIDE_INT max_offset = 1020; + + if (TARGET_ARM) + max_offset = 255; + + if (off1 != NULL_RTX) + offset1 = INTVAL (off1); + if (off2 != NULL_RTX) + offset2 = INTVAL (off2); + + /* The offset range of LDRD is [-max_offset, max_offset]. Here we check if + both offsets lie in the range [-max_offset, max_offset+4]. If one of the + offsets is max_offset+4, the following condition + ((offset1 + 4) == offset2) + will ensure offset1 to be max_offset, suitable for instruction LDRD. */ + if ((offset1 > (max_offset + 4)) || (offset1 < -max_offset) + || ((offset1 & 3) != 0)) + return false; + if ((offset2 > (max_offset + 4)) || (offset2 < -max_offset) + || ((offset2 & 3) != 0)) + return false; + + if ((offset1 + 4) == offset2) + { + if (TARGET_THUMB2) + return true; + + /* TARGET_ARM */ + if (((regno1 & 1) == 0) && ((regno1 + 1) == regno2)) /* ldrd */ + return true; + + if ((regno1 < regno2) && ((offset1 <= 4) && (offset1 >= -8))) /* ldm */ + return true; + } + if ((offset2 + 4) == offset1) + { + if (TARGET_THUMB2) + return true; + + /* TARGET_ARM */ + if (((regno2 & 1) == 0) && ((regno2 + 1) == regno1)) /* ldrd */ + return true; + + if ((regno2 < regno1) && ((offset2 <= 4) && (offset2 >= -8))) /* ldm */ + return true; + } + + return false; +} + +/* Check if the two memory accesses can be merged to an ldrd/strd instruction. + That is they use the same base register, and the gap between constant + offsets should be 4. 
*/ +bool +arm_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd) +{ + rtx base1, base2; + rtx offset1 = NULL_RTX; + rtx offset2 = NULL_RTX; + rtx addr1 = XEXP (mem1, 0); + rtx addr2 = XEXP (mem2, 0); + + if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) + return false; + + if (REG_P (addr1)) + base1 = addr1; + else if (GET_CODE (addr1) == PLUS) + { + base1 = XEXP (addr1, 0); + offset1 = XEXP (addr1, 1); + if (!REG_P (base1) || (GET_CODE (offset1) != CONST_INT)) + return false; + } + else + return false; + + if (REG_P (addr2)) + base2 = addr2; + else if (GET_CODE (addr2) == PLUS) + { + base2 = XEXP (addr2, 0); + offset2 = XEXP (addr2, 1); + if (!REG_P (base2) || (GET_CODE (offset2) != CONST_INT)) + return false; + } + else + return false; + + if (base1 != base2) + return false; + + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) + return false; + + return arm_check_ldrd_operands (reg1, reg2, offset1, offset2); +} + +/* Output instructions for ldrd and count the number of bytes has been + outputted. Do not actually output instructions if EMIT_P is false. 
*/ +int +arm_output_ldrd (rtx reg1, rtx reg2, rtx base, rtx off1, rtx off2, bool emit_p) +{ + int length = 0; + rtx operands[5]; + HOST_WIDE_INT offset1 = 0; + HOST_WIDE_INT offset2 = 0; + + if (off1 != NULL_RTX) + offset1 = INTVAL (off1); + else + off1 = GEN_INT (0); + if (off2 != NULL_RTX) + offset2 = INTVAL (off2); + else + off2 = GEN_INT (0); + if (offset1 > offset2) + { + rtx tmp; + HOST_WIDE_INT t = offset1; offset1 = offset2; offset2 = t; + tmp = off1; off1 = off2; off2 = tmp; + tmp = reg1; reg1 = reg2; reg2 = tmp; + } + + operands[0] = reg1; + operands[1] = reg2; + operands[2] = base; + operands[3] = off1; + operands[4] = off2; + + if (TARGET_THUMB2) + { + if (fix_cm3_ldrd && (base == reg1)) + { + if (offset1 <= -256) + { + if (emit_p) + output_asm_insn ("sub\t%2, %2, %n3", operands); + length = 4; + + if (emit_p) + output_asm_insn ("ldr\t%1, [%2, #4]", operands); + if (low_register_operand (reg2, SImode) + && low_register_operand (base, SImode)) + length += 2; + else + length += 4; + + if (emit_p) + output_asm_insn ("ldr\t%0, [%2]", operands); + if (low_register_operand (base, SImode)) + length += 2; + else + length += 4; + } + else + { + if (emit_p) + output_asm_insn ("ldr\t%1, [%2, %4]", operands); + if (low_register_operand (reg2, SImode) && (offset2 >= 0) + && low_register_operand (base, SImode) && (offset2 < 128)) + length += 2; + else + length += 4; + + if (emit_p) + output_asm_insn ("ldr\t%0, [%2, %3]", operands); + if (low_register_operand (base, SImode) + && (offset1 >= 0) && (offset1 < 128)) + length += 2; + else + length += 4; + } + } + else + { + if (emit_p) + output_asm_insn ("ldrd\t%0, %1, [%2, %3]", operands); + length = 4; + } + } + else /* TARGET_ARM */ + { + if ((REGNO (reg2) == (REGNO (reg1) + 1)) && ((REGNO (reg1) & 1) == 0)) + { + if (emit_p) + output_asm_insn ("ldrd\t%0, %1, [%2, %3]", operands); + length = 4; + } + else + { + if (emit_p) + { + switch (offset1) + { + case -8: + output_asm_insn ("ldm%(db%)\t%2, {%0, %1}", operands); + 
break; + + case -4: + output_asm_insn ("ldm%(da%)\t%2, {%0, %1}", operands); + break; + + case 0: + output_asm_insn ("ldm%(ia%)\t%2, {%0, %1}", operands); + break; + + case 4: + output_asm_insn ("ldm%(ib%)\t%2, {%0, %1}", operands); + break; + + default: + gcc_unreachable (); + } + } + length = 4; + } + } + + return length; +} + #include "gt-arm.h" Index: arm-protos.h =================================================================== --- arm-protos.h (revision 168737) +++ arm-protos.h (working copy) @@ -150,6 +150,9 @@ extern void arm_expand_sync (enum machin extern const char *arm_output_memory_barrier (rtx *); extern const char *arm_output_sync_insn (rtx, rtx *); extern unsigned int arm_sync_loop_insns (rtx , rtx *); +extern bool arm_check_ldrd_operands (rtx, rtx, rtx, rtx); +extern bool arm_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool); +extern int arm_output_ldrd (rtx, rtx, rtx, rtx, rtx, bool); #if defined TREE_CODE extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); Index: ldmstm.md =================================================================== --- ldmstm.md (revision 168737) +++ ldmstm.md (working copy) @@ -852,7 +852,7 @@ (define_insn "*ldm2_ia" (set (match_operand:SI 2 "arm_hard_register_operand" "") (mem:SI (plus:SI (match_dup 3) (const_int 4))))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_32BIT && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "ldm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "load2") (set_attr "predicable" "yes")]) @@ -901,7 +901,7 @@ (define_insn "*stm2_ia" (match_operand:SI 1 "arm_hard_register_operand" "")) (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) (match_operand:SI 2 "arm_hard_register_operand" ""))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_32BIT && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "stm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "store2") (set_attr "predicable" "yes")]) @@ -939,7 +939,7 @@ (define_insn "*ldm2_ib" (set (match_operand:SI 2 
"arm_hard_register_operand" "") (mem:SI (plus:SI (match_dup 3) (const_int 8))))])] - "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "ldm%(ib%)\t%3, {%1, %2}" [(set_attr "type" "load2") (set_attr "predicable" "yes")]) @@ -965,7 +965,7 @@ (define_insn "*stm2_ib" (match_operand:SI 1 "arm_hard_register_operand" "")) (set (mem:SI (plus:SI (match_dup 3) (const_int 8))) (match_operand:SI 2 "arm_hard_register_operand" ""))])] - "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "stm%(ib%)\t%3, {%1, %2}" [(set_attr "type" "store2") (set_attr "predicable" "yes")]) @@ -990,7 +990,7 @@ (define_insn "*ldm2_da" (const_int -4)))) (set (match_operand:SI 2 "arm_hard_register_operand" "") (mem:SI (match_dup 3)))])] - "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "ldm%(da%)\t%3, {%1, %2}" [(set_attr "type" "load2") (set_attr "predicable" "yes")]) @@ -1015,7 +1015,7 @@ (define_insn "*stm2_da" (match_operand:SI 1 "arm_hard_register_operand" "")) (set (mem:SI (match_dup 3)) (match_operand:SI 2 "arm_hard_register_operand" ""))])] - "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "stm%(da%)\t%3, {%1, %2}" [(set_attr "type" "store2") (set_attr "predicable" "yes")]) @@ -1041,7 +1041,7 @@ (define_insn "*ldm2_db" (set (match_operand:SI 2 "arm_hard_register_operand" "") (mem:SI (plus:SI (match_dup 3) (const_int -4))))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_32BIT && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "ldm%(db%)\t%3, {%1, %2}" [(set_attr "type" "load2") (set_attr "predicable" "yes")]) @@ -1067,7 +1067,7 @@ (define_insn "*stm2_db" (match_operand:SI 1 "arm_hard_register_operand" "")) (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) (match_operand:SI 2 "arm_hard_register_operand" ""))])] - "TARGET_32BIT && XVECLEN 
(operands[0], 0) == 2" + "TARGET_32BIT && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "stm%(db%)\t%3, {%1, %2}" [(set_attr "type" "store2") (set_attr "predicable" "yes")]) @@ -1189,3 +1189,211 @@ (define_peephole2 FAIL; }) +(define_insn "*ldrd" + [(parallel [(set (match_operand:SI 0 "arm_hard_register_operand" "") + (mem:SI (plus:SI + (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" "")))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 4 "const_int_operand" ""))))])] + "TARGET_32BIT && arm_arch7 + && arm_check_ldrd_operands (operands[0], operands[1], + operands[3], operands[4])" + "* + arm_output_ldrd (operands[0], operands[1], + operands[2], operands[3], operands[4], true); + return \"\"; + " + [(set (attr "length") + (symbol_ref ("arm_output_ldrd (operands[0], operands[1], operands[2], + operands[3], operands[4], false)")))] +) + +(define_insn "*ldrd_reg1" + [(parallel [(set (match_operand:SI 0 "arm_hard_register_operand" "") + (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 3 "const_int_operand" ""))))])] + "TARGET_32BIT && arm_arch7 + && arm_check_ldrd_operands (operands[0], operands[1], NULL_RTX, operands[3])" + "* + arm_output_ldrd (operands[0], operands[1], + operands[2], NULL_RTX, operands[3], true); + return \"\"; + " + [(set (attr "length") + (symbol_ref ("arm_output_ldrd (operands[0], operands[1], operands[2], + NULL_RTX, operands[3], false)")))] +) + +(define_insn "*ldrd_reg2" + [(parallel [(set (match_operand:SI 0 "arm_hard_register_operand" "") + (mem:SI (plus:SI + (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" "")))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (match_dup 2)))])] + "TARGET_32BIT && arm_arch7 + && arm_check_ldrd_operands (operands[0], operands[1], 
operands[3], NULL_RTX)" + "* + arm_output_ldrd (operands[0], operands[1], + operands[2], operands[3], NULL_RTX, true); + return \"\"; + " + [(set (attr "length") + (symbol_ref ("arm_output_ldrd (operands[0], operands[1], operands[2], + operands[3], NULL_RTX, false)")))] +) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" ""))] + "TARGET_32BIT && arm_arch7 + && arm_legitimate_ldrd_p (operands[0], operands[1], + operands[2], operands[3], true)" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" ""))])] + "" +) + +(define_insn "*strd" + [(parallel [(set (mem:SI + (plus:SI (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" ""))) + (match_operand:SI 0 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 4 "const_int_operand" ""))) + (match_operand:SI 1 "arm_hard_register_operand" ""))])] + "TARGET_32BIT && arm_arch7 + && arm_check_ldrd_operands (operands[0], operands[1], + operands[3], operands[4])" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + HOST_WIDE_INT offset2 = INTVAL (operands[4]); + if (offset1 > offset2) + { + rtx tmp = operands[0]; operands[0] = operands[1]; operands[1] = tmp; + tmp = operands[3]; operands[3] = operands[4]; operands[4] = tmp; + offset1 = INTVAL (operands[3]); + offset2 = INTVAL (operands[4]); + } + if (TARGET_THUMB2) + return \"strd\\t%0, %1, [%2, %3]\"; + else /* TARGET_ARM */ + { + if ((REGNO (operands[1]) == (REGNO (operands[0]) + 1)) + && ((REGNO (operands[0]) & 1) == 0)) + return \"strd\\t%0, %1, [%2, %3]\"; + else if (offset1 == -8) + return \"stm%(db%)\\t%2, {%0, %1}\"; + else /* offset1 == 4 */ + return \"stm%(ib%)\\t%2, {%0, %1}\"; + } + }" + 
[(set_attr "length" "4")] +) + +(define_insn "*strd_reg1" + [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk")) + (match_operand:SI 0 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 3 "const_int_operand" ""))) + (match_operand:SI 1 "arm_hard_register_operand" ""))])] + "TARGET_32BIT && arm_arch7 + && arm_check_ldrd_operands (operands[0], operands[1], NULL_RTX, operands[3])" + "* + { + HOST_WIDE_INT offset2 = INTVAL (operands[3]); + if (TARGET_THUMB2) + { + if (offset2 == 4) + return \"strd\\t%0, %1, [%2]\"; + else + return \"strd\\t%1, %0, [%2, %3]\"; + } + else /* TARGET_ARM */ + { + if (offset2 == 4) + { + if ((REGNO (operands[1]) == (REGNO (operands[0]) + 1)) + && ((REGNO (operands[0]) & 1) == 0)) + return \"strd\\t%0, %1, [%2]\"; + else + return \"stm%(ia%)\\t%2, {%0, %1}\"; + } + else /* offset2 == -4 */ + { + if ((REGNO (operands[0]) == (REGNO (operands[1]) + 1)) + && ((REGNO (operands[1]) & 1) == 0)) + return \"strd\\t%1, %0, [%2, %3]\"; + else + return \"stm%(da%)\\t%2, {%1, %0}\"; + } + } + }" + [(set_attr "length" "4")] +) + +(define_insn "*strd_reg2" + [(parallel [(set (mem:SI (plus:SI + (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" ""))) + (match_operand:SI 0 "arm_hard_register_operand" "")) + (set (mem:SI (match_dup 2)) + (match_operand:SI 1 "arm_hard_register_operand" ""))])] + "TARGET_32BIT && arm_arch7 + && arm_check_ldrd_operands (operands[0], operands[1], operands[3], NULL_RTX)" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + if (TARGET_THUMB2) + { + if (offset1 == -4) + return \"strd\\t%0, %1, [%2, %3]\"; + else + return \"strd\\t%1, %0, [%2]\"; + } + else /* TARGET_ARM */ + { + if (offset1 == -4) + { + if ((REGNO (operands[1]) == (REGNO (operands[0]) + 1)) + && ((REGNO (operands[0]) & 1) == 0)) + return \"strd\\t%0, %1, [%2, %3]\"; + else + return \"stm%(da%)\\t%2, {%0, %1}\"; + } + else + { + if ((REGNO (operands[0]) == (REGNO 
(operands[1]) + 1)) + && ((REGNO (operands[1]) & 1) == 0)) + return \"strd\\t%1, %0, [%2]\"; + else + return \"stm%(ia%)\\t%2, {%1, %0}\"; + } + } + }" + [(set_attr "length" "4")] +) + +(define_peephole2 + [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" ""))] + "TARGET_32BIT && arm_arch7 + && arm_legitimate_ldrd_p (operands[0], operands[1], + operands[2], operands[3], false)" + [(parallel [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" ""))])] + "" +) Index: pr40457-3.c =================================================================== --- pr40457-3.c (revision 168737) +++ pr40457-3.c (working copy) @@ -5,6 +5,7 @@ void foo(int* p) { p[0] = 1; p[1] = 0; + p[2] = 2; } /* { dg-final { scan-assembler "stm" } } */ Index: pr45335-2.c =================================================================== --- pr45335-2.c (revision 0) +++ pr45335-2.c (revision 0) @@ -0,0 +1,10 @@ +/* { dg-options "-Os -march=armv7-a" } */ +/* { dg-do compile } */ + +void foo(int a, int b, int* p) +{ + p[2] = a; + p[3] = b; +} + +/* { dg-final { scan-assembler "strd" } } */ Index: pr45335-3.c =================================================================== --- pr45335-3.c (revision 0) +++ pr45335-3.c (revision 0) @@ -0,0 +1,12 @@ +/* { dg-options "-Os -march=armv7-a" } */ +/* { dg-do compile } */ + +int foo(int a, int b, int* p, int *q) +{ + a = p[2] + p[3]; + *q = a; + *p = a; + return a; +} + +/* { dg-final { scan-assembler "ldrd" } } */ Index: pr40457-1.c =================================================================== --- pr40457-1.c (revision 168737) +++ pr40457-1.c (working copy) @@ -7,4 +7,4 @@ int bar(int* p) return x; } -/* { dg-final { scan-assembler "ldm" } } */ +/* { dg-final { scan-assembler "ldm|ldrd" 
} } */ Index: pr40457-2.c =================================================================== --- pr40457-2.c (revision 168737) +++ pr40457-2.c (working copy) @@ -5,6 +5,7 @@ void foo(int* p) { p[0] = 1; p[1] = 0; + p[2] = 2; } /* { dg-final { scan-assembler "stm" } } */ Index: pr45335.c =================================================================== --- pr45335.c (revision 0) +++ pr45335.c (revision 0) @@ -0,0 +1,22 @@ +/* { dg-options "-mthumb -O2" } */ +/* { dg-require-effective-target arm_thumb2_ok } */ +/* { dg-final { scan-assembler "ldrd" } } */ +/* { dg-final { scan-assembler "strd" } } */ + +struct S +{ + void* p1; + void* p2; + void* p3; + void* p4; +}; + +extern printf(char*, ...); + +void foo1(struct S* fp, struct S* otherSaveArea) +{ + struct S* saveA = fp - 1; + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); +} ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-18 15:46 ` Carrot Wei @ 2011-01-27 5:42 ` Jie Zhang 2011-01-27 10:43 ` Carrot Wei 2011-03-15 9:19 ` Carrot Wei 2011-03-24 0:25 ` Mike Stump 2 siblings, 1 reply; 46+ messages in thread From: Jie Zhang @ 2011-01-27 5:42 UTC (permalink / raw) To: Carrot Wei Cc: Richard Earnshaw, Ramana Radhakrishnan, Paul Brook, Nick Clifton, gcc-patches, ramana.radhakrishnan Hi Carrot, I just found that your patch is not well formatted. See the following piece of it: On 01/18/2011 10:59 PM, Carrot Wei wrote: > + /* TARGET_ARM */ > + if (((regno1& 1) == 0)&& ((regno1 + 1) == regno2)) > /* ldrd */ > + return true; > + The "/* ldrd */" line is malformed, and patch reports an error for it. There are several other similar cases, so the patch can't be applied easily. Could you resend your patch in a clean format? Regards, -- Jie Zhang ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-27 5:42 ` Jie Zhang @ 2011-01-27 10:43 ` Carrot Wei 2011-01-28 9:29 ` Jie Zhang 0 siblings, 1 reply; 46+ messages in thread From: Carrot Wei @ 2011-01-27 10:43 UTC (permalink / raw) To: Jie Zhang Cc: Richard Earnshaw, Ramana Radhakrishnan, Paul Brook, Nick Clifton, gcc-patches, ramana.radhakrishnan [-- Attachment #1: Type: text/plain, Size: 705 bytes --] Oops, gmail wrapped some lines automatically. Resend them as attachment. thanks Carrot On Thu, Jan 27, 2011 at 12:10 PM, Jie Zhang <jie@codesourcery.com> wrote: > Hi Carrot, > > I just found your patch is not in a good format. See the following piece of > it: > > On 01/18/2011 10:59 PM, Carrot Wei wrote: >> >> + /* TARGET_ARM */ >> + if (((regno1& 1) == 0)&& ((regno1 + 1) == regno2)) >> /* ldrd */ >> + return true; >> + > > The "/* ldrd */" line is bad and patch reports an error for it. There are > several other similar cases. So the patch can't be applied easily. Could you > resend your patch, in a good format? > > > Regards, > -- > Jie Zhang > [-- Attachment #2: ldrd.txt --] [-- Type: text/plain, Size: 19482 bytes --] Index: arm.c =================================================================== --- arm.c (revision 168737) +++ arm.c (working copy) @@ -23574,4 +23574,234 @@ arm_preferred_rename_class (reg_class_t return NO_REGS; } +/* Check the validity of operands in an ldrd/strd instruction. */ +bool +arm_check_ldrd_operands (rtx reg1, rtx reg2, rtx off1, rtx off2) +{ + HOST_WIDE_INT offset1 = 0; + HOST_WIDE_INT offset2 = 0; + int regno1 = REGNO (reg1); + int regno2 = REGNO (reg2); + HOST_WIDE_INT max_offset = 1020; + + if (TARGET_ARM) + max_offset = 255; + + if (off1 != NULL_RTX) + offset1 = INTVAL (off1); + if (off2 != NULL_RTX) + offset2 = INTVAL (off2); + + /* The offset range of LDRD is [-max_offset, max_offset]. Here we check if + both offsets lie in the range [-max_offset, max_offset+4]. 
If one of the + offsets is max_offset+4, the following condition + ((offset1 + 4) == offset2) + will ensure offset1 to be max_offset, suitable for instruction LDRD. */ + if ((offset1 > (max_offset + 4)) || (offset1 < -max_offset) + || ((offset1 & 3) != 0)) + return false; + if ((offset2 > (max_offset + 4)) || (offset2 < -max_offset) + || ((offset2 & 3) != 0)) + return false; + + if ((offset1 + 4) == offset2) + { + if (TARGET_THUMB2) + return true; + + /* TARGET_ARM */ + if (((regno1 & 1) == 0) && ((regno1 + 1) == regno2)) /* ldrd */ + return true; + + if ((regno1 < regno2) && ((offset1 <= 4) && (offset1 >= -8))) /* ldm */ + return true; + } + if ((offset2 + 4) == offset1) + { + if (TARGET_THUMB2) + return true; + + /* TARGET_ARM */ + if (((regno2 & 1) == 0) && ((regno2 + 1) == regno1)) /* ldrd */ + return true; + + if ((regno2 < regno1) && ((offset2 <= 4) && (offset2 >= -8))) /* ldm */ + return true; + } + + return false; +} + +/* Check if the two memory accesses can be merged to an ldrd/strd instruction. + That is they use the same base register, and the gap between constant + offsets should be 4. 
*/ +bool +arm_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd) +{ + rtx base1, base2; + rtx offset1 = NULL_RTX; + rtx offset2 = NULL_RTX; + rtx addr1 = XEXP (mem1, 0); + rtx addr2 = XEXP (mem2, 0); + + if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) + return false; + + if (REG_P (addr1)) + base1 = addr1; + else if (GET_CODE (addr1) == PLUS) + { + base1 = XEXP (addr1, 0); + offset1 = XEXP (addr1, 1); + if (!REG_P (base1) || (GET_CODE (offset1) != CONST_INT)) + return false; + } + else + return false; + + if (REG_P (addr2)) + base2 = addr2; + else if (GET_CODE (addr2) == PLUS) + { + base2 = XEXP (addr2, 0); + offset2 = XEXP (addr2, 1); + if (!REG_P (base2) || (GET_CODE (offset2) != CONST_INT)) + return false; + } + else + return false; + + if (base1 != base2) + return false; + + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) + return false; + + return arm_check_ldrd_operands (reg1, reg2, offset1, offset2); +} + +/* Output instructions for ldrd and count the number of bytes has been + outputted. Do not actually output instructions if EMIT_P is false. 
*/ +int +arm_output_ldrd (rtx reg1, rtx reg2, rtx base, rtx off1, rtx off2, bool emit_p) +{ + int length = 0; + rtx operands[5]; + HOST_WIDE_INT offset1 = 0; + HOST_WIDE_INT offset2 = 0; + + if (off1 != NULL_RTX) + offset1 = INTVAL (off1); + else + off1 = GEN_INT (0); + if (off2 != NULL_RTX) + offset2 = INTVAL (off2); + else + off2 = GEN_INT (0); + if (offset1 > offset2) + { + rtx tmp; + HOST_WIDE_INT t = offset1; offset1 = offset2; offset2 = t; + tmp = off1; off1 = off2; off2 = tmp; + tmp = reg1; reg1 = reg2; reg2 = tmp; + } + + operands[0] = reg1; + operands[1] = reg2; + operands[2] = base; + operands[3] = off1; + operands[4] = off2; + + if (TARGET_THUMB2) + { + if (fix_cm3_ldrd && (base == reg1)) + { + if (offset1 <= -256) + { + if (emit_p) + output_asm_insn ("sub\t%2, %2, %n3", operands); + length = 4; + + if (emit_p) + output_asm_insn ("ldr\t%1, [%2, #4]", operands); + if (low_register_operand (reg2, SImode) + && low_register_operand (base, SImode)) + length += 2; + else + length += 4; + + if (emit_p) + output_asm_insn ("ldr\t%0, [%2]", operands); + if (low_register_operand (base, SImode)) + length += 2; + else + length += 4; + } + else + { + if (emit_p) + output_asm_insn ("ldr\t%1, [%2, %4]", operands); + if (low_register_operand (reg2, SImode) && (offset2 >= 0) + && low_register_operand (base, SImode) && (offset2 < 128)) + length += 2; + else + length += 4; + + if (emit_p) + output_asm_insn ("ldr\t%0, [%2, %3]", operands); + if (low_register_operand (base, SImode) + && (offset1 >= 0) && (offset1 < 128)) + length += 2; + else + length += 4; + } + } + else + { + if (emit_p) + output_asm_insn ("ldrd\t%0, %1, [%2, %3]", operands); + length = 4; + } + } + else /* TARGET_ARM */ + { + if ((REGNO (reg2) == (REGNO (reg1) + 1)) && ((REGNO (reg1) & 1) == 0)) + { + if (emit_p) + output_asm_insn ("ldrd\t%0, %1, [%2, %3]", operands); + length = 4; + } + else + { + if (emit_p) + { + switch (offset1) + { + case -8: + output_asm_insn ("ldm%(db%)\t%2, {%0, %1}", operands); + 
break; + + case -4: + output_asm_insn ("ldm%(da%)\t%2, {%0, %1}", operands); + break; + + case 0: + output_asm_insn ("ldm%(ia%)\t%2, {%0, %1}", operands); + break; + + case 4: + output_asm_insn ("ldm%(ib%)\t%2, {%0, %1}", operands); + break; + + default: + gcc_unreachable (); + } + } + length = 4; + } + } + + return length; +} + #include "gt-arm.h" Index: arm-protos.h =================================================================== --- arm-protos.h (revision 168737) +++ arm-protos.h (working copy) @@ -150,6 +150,9 @@ extern void arm_expand_sync (enum machin extern const char *arm_output_memory_barrier (rtx *); extern const char *arm_output_sync_insn (rtx, rtx *); extern unsigned int arm_sync_loop_insns (rtx , rtx *); +extern bool arm_check_ldrd_operands (rtx, rtx, rtx, rtx); +extern bool arm_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool); +extern int arm_output_ldrd (rtx, rtx, rtx, rtx, rtx, bool); #if defined TREE_CODE extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); Index: ldmstm.md =================================================================== --- ldmstm.md (revision 168737) +++ ldmstm.md (working copy) @@ -852,7 +852,7 @@ (define_insn "*ldm2_ia" (set (match_operand:SI 2 "arm_hard_register_operand" "") (mem:SI (plus:SI (match_dup 3) (const_int 4))))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_32BIT && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "ldm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "load2") (set_attr "predicable" "yes")]) @@ -901,7 +901,7 @@ (define_insn "*stm2_ia" (match_operand:SI 1 "arm_hard_register_operand" "")) (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) (match_operand:SI 2 "arm_hard_register_operand" ""))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_32BIT && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "stm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "store2") (set_attr "predicable" "yes")]) @@ -939,7 +939,7 @@ (define_insn "*ldm2_ib" (set (match_operand:SI 2 
"arm_hard_register_operand" "") (mem:SI (plus:SI (match_dup 3) (const_int 8))))])] - "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "ldm%(ib%)\t%3, {%1, %2}" [(set_attr "type" "load2") (set_attr "predicable" "yes")]) @@ -965,7 +965,7 @@ (define_insn "*stm2_ib" (match_operand:SI 1 "arm_hard_register_operand" "")) (set (mem:SI (plus:SI (match_dup 3) (const_int 8))) (match_operand:SI 2 "arm_hard_register_operand" ""))])] - "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "stm%(ib%)\t%3, {%1, %2}" [(set_attr "type" "store2") (set_attr "predicable" "yes")]) @@ -990,7 +990,7 @@ (define_insn "*ldm2_da" (const_int -4)))) (set (match_operand:SI 2 "arm_hard_register_operand" "") (mem:SI (match_dup 3)))])] - "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "ldm%(da%)\t%3, {%1, %2}" [(set_attr "type" "load2") (set_attr "predicable" "yes")]) @@ -1015,7 +1015,7 @@ (define_insn "*stm2_da" (match_operand:SI 1 "arm_hard_register_operand" "")) (set (mem:SI (match_dup 3)) (match_operand:SI 2 "arm_hard_register_operand" ""))])] - "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "stm%(da%)\t%3, {%1, %2}" [(set_attr "type" "store2") (set_attr "predicable" "yes")]) @@ -1041,7 +1041,7 @@ (define_insn "*ldm2_db" (set (match_operand:SI 2 "arm_hard_register_operand" "") (mem:SI (plus:SI (match_dup 3) (const_int -4))))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_32BIT && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "ldm%(db%)\t%3, {%1, %2}" [(set_attr "type" "load2") (set_attr "predicable" "yes")]) @@ -1067,7 +1067,7 @@ (define_insn "*stm2_db" (match_operand:SI 1 "arm_hard_register_operand" "")) (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) (match_operand:SI 2 "arm_hard_register_operand" ""))])] - "TARGET_32BIT && XVECLEN 
(operands[0], 0) == 2" + "TARGET_32BIT && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "stm%(db%)\t%3, {%1, %2}" [(set_attr "type" "store2") (set_attr "predicable" "yes")]) @@ -1189,3 +1189,211 @@ (define_peephole2 FAIL; }) +(define_insn "*ldrd" + [(parallel [(set (match_operand:SI 0 "arm_hard_register_operand" "") + (mem:SI (plus:SI + (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" "")))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 4 "const_int_operand" ""))))])] + "TARGET_32BIT && arm_arch7 + && arm_check_ldrd_operands (operands[0], operands[1], + operands[3], operands[4])" + "* + arm_output_ldrd (operands[0], operands[1], + operands[2], operands[3], operands[4], true); + return \"\"; + " + [(set (attr "length") + (symbol_ref ("arm_output_ldrd (operands[0], operands[1], operands[2], + operands[3], operands[4], false)")))] +) + +(define_insn "*ldrd_reg1" + [(parallel [(set (match_operand:SI 0 "arm_hard_register_operand" "") + (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 3 "const_int_operand" ""))))])] + "TARGET_32BIT && arm_arch7 + && arm_check_ldrd_operands (operands[0], operands[1], NULL_RTX, operands[3])" + "* + arm_output_ldrd (operands[0], operands[1], + operands[2], NULL_RTX, operands[3], true); + return \"\"; + " + [(set (attr "length") + (symbol_ref ("arm_output_ldrd (operands[0], operands[1], operands[2], + NULL_RTX, operands[3], false)")))] +) + +(define_insn "*ldrd_reg2" + [(parallel [(set (match_operand:SI 0 "arm_hard_register_operand" "") + (mem:SI (plus:SI + (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" "")))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (match_dup 2)))])] + "TARGET_32BIT && arm_arch7 + && arm_check_ldrd_operands (operands[0], operands[1], 
operands[3], NULL_RTX)" + "* + arm_output_ldrd (operands[0], operands[1], + operands[2], operands[3], NULL_RTX, true); + return \"\"; + " + [(set (attr "length") + (symbol_ref ("arm_output_ldrd (operands[0], operands[1], operands[2], + operands[3], NULL_RTX, false)")))] +) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" ""))] + "TARGET_32BIT && arm_arch7 + && arm_legitimate_ldrd_p (operands[0], operands[1], + operands[2], operands[3], true)" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" ""))])] + "" +) + +(define_insn "*strd" + [(parallel [(set (mem:SI + (plus:SI (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" ""))) + (match_operand:SI 0 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 4 "const_int_operand" ""))) + (match_operand:SI 1 "arm_hard_register_operand" ""))])] + "TARGET_32BIT && arm_arch7 + && arm_check_ldrd_operands (operands[0], operands[1], + operands[3], operands[4])" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + HOST_WIDE_INT offset2 = INTVAL (operands[4]); + if (offset1 > offset2) + { + rtx tmp = operands[0]; operands[0] = operands[1]; operands[1] = tmp; + tmp = operands[3]; operands[3] = operands[4]; operands[4] = tmp; + offset1 = INTVAL (operands[3]); + offset2 = INTVAL (operands[4]); + } + if (TARGET_THUMB2) + return \"strd\\t%0, %1, [%2, %3]\"; + else /* TARGET_ARM */ + { + if ((REGNO (operands[1]) == (REGNO (operands[0]) + 1)) + && ((REGNO (operands[0]) & 1) == 0)) + return \"strd\\t%0, %1, [%2, %3]\"; + else if (offset1 == -8) + return \"stm%(db%)\\t%2, {%0, %1}\"; + else /* offset1 == 4 */ + return \"stm%(ib%)\\t%2, {%0, %1}\"; + } + }" + 
[(set_attr "length" "4")] +) + +(define_insn "*strd_reg1" + [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk")) + (match_operand:SI 0 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 3 "const_int_operand" ""))) + (match_operand:SI 1 "arm_hard_register_operand" ""))])] + "TARGET_32BIT && arm_arch7 + && arm_check_ldrd_operands (operands[0], operands[1], NULL_RTX, operands[3])" + "* + { + HOST_WIDE_INT offset2 = INTVAL (operands[3]); + if (TARGET_THUMB2) + { + if (offset2 == 4) + return \"strd\\t%0, %1, [%2]\"; + else + return \"strd\\t%1, %0, [%2, %3]\"; + } + else /* TARGET_ARM */ + { + if (offset2 == 4) + { + if ((REGNO (operands[1]) == (REGNO (operands[0]) + 1)) + && ((REGNO (operands[0]) & 1) == 0)) + return \"strd\\t%0, %1, [%2]\"; + else + return \"stm%(ia%)\\t%2, {%0, %1}\"; + } + else /* offset2 == -4 */ + { + if ((REGNO (operands[0]) == (REGNO (operands[1]) + 1)) + && ((REGNO (operands[1]) & 1) == 0)) + return \"strd\\t%1, %0, [%2, %3]\"; + else + return \"stm%(da%)\\t%2, {%1, %0}\"; + } + } + }" + [(set_attr "length" "4")] +) + +(define_insn "*strd_reg2" + [(parallel [(set (mem:SI (plus:SI + (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" ""))) + (match_operand:SI 0 "arm_hard_register_operand" "")) + (set (mem:SI (match_dup 2)) + (match_operand:SI 1 "arm_hard_register_operand" ""))])] + "TARGET_32BIT && arm_arch7 + && arm_check_ldrd_operands (operands[0], operands[1], operands[3], NULL_RTX)" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + if (TARGET_THUMB2) + { + if (offset1 == -4) + return \"strd\\t%0, %1, [%2, %3]\"; + else + return \"strd\\t%1, %0, [%2]\"; + } + else /* TARGET_ARM */ + { + if (offset1 == -4) + { + if ((REGNO (operands[1]) == (REGNO (operands[0]) + 1)) + && ((REGNO (operands[0]) & 1) == 0)) + return \"strd\\t%0, %1, [%2, %3]\"; + else + return \"stm%(da%)\\t%2, {%0, %1}\"; + } + else + { + if ((REGNO (operands[0]) == (REGNO 
(operands[1]) + 1)) + && ((REGNO (operands[1]) & 1) == 0)) + return \"strd\\t%1, %0, [%2]\"; + else + return \"stm%(ia%)\\t%2, {%1, %0}\"; + } + } + }" + [(set_attr "length" "4")] +) + +(define_peephole2 + [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" ""))] + "TARGET_32BIT && arm_arch7 + && arm_legitimate_ldrd_p (operands[0], operands[1], + operands[2], operands[3], false)" + [(parallel [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" ""))])] + "" +) Index: pr40457-3.c =================================================================== --- pr40457-3.c (revision 168737) +++ pr40457-3.c (working copy) @@ -5,6 +5,7 @@ void foo(int* p) { p[0] = 1; p[1] = 0; + p[2] = 2; } /* { dg-final { scan-assembler "stm" } } */ Index: pr45335-2.c =================================================================== --- pr45335-2.c (revision 0) +++ pr45335-2.c (revision 0) @@ -0,0 +1,10 @@ +/* { dg-options "-Os -march=armv7-a" } */ +/* { dg-do compile } */ + +void foo(int a, int b, int* p) +{ + p[2] = a; + p[3] = b; +} + +/* { dg-final { scan-assembler "strd" } } */ Index: pr45335-3.c =================================================================== --- pr45335-3.c (revision 0) +++ pr45335-3.c (revision 0) @@ -0,0 +1,12 @@ +/* { dg-options "-Os -march=armv7-a" } */ +/* { dg-do compile } */ + +int foo(int a, int b, int* p, int *q) +{ + a = p[2] + p[3]; + *q = a; + *p = a; + return a; +} + +/* { dg-final { scan-assembler "ldrd" } } */ Index: pr40457-1.c =================================================================== --- pr40457-1.c (revision 168737) +++ pr40457-1.c (working copy) @@ -7,4 +7,4 @@ int bar(int* p) return x; } -/* { dg-final { scan-assembler "ldm" } } */ +/* { dg-final { scan-assembler "ldm|ldrd" 
} } */ Index: pr40457-2.c =================================================================== --- pr40457-2.c (revision 168737) +++ pr40457-2.c (working copy) @@ -5,6 +5,7 @@ void foo(int* p) { p[0] = 1; p[1] = 0; + p[2] = 2; } /* { dg-final { scan-assembler "stm" } } */ Index: pr45335.c =================================================================== --- pr45335.c (revision 0) +++ pr45335.c (revision 0) @@ -0,0 +1,22 @@ +/* { dg-options "-mthumb -O2" } */ +/* { dg-require-effective-target arm_thumb2_ok } */ +/* { dg-final { scan-assembler "ldrd" } } */ +/* { dg-final { scan-assembler "strd" } } */ + +struct S +{ + void* p1; + void* p2; + void* p3; + void* p4; +}; + +extern printf(char*, ...); + +void foo1(struct S* fp, struct S* otherSaveArea) +{ + struct S* saveA = fp - 1; + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); +} ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-27 10:43 ` Carrot Wei @ 2011-01-28 9:29 ` Jie Zhang 2011-01-28 11:19 ` Carrot Wei 0 siblings, 1 reply; 46+ messages in thread From: Jie Zhang @ 2011-01-28 9:29 UTC (permalink / raw) To: Carrot Wei Cc: Richard Earnshaw, Ramana Radhakrishnan, Paul Brook, Nick Clifton, gcc-patches, ramana.radhakrishnan Hi Carrot, On 01/27/2011 05:27 PM, Carrot Wei wrote: > Oops, gmail wrapped some lines automatically. > Resend them as attachment. > Thanks! I found another (non-technical) issue in your patch. $ grep "^Index" ldrd.txt Index: arm.c Index: arm-protos.h Index: ldmstm.md Index: pr40457-3.c Index: pr45335-2.c Index: pr45335-3.c Index: pr40457-1.c Index: pr40457-2.c Index: pr45335.c The first three files are in gcc/config/arm/, while the other files, I believe, belong to gcc/testsuite/gcc.target/arm/. I don't know why your patch does not contain that directory information. If you do "svn diff" under gcc/, the patch should contain such directory information. Regards, Jie ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-28 9:29 ` Jie Zhang @ 2011-01-28 11:19 ` Carrot Wei 2011-01-28 12:16 ` Jie Zhang 0 siblings, 1 reply; 46+ messages in thread From: Carrot Wei @ 2011-01-28 11:19 UTC (permalink / raw) To: Jie Zhang Cc: Richard Earnshaw, Ramana Radhakrishnan, Paul Brook, Nick Clifton, gcc-patches, ramana.radhakrishnan I ran "svn diff" in gcc/testsuite/gcc.target/arm/ and gcc/config/arm/ separately and concatenated the results, so you need to split them apart and apply the patches separately. On Fri, Jan 28, 2011 at 11:55 AM, Jie Zhang <jie@codesourcery.com> wrote: > Hi Carrot, > > On 01/27/2011 05:27 PM, Carrot Wei wrote: >> >> Oops, gmail wrapped some lines automatically. >> Resend them as attachment. >> > Thanks! I found another (non-technical) issue in your patch. > > $ grep "^Index" ldrd.txt > Index: arm.c > Index: arm-protos.h > Index: ldmstm.md > Index: pr40457-3.c > Index: pr45335-2.c > Index: pr45335-3.c > Index: pr40457-1.c > Index: pr40457-2.c > Index: pr45335.c > > the first three files are in gcc/config/arm/ while the other files I believe > belong to gcc/testsuite/gcc.target/arm/. I don't know why your patch does > not contains those directory information. If you do "svn diff" under gcc/, > the patch should contain such directory information. > > > Regards, > Jie > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-28 11:19 ` Carrot Wei @ 2011-01-28 12:16 ` Jie Zhang 0 siblings, 0 replies; 46+ messages in thread From: Jie Zhang @ 2011-01-28 12:16 UTC (permalink / raw) To: Carrot Wei Cc: Richard Earnshaw, Ramana Radhakrishnan, Paul Brook, Nick Clifton, gcc-patches, ramana.radhakrishnan On 01/28/2011 05:29 PM, Carrot Wei wrote: > On Fri, Jan 28, 2011 at 11:55 AM, Jie Zhang<jie@codesourcery.com> wrote: >> Hi Carrot, >> >> On 01/27/2011 05:27 PM, Carrot Wei wrote: >>> >>> Oops, gmail wrapped some lines automatically. >>> Resend them as attachment. >>> >> Thanks! I found another (non-technical) issue in your patch. >> >> $ grep "^Index" ldrd.txt >> Index: arm.c >> Index: arm-protos.h >> Index: ldmstm.md >> Index: pr40457-3.c >> Index: pr45335-2.c >> Index: pr45335-3.c >> Index: pr40457-1.c >> Index: pr40457-2.c >> Index: pr45335.c >> >> the first three files are in gcc/config/arm/ while the other files I believe >> belong to gcc/testsuite/gcc.target/arm/. I don't know why your patch does >> not contains those directory information. If you do "svn diff" under gcc/, >> the patch should contain such directory information. >> > I did "svn diff" in gcc/testsuite/gcc.target/arm/ and gcc/config/arm/ > separately and concatenate the results together. So you need to break > them and apply the patches separately. > This makes it difficult for people to apply and try out your patch. You can make the whole patch by running "svn diff" once from gcc/. By the way, please don't top-post. Regards, -- Jie Zhang ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-18 15:46 ` Carrot Wei 2011-01-27 5:42 ` Jie Zhang @ 2011-03-15 9:19 ` Carrot Wei 2011-03-24 0:25 ` Mike Stump 2 siblings, 0 replies; 46+ messages in thread From: Carrot Wei @ 2011-03-15 9:19 UTC (permalink / raw) To: Richard Earnshaw Cc: Ramana Radhakrishnan, Paul Brook, Nick Clifton, gcc-patches, ramana.radhakrishnan The trunk is open again; could a maintainer continue reviewing this patch? thanks Carrot On Tue, Jan 18, 2011 at 10:59 PM, Carrot Wei <carrot@google.com> wrote: > Ramana's method is to put the instruction output and counting in on place. > So it's easy to keep them synchronized. > > My latest version of patch did the following modifications compared to > the earlier version: Added support of arm ldrd/strd instructions. Added length > attribute to insn patterns. Moved the insn patterns to file ldmstm.md. > > It has passed the dejagnu testing on arm qemu. > > thanks > Carrot ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-18 15:46 ` Carrot Wei 2011-01-27 5:42 ` Jie Zhang 2011-03-15 9:19 ` Carrot Wei @ 2011-03-24 0:25 ` Mike Stump 2011-03-29 10:18 ` Carrot Wei 2 siblings, 1 reply; 46+ messages in thread From: Mike Stump @ 2011-03-24 0:25 UTC (permalink / raw) To: Carrot Wei Cc: Richard Earnshaw, Ramana Radhakrishnan, Paul Brook, Nick Clifton, gcc-patches, ramana.radhakrishnan On Jan 18, 2011, at 6:59 AM, Carrot Wei wrote: > +(define_insn "*ldrd" > + [(parallel [(set (match_operand:SI 0 "arm_hard_register_operand" "") parallel is implicit, you can safely remove it from all define_insns. ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-03-24 0:25 ` Mike Stump @ 2011-03-29 10:18 ` Carrot Wei 0 siblings, 0 replies; 46+ messages in thread From: Carrot Wei @ 2011-03-29 10:18 UTC (permalink / raw) To: Mike Stump Cc: Richard Earnshaw, Ramana Radhakrishnan, Paul Brook, Nick Clifton, gcc-patches, ramana.radhakrishnan [-- Attachment #1: Type: text/plain, Size: 1644 bytes --] Thank you for the knowledge. I've updated the insn patterns accordingly. Again tested on arm qemu. thanks Carrot ChangeLog: 2011-03-29 Wei Guozhi <carrot@google.com> PR target/45335 * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_ib, stm2_ib, ldm2_da, stm2_da, ldm2_db, stm2_db): Add condition !arm_arch7 to these insns. (ldrd, ldrd_reg1, ldrd_reg2 and peephole2): New insn patterns and related peephole2. (strd, strd_reg1, strd_reg2 and peephole2): New insn patterns and related peephole2. * gcc/config/arm/arm-protos.h (arm_check_ldrd_operands): New prototype. (arm_legitimate_ldrd_p): New prototype. (arm_output_ldrd): New prototype. * gcc/config/arm/arm.c (arm_check_ldrd_operands): New function. (arm_legitimate_ldrd_p): New function. (arm_output_ldrd): New function. 2011-03-29 Wei Guozhi <carrot@google.com> PR target/45335 * gcc.target/arm/pr45335.c: New test. * gcc.target/arm/pr45335-2.c: New test. * gcc.target/arm/pr45335-3.c: New test. * gcc.target/arm/pr40457-1.c: Add another possible output "ldrd". * gcc.target/arm/pr40457-2.c: Changed to store 3 words. * gcc.target/arm/pr40457-3.c: Changed to store 3 words. On Thu, Mar 24, 2011 at 8:25 AM, Mike Stump <mikestump@comcast.net> wrote: > On Jan 18, 2011, at 6:59 AM, Carrot Wei wrote: >> +(define_insn "*ldrd" >> + [(parallel [(set (match_operand:SI 0 "arm_hard_register_operand" "") > > parallel is implicit, you can safely remove it from all define_insns. 
> [-- Attachment #2: pr45335.txt --] [-- Type: text/plain, Size: 19643 bytes --] Index: testsuite/gcc.target/arm/pr40457-3.c =================================================================== --- testsuite/gcc.target/arm/pr40457-3.c (revision 171439) +++ testsuite/gcc.target/arm/pr40457-3.c (working copy) @@ -5,6 +5,7 @@ void foo(int* p) { p[0] = 1; p[1] = 0; + p[2] = 2; } /* { dg-final { scan-assembler "stm" } } */ Index: testsuite/gcc.target/arm/pr45335-2.c =================================================================== --- testsuite/gcc.target/arm/pr45335-2.c (revision 0) +++ testsuite/gcc.target/arm/pr45335-2.c (revision 0) @@ -0,0 +1,10 @@ +/* { dg-options "-Os -march=armv7-a" } */ +/* { dg-do compile } */ + +void foo(int a, int b, int* p) +{ + p[2] = a; + p[3] = b; +} + +/* { dg-final { scan-assembler "strd" } } */ Index: testsuite/gcc.target/arm/pr45335-3.c =================================================================== --- testsuite/gcc.target/arm/pr45335-3.c (revision 0) +++ testsuite/gcc.target/arm/pr45335-3.c (revision 0) @@ -0,0 +1,12 @@ +/* { dg-options "-Os -march=armv7-a" } */ +/* { dg-do compile } */ + +int foo(int a, int b, int* p, int *q) +{ + a = p[2] + p[3]; + *q = a; + *p = a; + return a; +} + +/* { dg-final { scan-assembler "ldrd" } } */ Index: testsuite/gcc.target/arm/pr40457-1.c =================================================================== --- testsuite/gcc.target/arm/pr40457-1.c (revision 171439) +++ testsuite/gcc.target/arm/pr40457-1.c (working copy) @@ -7,4 +7,4 @@ int bar(int* p) return x; } -/* { dg-final { scan-assembler "ldm" } } */ +/* { dg-final { scan-assembler "ldm|ldrd" } } */ Index: testsuite/gcc.target/arm/pr40457-2.c =================================================================== --- testsuite/gcc.target/arm/pr40457-2.c (revision 171439) +++ testsuite/gcc.target/arm/pr40457-2.c (working copy) @@ -5,6 +5,7 @@ void foo(int* p) { p[0] = 1; p[1] = 0; + p[2] = 2; } /* { dg-final { scan-assembler "stm" } } */ 
Index: testsuite/gcc.target/arm/pr45335.c =================================================================== --- testsuite/gcc.target/arm/pr45335.c (revision 0) +++ testsuite/gcc.target/arm/pr45335.c (revision 0) @@ -0,0 +1,22 @@ +/* { dg-options "-mthumb -O2" } */ +/* { dg-require-effective-target arm_thumb2_ok } */ +/* { dg-final { scan-assembler "ldrd" } } */ +/* { dg-final { scan-assembler "strd" } } */ + +struct S +{ + void* p1; + void* p2; + void* p3; + void* p4; +}; + +extern printf(char*, ...); + +void foo1(struct S* fp, struct S* otherSaveArea) +{ + struct S* saveA = fp - 1; + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); +} Index: config/arm/arm.c =================================================================== --- config/arm/arm.c (revision 171439) +++ config/arm/arm.c (working copy) @@ -23681,4 +23681,234 @@ arm_preferred_rename_class (reg_class_t return NO_REGS; } +/* Check the validity of operands in an ldrd/strd instruction. */ +bool +arm_check_ldrd_operands (rtx reg1, rtx reg2, rtx off1, rtx off2) +{ + HOST_WIDE_INT offset1 = 0; + HOST_WIDE_INT offset2 = 0; + int regno1 = REGNO (reg1); + int regno2 = REGNO (reg2); + HOST_WIDE_INT max_offset = 1020; + + if (TARGET_ARM) + max_offset = 255; + + if (off1 != NULL_RTX) + offset1 = INTVAL (off1); + if (off2 != NULL_RTX) + offset2 = INTVAL (off2); + + /* The offset range of LDRD is [-max_offset, max_offset]. Here we check if + both offsets lie in the range [-max_offset, max_offset+4]. If one of the + offsets is max_offset+4, the following condition + ((offset1 + 4) == offset2) + will ensure offset1 to be max_offset, suitable for instruction LDRD. 
*/ + if ((offset1 > (max_offset + 4)) || (offset1 < -max_offset) + || ((offset1 & 3) != 0)) + return false; + if ((offset2 > (max_offset + 4)) || (offset2 < -max_offset) + || ((offset2 & 3) != 0)) + return false; + + if ((offset1 + 4) == offset2) + { + if (TARGET_THUMB2) + return true; + + /* TARGET_ARM */ + if (((regno1 & 1) == 0) && ((regno1 + 1) == regno2)) /* ldrd */ + return true; + + if ((regno1 < regno2) && ((offset1 <= 4) && (offset1 >= -8))) /* ldm */ + return true; + } + if ((offset2 + 4) == offset1) + { + if (TARGET_THUMB2) + return true; + + /* TARGET_ARM */ + if (((regno2 & 1) == 0) && ((regno2 + 1) == regno1)) /* ldrd */ + return true; + + if ((regno2 < regno1) && ((offset2 <= 4) && (offset2 >= -8))) /* ldm */ + return true; + } + + return false; +} + +/* Check if the two memory accesses can be merged to an ldrd/strd instruction. + That is they use the same base register, and the gap between constant + offsets should be 4. */ +bool +arm_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd) +{ + rtx base1, base2; + rtx offset1 = NULL_RTX; + rtx offset2 = NULL_RTX; + rtx addr1 = XEXP (mem1, 0); + rtx addr2 = XEXP (mem2, 0); + + if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) + return false; + + if (REG_P (addr1)) + base1 = addr1; + else if (GET_CODE (addr1) == PLUS) + { + base1 = XEXP (addr1, 0); + offset1 = XEXP (addr1, 1); + if (!REG_P (base1) || (GET_CODE (offset1) != CONST_INT)) + return false; + } + else + return false; + + if (REG_P (addr2)) + base2 = addr2; + else if (GET_CODE (addr2) == PLUS) + { + base2 = XEXP (addr2, 0); + offset2 = XEXP (addr2, 1); + if (!REG_P (base2) || (GET_CODE (offset2) != CONST_INT)) + return false; + } + else + return false; + + if (base1 != base2) + return false; + + if (ldrd && ((reg1 == reg2) || (reg1 == base1))) + return false; + + return arm_check_ldrd_operands (reg1, reg2, offset1, offset2); +} + +/* Output instructions for ldrd and count the number of bytes has been + outputted. 
Do not actually output instructions if EMIT_P is false. */ +int +arm_output_ldrd (rtx reg1, rtx reg2, rtx base, rtx off1, rtx off2, bool emit_p) +{ + int length = 0; + rtx operands[5]; + HOST_WIDE_INT offset1 = 0; + HOST_WIDE_INT offset2 = 0; + + if (off1 != NULL_RTX) + offset1 = INTVAL (off1); + else + off1 = GEN_INT (0); + if (off2 != NULL_RTX) + offset2 = INTVAL (off2); + else + off2 = GEN_INT (0); + if (offset1 > offset2) + { + rtx tmp; + HOST_WIDE_INT t = offset1; offset1 = offset2; offset2 = t; + tmp = off1; off1 = off2; off2 = tmp; + tmp = reg1; reg1 = reg2; reg2 = tmp; + } + + operands[0] = reg1; + operands[1] = reg2; + operands[2] = base; + operands[3] = off1; + operands[4] = off2; + + if (TARGET_THUMB2) + { + if (fix_cm3_ldrd && (base == reg1)) + { + if (offset1 <= -256) + { + if (emit_p) + output_asm_insn ("sub\t%2, %2, %n3", operands); + length = 4; + + if (emit_p) + output_asm_insn ("ldr\t%1, [%2, #4]", operands); + if (low_register_operand (reg2, SImode) + && low_register_operand (base, SImode)) + length += 2; + else + length += 4; + + if (emit_p) + output_asm_insn ("ldr\t%0, [%2]", operands); + if (low_register_operand (base, SImode)) + length += 2; + else + length += 4; + } + else + { + if (emit_p) + output_asm_insn ("ldr\t%1, [%2, %4]", operands); + if (low_register_operand (reg2, SImode) && (offset2 >= 0) + && low_register_operand (base, SImode) && (offset2 < 128)) + length += 2; + else + length += 4; + + if (emit_p) + output_asm_insn ("ldr\t%0, [%2, %3]", operands); + if (low_register_operand (base, SImode) + && (offset1 >= 0) && (offset1 < 128)) + length += 2; + else + length += 4; + } + } + else + { + if (emit_p) + output_asm_insn ("ldrd\t%0, %1, [%2, %3]", operands); + length = 4; + } + } + else /* TARGET_ARM */ + { + if ((REGNO (reg2) == (REGNO (reg1) + 1)) && ((REGNO (reg1) & 1) == 0)) + { + if (emit_p) + output_asm_insn ("ldrd\t%0, %1, [%2, %3]", operands); + length = 4; + } + else + { + if (emit_p) + { + switch (offset1) + { + case -8: + 
output_asm_insn ("ldm%(db%)\t%2, {%0, %1}", operands); + break; + + case -4: + output_asm_insn ("ldm%(da%)\t%2, {%0, %1}", operands); + break; + + case 0: + output_asm_insn ("ldm%(ia%)\t%2, {%0, %1}", operands); + break; + + case 4: + output_asm_insn ("ldm%(ib%)\t%2, {%0, %1}", operands); + break; + + default: + gcc_unreachable (); + } + } + length = 4; + } + } + + return length; +} + #include "gt-arm.h" Index: config/arm/arm-protos.h =================================================================== --- config/arm/arm-protos.h (revision 171439) +++ config/arm/arm-protos.h (working copy) @@ -152,6 +152,9 @@ extern void arm_expand_sync (enum machin extern const char *arm_output_memory_barrier (rtx *); extern const char *arm_output_sync_insn (rtx, rtx *); extern unsigned int arm_sync_loop_insns (rtx , rtx *); +extern bool arm_check_ldrd_operands (rtx, rtx, rtx, rtx); +extern bool arm_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool); +extern int arm_output_ldrd (rtx, rtx, rtx, rtx, rtx, bool); #if defined TREE_CODE extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); Index: config/arm/ldmstm.md =================================================================== --- config/arm/ldmstm.md (revision 171439) +++ config/arm/ldmstm.md (working copy) @@ -852,7 +852,7 @@ (set (match_operand:SI 2 "arm_hard_register_operand" "") (mem:SI (plus:SI (match_dup 3) (const_int 4))))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_32BIT && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "ldm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "load2") (set_attr "predicable" "yes")]) @@ -901,7 +901,7 @@ (match_operand:SI 1 "arm_hard_register_operand" "")) (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) (match_operand:SI 2 "arm_hard_register_operand" ""))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_32BIT && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "stm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "store2") (set_attr "predicable" "yes")]) @@ -939,7 
+939,7 @@ (set (match_operand:SI 2 "arm_hard_register_operand" "") (mem:SI (plus:SI (match_dup 3) (const_int 8))))])] - "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "ldm%(ib%)\t%3, {%1, %2}" [(set_attr "type" "load2") (set_attr "predicable" "yes")]) @@ -965,7 +965,7 @@ (match_operand:SI 1 "arm_hard_register_operand" "")) (set (mem:SI (plus:SI (match_dup 3) (const_int 8))) (match_operand:SI 2 "arm_hard_register_operand" ""))])] - "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "stm%(ib%)\t%3, {%1, %2}" [(set_attr "type" "store2") (set_attr "predicable" "yes")]) @@ -990,7 +990,7 @@ (const_int -4)))) (set (match_operand:SI 2 "arm_hard_register_operand" "") (mem:SI (match_dup 3)))])] - "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "ldm%(da%)\t%3, {%1, %2}" [(set_attr "type" "load2") (set_attr "predicable" "yes")]) @@ -1015,7 +1015,7 @@ (match_operand:SI 1 "arm_hard_register_operand" "")) (set (mem:SI (match_dup 3)) (match_operand:SI 2 "arm_hard_register_operand" ""))])] - "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "TARGET_ARM && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "stm%(da%)\t%3, {%1, %2}" [(set_attr "type" "store2") (set_attr "predicable" "yes")]) @@ -1041,7 +1041,7 @@ (set (match_operand:SI 2 "arm_hard_register_operand" "") (mem:SI (plus:SI (match_dup 3) (const_int -4))))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_32BIT && !arm_arch7 && XVECLEN (operands[0], 0) == 2" "ldm%(db%)\t%3, {%1, %2}" [(set_attr "type" "load2") (set_attr "predicable" "yes")]) @@ -1067,7 +1067,7 @@ (match_operand:SI 1 "arm_hard_register_operand" "")) (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) (match_operand:SI 2 "arm_hard_register_operand" ""))])] - "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "TARGET_32BIT && !arm_arch7 && XVECLEN (operands[0], 0) == 2" 
"stm%(db%)\t%3, {%1, %2}" [(set_attr "type" "store2") (set_attr "predicable" "yes")]) @@ -1189,3 +1189,207 @@ FAIL; }) +(define_insn "*ldrd" + [(set (match_operand:SI 0 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" "")))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 4 "const_int_operand" ""))))] + "TARGET_32BIT && arm_arch7 + && arm_check_ldrd_operands (operands[0], operands[1], + operands[3], operands[4])" + "* + arm_output_ldrd (operands[0], operands[1], + operands[2], operands[3], operands[4], true); + return \"\"; + " + [(set (attr "length") + (symbol_ref ("arm_output_ldrd (operands[0], operands[1], operands[2], + operands[3], operands[4], false)")))] +) + +(define_insn "*ldrd_reg1" + [(set (match_operand:SI 0 "arm_hard_register_operand" "") + (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 3 "const_int_operand" ""))))] + "TARGET_32BIT && arm_arch7 + && arm_check_ldrd_operands (operands[0], operands[1], NULL_RTX, operands[3])" + "* + arm_output_ldrd (operands[0], operands[1], + operands[2], NULL_RTX, operands[3], true); + return \"\"; + " + [(set (attr "length") + (symbol_ref ("arm_output_ldrd (operands[0], operands[1], operands[2], + NULL_RTX, operands[3], false)")))] +) + +(define_insn "*ldrd_reg2" + [(set (match_operand:SI 0 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" "")))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (match_dup 2)))] + "TARGET_32BIT && arm_arch7 + && arm_check_ldrd_operands (operands[0], operands[1], operands[3], NULL_RTX)" + "* + arm_output_ldrd (operands[0], operands[1], + operands[2], operands[3], NULL_RTX, true); + return \"\"; + " + [(set 
(attr "length") + (symbol_ref ("arm_output_ldrd (operands[0], operands[1], operands[2], + operands[3], NULL_RTX, false)")))] +) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" ""))] + "TARGET_32BIT && arm_arch7 + && arm_legitimate_ldrd_p (operands[0], operands[1], + operands[2], operands[3], true)" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" ""))])] + "" +) + +(define_insn "*strd" + [(set (mem:SI (plus:SI (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" ""))) + (match_operand:SI 0 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 4 "const_int_operand" ""))) + (match_operand:SI 1 "arm_hard_register_operand" ""))] + "TARGET_32BIT && arm_arch7 + && arm_check_ldrd_operands (operands[0], operands[1], + operands[3], operands[4])" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + HOST_WIDE_INT offset2 = INTVAL (operands[4]); + if (offset1 > offset2) + { + rtx tmp = operands[0]; operands[0] = operands[1]; operands[1] = tmp; + tmp = operands[3]; operands[3] = operands[4]; operands[4] = tmp; + offset1 = INTVAL (operands[3]); + offset2 = INTVAL (operands[4]); + } + if (TARGET_THUMB2) + return \"strd\\t%0, %1, [%2, %3]\"; + else /* TARGET_ARM */ + { + if ((REGNO (operands[1]) == (REGNO (operands[0]) + 1)) + && ((REGNO (operands[0]) & 1) == 0)) + return \"strd\\t%0, %1, [%2, %3]\"; + else if (offset1 == -8) + return \"stm%(db%)\\t%2, {%0, %1}\"; + else /* offset1 == 4 */ + return \"stm%(ib%)\\t%2, {%0, %1}\"; + } + }" + [(set_attr "length" "4")] +) + +(define_insn "*strd_reg1" + [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk")) + (match_operand:SI 0 
"arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) + (match_operand:SI 3 "const_int_operand" ""))) + (match_operand:SI 1 "arm_hard_register_operand" ""))] + "TARGET_32BIT && arm_arch7 + && arm_check_ldrd_operands (operands[0], operands[1], NULL_RTX, operands[3])" + "* + { + HOST_WIDE_INT offset2 = INTVAL (operands[3]); + if (TARGET_THUMB2) + { + if (offset2 == 4) + return \"strd\\t%0, %1, [%2]\"; + else + return \"strd\\t%1, %0, [%2, %3]\"; + } + else /* TARGET_ARM */ + { + if (offset2 == 4) + { + if ((REGNO (operands[1]) == (REGNO (operands[0]) + 1)) + && ((REGNO (operands[0]) & 1) == 0)) + return \"strd\\t%0, %1, [%2]\"; + else + return \"stm%(ia%)\\t%2, {%0, %1}\"; + } + else /* offset2 == -4 */ + { + if ((REGNO (operands[0]) == (REGNO (operands[1]) + 1)) + && ((REGNO (operands[1]) & 1) == 0)) + return \"strd\\t%1, %0, [%2, %3]\"; + else + return \"stm%(da%)\\t%2, {%1, %0}\"; + } + } + }" + [(set_attr "length" "4")] +) + +(define_insn "*strd_reg2" + [(set (mem:SI (plus:SI (match_operand:SI 2 "s_register_operand" "rk") + (match_operand:SI 3 "const_int_operand" ""))) + (match_operand:SI 0 "arm_hard_register_operand" "")) + (set (mem:SI (match_dup 2)) + (match_operand:SI 1 "arm_hard_register_operand" ""))] + "TARGET_32BIT && arm_arch7 + && arm_check_ldrd_operands (operands[0], operands[1], operands[3], NULL_RTX)" + "* + { + HOST_WIDE_INT offset1 = INTVAL (operands[3]); + if (TARGET_THUMB2) + { + if (offset1 == -4) + return \"strd\\t%0, %1, [%2, %3]\"; + else + return \"strd\\t%1, %0, [%2]\"; + } + else /* TARGET_ARM */ + { + if (offset1 == -4) + { + if ((REGNO (operands[1]) == (REGNO (operands[0]) + 1)) + && ((REGNO (operands[0]) & 1) == 0)) + return \"strd\\t%0, %1, [%2, %3]\"; + else + return \"stm%(da%)\\t%2, {%0, %1}\"; + } + else + { + if ((REGNO (operands[0]) == (REGNO (operands[1]) + 1)) + && ((REGNO (operands[1]) & 1) == 0)) + return \"strd\\t%1, %0, [%2]\"; + else + return \"stm%(ia%)\\t%2, {%1, %0}\"; + } + } + }" + [(set_attr "length" 
"4")] +) + +(define_peephole2 + [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" ""))] + "TARGET_32BIT && arm_arch7 + && arm_legitimate_ldrd_p (operands[0], operands[1], + operands[2], operands[3], false)" + [(parallel [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" ""))])] + "" +) ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-01-13 10:45 ` Carrot Wei 2011-01-13 11:12 ` Richard Earnshaw 2011-01-13 11:15 ` Ramana Radhakrishnan @ 2011-01-13 17:19 ` Mike Stump 2 siblings, 0 replies; 46+ messages in thread From: Mike Stump @ 2011-01-13 17:19 UTC (permalink / raw) To: Carrot Wei Cc: Richard Earnshaw, Paul Brook, Nick Clifton, gcc-patches, ramana.radhakrishnan On Jan 13, 2011, at 1:27 AM, Carrot Wei wrote: > One question about the attribute length. It looks the attribute > expression is not very powerful Check out i386/i386.md... Take a look at nops and *jcc_1 for example. ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words
@ 2011-06-07 9:44 Nick Clifton
2011-06-14 2:22 ` Ramana Radhakrishnan
0 siblings, 1 reply; 46+ messages in thread
From: Nick Clifton @ 2011-06-07 9:44 UTC (permalink / raw)
To: Carrot Wei; +Cc: GCC Patches, Richard Earnshaw
Hi Carrot,
> ChangeLog:
> 2011-03-29 Wei Guozhi <carrot@google.com>
>
> PR target/45335
> * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_ib, stm2_ib, ldm2_da,
> stm2_da, ldm2_db, stm2_db): Add condition !arm_arch7 to these insns.
> (ldrd, ldrd_reg1, ldrd_reg2 and peephole2): New insn patterns and
> related peephole2.
> (strd, strd_reg1, strd_reg2 and peephole2): New insn patterns and
> related peephole2.
> * gcc/config/arm/arm-protos.h (arm_check_ldrd_operands): New prototype.
> (arm_legitimate_ldrd_p): New prototype.
> (arm_output_ldrd): New prototype.
> * gcc/config/arm/arm.c (arm_check_ldrd_operands): New function.
> (arm_legitimate_ldrd_p): New function.
> (arm_output_ldrd): New function.
>
>
> 2011-03-29 Wei Guozhi <carrot@google.com>
>
> PR target/45335
> * gcc.target/arm/pr45335.c: New test.
> * gcc.target/arm/pr45335-2.c: New test.
> * gcc.target/arm/pr45335-3.c: New test.
> * gcc.target/arm/pr40457-1.c: Add another possible output "ldrd".
> * gcc.target/arm/pr40457-2.c: Changed to store 3 words.
> * gcc.target/arm/pr40457-3.c: Changed to store 3 words.
Approved - please apply.
Cheers
Nick
^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words 2011-06-07 9:44 Nick Clifton @ 2011-06-14 2:22 ` Ramana Radhakrishnan 0 siblings, 0 replies; 46+ messages in thread From: Ramana Radhakrishnan @ 2011-06-14 2:22 UTC (permalink / raw) To: Carrot Wei; +Cc: GCC Patches, Richard Earnshaw, nickc This caused http://gcc.gnu.org/PR49398. In the future, you might also consider putting the numbers of the PRs that these patches attempt to fix in your commit log, so that the PRs in bugzilla get the associated commit message. Please add links to this commit to the individual PRs this commit attempts to fix (PR45335, PR40457). cheers Ramana ^ permalink raw reply [flat|nested] 46+ messages in thread
end of thread, other threads:[~2011-06-14 0:48 UTC | newest] Thread overview: 46+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2010-08-22 6:49 [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words Carrot Wei 2010-08-24 13:55 ` Carrot Wei 2010-08-24 14:14 ` Ramana Radhakrishnan 2010-08-25 10:02 ` Carrot Wei 2010-09-01 15:25 ` Richard Earnshaw 2010-09-04 13:15 ` Carrot Wei 2010-09-13 14:54 ` Carrot Wei 2010-09-19 9:10 ` [PING][PATCH: " Carrot Wei 2010-09-25 19:25 ` Carrot Wei 2010-10-05 11:53 ` Carrot Wei 2010-10-12 9:00 ` Carrot Wei 2010-10-12 15:37 ` Ian Lance Taylor 2010-10-13 11:28 ` [PATCH: " Paul Brook 2010-10-16 14:36 ` Carrot Wei 2010-10-24 16:59 ` Carrot Wei 2010-10-31 17:55 ` Carrot Wei 2010-11-23 0:23 ` Carrot Wei 2010-11-30 0:01 ` Carrot Wei 2010-12-14 22:58 ` Carrot Wei 2011-01-04 8:57 ` Carrot Wei 2011-01-11 14:43 ` Nick Clifton 2011-01-11 14:51 ` Richard Earnshaw 2011-01-11 15:10 ` Nathan Froyd 2011-01-12 6:34 ` Ian Lance Taylor 2011-01-12 10:29 ` Richard Guenther 2011-01-12 14:01 ` Diego Novillo 2011-01-12 21:56 ` Mike Stump 2011-01-12 13:49 ` Paul Brook 2011-01-12 15:20 ` Richard Earnshaw 2011-01-13 10:45 ` Carrot Wei 2011-01-13 11:12 ` Richard Earnshaw 2011-01-13 11:15 ` Ramana Radhakrishnan 2011-01-14 9:25 ` Carrot Wei 2011-01-14 10:17 ` Richard Earnshaw 2011-01-18 15:46 ` Carrot Wei 2011-01-27 5:42 ` Jie Zhang 2011-01-27 10:43 ` Carrot Wei 2011-01-28 9:29 ` Jie Zhang 2011-01-28 11:19 ` Carrot Wei 2011-01-28 12:16 ` Jie Zhang 2011-03-15 9:19 ` Carrot Wei 2011-03-24 0:25 ` Mike Stump 2011-03-29 10:18 ` Carrot Wei 2011-01-13 17:19 ` Mike Stump 2011-06-07 9:44 Nick Clifton 2011-06-14 2:22 ` Ramana Radhakrishnan
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as the URLs for read-only IMAP folder(s) and NNTP newsgroup(s).