From: Alexey Kravets <mr.kayrick@gmail.com>
To: gcc@gcc.gnu.org
Subject: [ARM]Extra load store/instructions compared to gcc-3.4
Date: Wed, 25 Apr 2012 12:17:00 -0000 [thread overview]
Message-ID: <CAG7KMY6ywHVRXM6jmNNxB7urUNJRJTQ55hRKiz9idt4p5q9GrQ@mail.gmail.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 761 bytes --]
Hi guys,
I have a test case (shell sort, see attached) compiled with different
ARM compilers:
GCC-4.6.3, GCC-3.4.6, and ARMCC.
Both ARMCC and GCC-3.4.6 generate quite optimal assembly while GCC-4.6.3
inserts extra load/store instructions compared to the other compilers.
Can the SSA representation usage in modern GCC be the reason for this?
If so, has anyone tried to do something about it?
The generated assembly codes are attached:
sort-3.4.s: Assembly, generated by the GCC-3.4.6
sort-4.6.3.s: Assembly, generated by the GCC-4.6.3
sort-armcc.s: Assembly, generated by the ARMCC
% armcc
ARM C/C++ Compiler, 4.1 [Build 713]
The file has been compiled with following options:
for GCC:
-O3
for ARMCC:
-O3 -Otime
--
Alexey Kravets
mr.kayrick@gmail.com
[-- Attachment #2: sort-3.4.s --]
[-- Type: text/plain, Size: 924 bytes --]
.file "sort.i"
.global __divsi3
.text
.align 2
.global shell_sort
.type shell_sort, %function
shell_sort:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, lr}
mov fp, r1
sub r7, r0, #4
mov r8, #1
.L2:
add r3, r8, r8, asl #1
add r8, r3, #1
cmp r8, fp
ble .L2
.L17:
mov r0, r8
mov r1, #3
bl __divsi3
add r9, r0, #1
cmp r9, fp
mov r8, r0
bgt .L16
.L26:
ldr sl, [r7, r9, asl #2]
mov r4, r9
b .L11
.L25:
ldr r5, [r7, r6, asl #2]
mov r0, r5
bl strcmp
cmp r0, #0
ble .L12
str r5, [r7, r4, asl #2]
mov r4, r6
.L11:
cmp r4, r8
rsb r6, r8, r4
mov r1, sl
bgt .L25
.L12:
add r9, r9, #1
cmp r9, fp
str sl, [r7, r4, asl #2]
ble .L26
.L16:
cmp r8, #1
bgt .L17
ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, pc}
.size shell_sort, .-shell_sort
.ident "GCC: (GNU) 3.4.6"
[-- Attachment #3: shell_sort.c --]
[-- Type: text/x-csrc, Size: 535 bytes --]
#include <string.h>
void shell_sort(char *strings[], int n)
{
int h, i, j;
char *v;
strings--; /* Make array 1 origin */
h = 1;
do {h = h * 3 + 1;} while (h <= n);
do {
h = h / 3;
for (i = h + 1; i <= n; i++) {
v = strings[i];
j = i;
while (j > h && strcmp(strings[j-h], v) > 0) {
strings[j] = strings[j-h];
j = j-h;
}
strings[j] = v;
}
}
while (h > 1);
}
[-- Attachment #4: sort-4.6.3.s --]
[-- Type: text/plain, Size: 2033 bytes --]
.cpu cortex-a9
.eabi_attribute 27, 3
.fpu vfp3
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 2
.eabi_attribute 30, 2
.eabi_attribute 34, 1
.eabi_attribute 18, 2
.file "shell_sort.c"
.text
.align 2
.global shell_sort
.type shell_sort, %function
shell_sort:
@ args = 0, pretend = 0, frame = 40
@ frame_needed = 0, uses_anonymous_args = 0
stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, lr}
mov r3, r1
mov r9, #1
sub sp, sp, #44
sub r2, r0, #4
str r0, [sp, #28]
str r1, [sp, #32]
str r2, [sp, #24]
.L2:
add r9, r9, r9, asl #1
add r9, r9, #1
cmp r9, r3
ble .L2
ldr r2, [sp, #32]
movw r3, #21846
movt r3, 21845
str r3, [sp, #36]
add r2, r2, #1
str r2, [sp, #20]
.L9:
ldr r2, [sp, #36]
smull r2, r3, r2, r9
ldr r2, [sp, #32]
sub r9, r3, r9, asr #31
add r3, r9, #1
cmp r2, r3
str r3, [sp, #8]
blt .L3
rsb r3, r9, r9, asl #30
mov r2, r9, asl #2
mov r3, r3, asl #2
str r2, [sp, #0]
rsb fp, r9, #0
str r3, [sp, #4]
ldr r3, [sp, #28]
add r3, r3, r2
ldr r2, [sp, #28]
str r3, [sp, #12]
str r2, [sp, #16]
.L7:
ldr r8, [sp, #12]
ldr r3, [sp, #8]
mov r2, r8
ldr sl, [r2], #4
cmp r9, r3
str r2, [sp, #12]
bge .L4
ldr r4, [sp, #16]
mov r7, r3
b .L5
.L6:
ldr r3, [sp, #0]
cmp r9, r6
ldr r2, [sp, #4]
mov r7, r6
str r5, [r4, r3]
add r4, r4, r2
bge .L4
.L5:
ldr r5, [r4, #0]
mov r1, sl
add r6, r7, fp
mov r8, r4
mov r0, r5
bl strcmp
cmp r0, #0
bgt .L6
ldr r3, [sp, #24]
add r8, r3, r7, asl #2
.L4:
ldr r2, [sp, #8]
ldr r3, [sp, #20]
str sl, [r8, #0]
add r2, r2, #1
str r2, [sp, #8]
cmp r2, r3
ldr r2, [sp, #16]
add r2, r2, #4
str r2, [sp, #16]
bne .L7
.L3:
cmp r9, #1
bgt .L9
add sp, sp, #44
ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, pc}
.size shell_sort, .-shell_sort
.ident "GCC: (VDLinux.RC1) 4.6.3 20120105 (prerelease)"
.section .note.GNU-stack,"",%progbits
[-- Attachment #5: sort-armcc.s --]
[-- Type: text/plain, Size: 2083 bytes --]
; generated by ARM C/C++ Compiler, 4.1 [Build 713]
; commandline armcc [-S --cpu=Cortex-A9 --fpu=VFPv3 -O3 -Otime shell_sort.c]
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
shell_sort PROC
PUSH {r4-r12,lr}
MOV r10,r1
SUB r6,r0,#4
MOV r5,#1
|L0.16|
ADD r0,r5,r5,LSL #1
ADD r5,r0,#1
CMP r5,r10
BLE |L0.16|
LDR r11,|L0.140|
|L0.36|
SMULL r1,r0,r11,r5
SUB r5,r0,r0,ASR #31
ADD r8,r5,#1
CMP r8,r10
BGT |L0.128|
|L0.56|
LDR r9,[r6,r8,LSL #2]
MOV r4,r8
B |L0.80|
|L0.68|
LDR r0,[r6,r7,LSL #2]
STR r0,[r6,r4,LSL #2]
MOV r4,r7
|L0.80|
CMP r4,r5
BLE |L0.112|
SUB r7,r4,r5
MOV r1,r9
LDR r0,[r6,r7,LSL #2]
BL strcmp
CMP r0,#0
BGT |L0.68|
|L0.112|
ADD r8,r8,#1
CMP r8,r10
STR r9,[r6,r4,LSL #2]
BLE |L0.56|
|L0.128|
CMP r5,#1
BGT |L0.36|
POP {r4-r12,pc}
ENDP
|L0.140|
DCD 0x55555556
AREA ||.arm_vfe_header||, DATA, READONLY, NOALLOC, ALIGN=2
DCD 0x00000000
EXPORT shell_sort [CODE]
IMPORT ||Lib$$Request$$armlib|| [CODE,WEAK]
IMPORT strcmp [CODE]
ATTR FILESCOPE
ATTR SETVALUE Tag_ABI_PCS_wchar_t,2
ATTR SETVALUE Tag_ABI_enum_size,1
ATTR SETVALUE Tag_ABI_optimization_goals,2
ATTR SETSTRING Tag_conformance,"2.06"
ATTR SETVALUE AV,18,1
ASSERT {ENDIAN} = "little"
ASSERT {INTER} = {TRUE}
ASSERT {ROPI} = {FALSE}
ASSERT {RWPI} = {FALSE}
ASSERT {IEEE_FULL} = {FALSE}
ASSERT {IEEE_PART} = {FALSE}
ASSERT {IEEE_JAVA} = {FALSE}
END
next reply other threads:[~2012-04-25 12:17 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-04-25 12:17 Alexey Kravets [this message]
2012-04-25 13:00 ` Alexander Monakov
2012-04-25 13:22 ` Alexey Kravets
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAG7KMY6ywHVRXM6jmNNxB7urUNJRJTQ55hRKiz9idt4p5q9GrQ@mail.gmail.com \
--to=mr.kayrick@gmail.com \
--cc=gcc@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).