[ARM]Extra load store/instructions compared to gcc-3.4

public inbox for gcc@gcc.gnu.org
 help / color / mirror / Atom feed

From: Alexey Kravets <mr.kayrick@gmail.com>
To: gcc@gcc.gnu.org
Subject: [ARM]Extra load store/instructions compared to gcc-3.4
Date: Wed, 25 Apr 2012 12:17:00 -0000	[thread overview]
Message-ID: <CAG7KMY6ywHVRXM6jmNNxB7urUNJRJTQ55hRKiz9idt4p5q9GrQ@mail.gmail.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 761 bytes --]

Hi guys,
I have a test case (shell sort, see attached) compiled with different
ARM compilers:
GCC-4.6.3, GCC-3.4.6, and ARMCC.

Both ARMCC and GCC-3.4.6  generate quite optimal assembly while GCC-4.6.3
inserts extra load/store instructions compared to the other compilers.

Can the SSA representation usage in modern GCC be the reason for this?

If so, has anyone tried to do something about it?

The generated assembly codes are attached:
sort-3.4.s: Assembly, generated by the GCC-3.4.6
sort-4.6.3.s: Assembly, generated by the GCC-4.6.3
sort-armcc.s: Assembly, generated by the ARMCC

% armcc
ARM C/C++ Compiler, 4.1 [Build 713]

The file has been compiled with following options:
for GCC:
-O3
for ARMCC:
-O3 -Otime


-- 
Alexey Kravets
mr.kayrick@gmail.com

[-- Attachment #2: sort-3.4.s --]
[-- Type: text/plain, Size: 924 bytes --]

	.file	"sort.i"
	.global	__divsi3
	.text
	.align	2
	.global	shell_sort
	.type	shell_sort, %function
shell_sort:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	stmfd	sp!, {r4, r5, r6, r7, r8, r9, sl, fp, lr}
	mov	fp, r1
	sub	r7, r0, #4
	mov	r8, #1
.L2:
	add	r3, r8, r8, asl #1
	add	r8, r3, #1
	cmp	r8, fp
	ble	.L2
.L17:
	mov	r0, r8
	mov	r1, #3
	bl	__divsi3
	add	r9, r0, #1
	cmp	r9, fp
	mov	r8, r0
	bgt	.L16
.L26:
	ldr	sl, [r7, r9, asl #2]
	mov	r4, r9
	b	.L11
.L25:
	ldr	r5, [r7, r6, asl #2]
	mov	r0, r5
	bl	strcmp
	cmp	r0, #0
	ble	.L12
	str	r5, [r7, r4, asl #2]
	mov	r4, r6
.L11:
	cmp	r4, r8
	rsb	r6, r8, r4
	mov	r1, sl
	bgt	.L25
.L12:
	add	r9, r9, #1
	cmp	r9, fp
	str	sl, [r7, r4, asl #2]
	ble	.L26
.L16:
	cmp	r8, #1
	bgt	.L17
	ldmfd	sp!, {r4, r5, r6, r7, r8, r9, sl, fp, pc}
	.size	shell_sort, .-shell_sort
	.ident	"GCC: (GNU) 3.4.6"

[-- Attachment #3: shell_sort.c --]
[-- Type: text/x-csrc, Size: 535 bytes --]

#include <string.h>
void shell_sort(char *strings[], int n)
{
    int h, i, j;
    char *v;

    strings--;        /* Make array 1 origin */
    h = 1;
    do {h = h * 3 + 1;} while (h <= n);
    do {
        h = h / 3;
        for (i = h + 1; i <= n; i++) {
            v = strings[i];
            j = i;
            while (j > h && strcmp(strings[j-h], v) > 0) {
                strings[j] = strings[j-h];
                j = j-h;
            }
            strings[j] = v;
        }
    }
    while (h > 1);
}

[-- Attachment #4: sort-4.6.3.s --]
[-- Type: text/plain, Size: 2033 bytes --]

	.cpu cortex-a9
	.eabi_attribute 27, 3
	.fpu vfp3
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 2
	.eabi_attribute 34, 1
	.eabi_attribute 18, 2
	.file	"shell_sort.c"
	.text
	.align	2
	.global	shell_sort
	.type	shell_sort, %function
shell_sort:
	@ args = 0, pretend = 0, frame = 40
	@ frame_needed = 0, uses_anonymous_args = 0
	stmfd	sp!, {r4, r5, r6, r7, r8, r9, sl, fp, lr}
	mov	r3, r1
	mov	r9, #1
	sub	sp, sp, #44
	sub	r2, r0, #4
	str	r0, [sp, #28]
	str	r1, [sp, #32]
	str	r2, [sp, #24]
.L2:
	add	r9, r9, r9, asl #1
	add	r9, r9, #1
	cmp	r9, r3
	ble	.L2
	ldr	r2, [sp, #32]
	movw	r3, #21846
	movt	r3, 21845
	str	r3, [sp, #36]
	add	r2, r2, #1
	str	r2, [sp, #20]
.L9:
	ldr	r2, [sp, #36]
	smull	r2, r3, r2, r9
	ldr	r2, [sp, #32]
	sub	r9, r3, r9, asr #31
	add	r3, r9, #1
	cmp	r2, r3
	str	r3, [sp, #8]
	blt	.L3
	rsb	r3, r9, r9, asl #30
	mov	r2, r9, asl #2
	mov	r3, r3, asl #2
	str	r2, [sp, #0]
	rsb	fp, r9, #0
	str	r3, [sp, #4]
	ldr	r3, [sp, #28]
	add	r3, r3, r2
	ldr	r2, [sp, #28]
	str	r3, [sp, #12]
	str	r2, [sp, #16]
.L7:
	ldr	r8, [sp, #12]
	ldr	r3, [sp, #8]
	mov	r2, r8
	ldr	sl, [r2], #4
	cmp	r9, r3
	str	r2, [sp, #12]
	bge	.L4
	ldr	r4, [sp, #16]
	mov	r7, r3
	b	.L5
.L6:
	ldr	r3, [sp, #0]
	cmp	r9, r6
	ldr	r2, [sp, #4]
	mov	r7, r6
	str	r5, [r4, r3]
	add	r4, r4, r2
	bge	.L4
.L5:
	ldr	r5, [r4, #0]
	mov	r1, sl
	add	r6, r7, fp
	mov	r8, r4
	mov	r0, r5
	bl	strcmp
	cmp	r0, #0
	bgt	.L6
	ldr	r3, [sp, #24]
	add	r8, r3, r7, asl #2
.L4:
	ldr	r2, [sp, #8]
	ldr	r3, [sp, #20]
	str	sl, [r8, #0]
	add	r2, r2, #1
	str	r2, [sp, #8]
	cmp	r2, r3
	ldr	r2, [sp, #16]
	add	r2, r2, #4
	str	r2, [sp, #16]
	bne	.L7
.L3:
	cmp	r9, #1
	bgt	.L9
	add	sp, sp, #44
	ldmfd	sp!, {r4, r5, r6, r7, r8, r9, sl, fp, pc}
	.size	shell_sort, .-shell_sort
	.ident	"GCC: (VDLinux.RC1) 4.6.3 20120105 (prerelease)"
	.section	.note.GNU-stack,"",%progbits

[-- Attachment #5: sort-armcc.s --]
[-- Type: text/plain, Size: 2083 bytes --]

; generated by ARM C/C++ Compiler, 4.1 [Build 713]
; commandline armcc [-S --cpu=Cortex-A9 --fpu=VFPv3 -O3 -Otime shell_sort.c]
        ARM
        REQUIRE8
        PRESERVE8

        AREA ||.text||, CODE, READONLY, ALIGN=2

shell_sort PROC
        PUSH     {r4-r12,lr}
        MOV      r10,r1
        SUB      r6,r0,#4
        MOV      r5,#1
|L0.16|
        ADD      r0,r5,r5,LSL #1
        ADD      r5,r0,#1
        CMP      r5,r10
        BLE      |L0.16|
        LDR      r11,|L0.140|
|L0.36|
        SMULL    r1,r0,r11,r5
        SUB      r5,r0,r0,ASR #31
        ADD      r8,r5,#1
        CMP      r8,r10
        BGT      |L0.128|
|L0.56|
        LDR      r9,[r6,r8,LSL #2]
        MOV      r4,r8
        B        |L0.80|
|L0.68|
        LDR      r0,[r6,r7,LSL #2]
        STR      r0,[r6,r4,LSL #2]
        MOV      r4,r7
|L0.80|
        CMP      r4,r5
        BLE      |L0.112|
        SUB      r7,r4,r5
        MOV      r1,r9
        LDR      r0,[r6,r7,LSL #2]
        BL       strcmp
        CMP      r0,#0
        BGT      |L0.68|
|L0.112|
        ADD      r8,r8,#1
        CMP      r8,r10
        STR      r9,[r6,r4,LSL #2]
        BLE      |L0.56|
|L0.128|
        CMP      r5,#1
        BGT      |L0.36|
        POP      {r4-r12,pc}
        ENDP

|L0.140|
        DCD      0x55555556

        AREA ||.arm_vfe_header||, DATA, READONLY, NOALLOC, ALIGN=2

        DCD      0x00000000

        EXPORT shell_sort [CODE]

        IMPORT ||Lib$$Request$$armlib|| [CODE,WEAK]
        IMPORT strcmp [CODE]

        ATTR FILESCOPE
        ATTR SETVALUE Tag_ABI_PCS_wchar_t,2
        ATTR SETVALUE Tag_ABI_enum_size,1
        ATTR SETVALUE Tag_ABI_optimization_goals,2
        ATTR SETSTRING Tag_conformance,"2.06"
        ATTR SETVALUE AV,18,1

        ASSERT {ENDIAN} = "little"
        ASSERT {INTER} = {TRUE}
        ASSERT {ROPI} = {FALSE}
        ASSERT {RWPI} = {FALSE}
        ASSERT {IEEE_FULL} = {FALSE}
        ASSERT {IEEE_PART} = {FALSE}
        ASSERT {IEEE_JAVA} = {FALSE}
        END

next             reply	other threads:[~2012-04-25 12:17 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-04-25 12:17 Alexey Kravets [this message]
2012-04-25 13:00 ` Alexander Monakov
2012-04-25 13:22   ` Alexey Kravets

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAG7KMY6ywHVRXM6jmNNxB7urUNJRJTQ55hRKiz9idt4p5q9GrQ@mail.gmail.com \
    --to=mr.kayrick@gmail.com \
    --cc=gcc@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).