* [Bug middle-end/29533] Ada fails to vectorize even trivial loops
[not found] <bug-29533-4@http.gcc.gnu.org/bugzilla/>
2012-07-13 8:55 ` rguenth at gcc dot gnu.org
@ 2012-07-13 9:17 ` steven at gcc dot gnu.org
1 sibling, 0 replies; 4+ messages in thread
From: steven at gcc dot gnu.org @ 2012-07-13 9:17 UTC (permalink / raw)
To: gcc-bugs
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=29533
Steven Bosscher <steven at gcc dot gnu.org> changed:
           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |RESOLVED
                 CC|                            |steven at gcc dot gnu.org
            Version|4.2.0                       |4.6.0
         Resolution|                            |FIXED
--- Comment #3 from Steven Bosscher <steven at gcc dot gnu.org> 2012-07-13 09:17:05 UTC ---
$ cat compare_lang.ads
package compare_lang is
type the_range is range 0 .. 100;
type My_Array is array (the_range) of Float;
a, b, c : my_array;
procedure do_compare;
end compare_lang;
$ cat compare_lang.adb
package body compare_lang is
procedure do_compare is
begin
for JJJ in the_range loop
a(jjj) := b(jjj) * c(jjj);
end loop;
end do_compare;
end compare_lang;
$ ./xgcc -B. -S -m32 -O3 -gnatp -march=pentium4 -mfpmath=sse -msse3 \
-ftree-vectorize -ftree-vectorizer-verbose=5 compare_lang.adb
Analyzing loop at compare_lang.adb:5
5: vect_model_load_cost: aligned.
5: vect_get_data_access_cost: inside_cost = 1, outside_cost = 0.
5: vect_model_load_cost: aligned.
5: vect_get_data_access_cost: inside_cost = 2, outside_cost = 0.
5: vect_model_store_cost: aligned.
5: vect_get_data_access_cost: inside_cost = 3, outside_cost = 0.
5: vect_model_load_cost: aligned.
5: vect_model_load_cost: inside_cost = 1, outside_cost = 0 .
5: vect_model_load_cost: aligned.
5: vect_model_load_cost: inside_cost = 1, outside_cost = 0 .
5: vect_model_simple_cost: inside_cost = 1, outside_cost = 0 .
5: vect_model_store_cost: aligned.
5: vect_model_store_cost: inside_cost = 1, outside_cost = 0 .
5: Cost model analysis:
Vector inside of loop cost: 4
Vector outside of loop cost: 4
Scalar iteration cost: 4
Scalar outside cost: 0
prologue iterations: 0
epilogue iterations: 1
Calculated minimum iters for profitability: 2
5: Profitability threshold = 3
Vectorizing loop at compare_lang.adb:5
5: LOOP VECTORIZED.
compare_lang.adb:3: note: vectorized 1 loops in function.
$ cat compare_lang.s
.file "compare_lang.adb"
.text
.globl compare_lang__Tmy_arrayBIP
.type compare_lang__Tmy_arrayBIP, @function
compare_lang__Tmy_arrayBIP:
.LFB2:
ret
.LFE2:
.size compare_lang__Tmy_arrayBIP, .-compare_lang__Tmy_arrayBIP
.globl compare_lang__do_compare
.type compare_lang__do_compare, @function
compare_lang__do_compare:
.LFB3:
xorl %eax, %eax
pxor %xmm3, %xmm3
.L4:
movaps %xmm3, %xmm0
movlps compare_lang__b(,%eax,4), %xmm0
movhps compare_lang__b+8(,%eax,4), %xmm0
movaps %xmm3, %xmm1
movlps compare_lang__c(,%eax,4), %xmm1
movhps compare_lang__c+8(,%eax,4), %xmm1
movss compare_lang__c(%eax), %xmm2
mulps %xmm1, %xmm0
mulss compare_lang__b(%eax), %xmm2
movlps %xmm0, compare_lang__a(,%eax,4)
movhps %xmm0, compare_lang__a+8(,%eax,4)
addl $4, %eax
cmpl $100, %eax
jne .L4
movss compare_lang__b+400, %xmm0
mulss compare_lang__c+400, %xmm0
movss %xmm0, compare_lang__a+400
ret
.LFE3:
.size compare_lang__do_compare, .-compare_lang__do_compare
.globl compare_lang__c
.bss
.align 32
.type compare_lang__c, @object
.size compare_lang__c, 404
compare_lang__c:
.zero 404
.globl compare_lang__b
.align 32
.type compare_lang__b, @object
.size compare_lang__b, 404
compare_lang__b:
.zero 404
.globl compare_lang__a
.align 32
.type compare_lang__a, @object
.size compare_lang__a, 404
compare_lang__a:
.zero 404
.globl compare_lang_E
.data
.align 2
.type compare_lang_E, @object
.size compare_lang_E, 2
compare_lang_E:
.zero 2
.section .eh_frame,"a",@progbits
.Lframe1:
.long .LECIE1-.LSCIE1
.LSCIE1:
.long 0
.byte 0x3
.string ""
.uleb128 0x1
.sleb128 -4
.uleb128 0x8
.byte 0xc
.uleb128 0x4
.uleb128 0x4
.byte 0x88
.uleb128 0x1
.align 4
.LECIE1:
.LSFDE1:
.long .LEFDE1-.LASFDE1
.LASFDE1:
.long .LASFDE1-.Lframe1
.long .LFB2
.long .LFE2-.LFB2
.align 4
.LEFDE1:
.LSFDE3:
.long .LEFDE3-.LASFDE3
.LASFDE3:
.long .LASFDE3-.Lframe1
.long .LFB3
.long .LFE3-.LFB3
.align 4
.LEFDE3:
.ident "GCC: (GNU) 4.8.0 20120711 (experimental) [trunk revision 189427]"
.section .note.GNU-stack,"",@progbits
The loop is likewise vectorized on power7 with "GCC: (GNU) 4.6.3 20120306 (Red Hat 4.6.3-2)":
.file "compare_lang.adb"
.section ".toc","aw"
.section ".text"
.align 2
.p2align 4,,15
.globl compare_lang__Tmy_arrayBIP
.section ".opd","aw"
.align 3
compare_lang__Tmy_arrayBIP:
.quad .L.compare_lang__Tmy_arrayBIP,.TOC.@tocbase
.previous
.type compare_lang__Tmy_arrayBIP, @function
.L.compare_lang__Tmy_arrayBIP:
.LFB2:
.cfi_startproc
blr
.long 0
.byte 0,3,0,0,0,0,0,0
.cfi_endproc
.LFE2:
.size compare_lang__Tmy_arrayBIP,.-.L.compare_lang__Tmy_arrayBIP
.align 2
.p2align 4,,15
.globl compare_lang__do_compare
.section ".opd","aw"
.align 3
compare_lang__do_compare:
.quad .L.compare_lang__do_compare,.TOC.@tocbase
.previous
.type compare_lang__do_compare, @function
.L.compare_lang__do_compare:
.LFB3:
.cfi_startproc
li 0,25
addis 9,2,.LANCHOR0@toc@ha
mtctr 0
addi 9,9,.LANCHOR0@toc@l
li 10,0
li 11,0
addi 8,9,416
addi 7,9,832
.p2align 4,,15
.L3:
lxvw4x 13,9,11
lxvw4x 0,8,11
lwzx 0,9,10
lwzx 0,8,10
addi 10,10,4
xvmulsp 0,13,0
stxvw4x 0,7,11
addi 11,11,16
bdnz .L3
lfs 13,400(9)
lfs 0,816(9)
fmuls 0,13,0
stfs 0,1232(9)
blr
.long 0
.byte 0,3,0,0,0,0,0,0
.cfi_endproc
.LFE3:
.size compare_lang__do_compare,.-.L.compare_lang__do_compare
.globl compare_lang__c
.globl compare_lang__b
.globl compare_lang__a
.globl compare_lang_E
.section ".data"
.type compare_lang_E, @object
.size compare_lang_E, 1
compare_lang_E:
.zero 1
.section ".bss"
.align 4
.set .LANCHOR0,. + 0
.type compare_lang__b, @object
.size compare_lang__b, 404
compare_lang__b:
.zero 404
.zero 12
.type compare_lang__c, @object
.size compare_lang__c, 404
compare_lang__c:
.zero 404
.zero 12
.type compare_lang__a, @object
.size compare_lang__a, 404
compare_lang__a:
.zero 404
.ident "GCC: (GNU) 4.6.3 20120306 (Red Hat 4.6.3-2)"
.section .note.GNU-stack,"",@progbits
^ permalink raw reply [flat|nested] 4+ messages in thread