public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug c/16962] New: loop unrolling with x86-64 asm not efficient
@ 2004-08-10 13:42 tomstdenis at iahu dot ca
2004-08-10 14:09 ` [Bug tree-optimization/16962] " falk at debian dot org
` (5 more replies)
0 siblings, 6 replies; 7+ messages in thread
From: tomstdenis at iahu dot ca @ 2004-08-10 13:42 UTC (permalink / raw)
To: gcc-bugs
[note this bug was previously reported as part of bug #16961 which currently
reports inefficient 128-bit additions].
I have the following demo program
typedef unsigned long long mp_digit;
typedef unsigned long mp_word __attribute__ ((mode(TI)));
mp_word a, b;
// demo slow 128-bit add
void test(void)
{
a += b;
}
// this unrolls right (but is otherwise inefficient cuz of the 128-bit add)
void test2(mp_word *out, mp_digit x, mp_digit *y, int n)
{
int z;
for (z = 0; z < n; z++) out[z] += ((mp_word)x) * ((mp_word)*y++);
}
// this unrolls poorly
void test3(mp_word *out, mp_digit x, mp_digit *y, int n)
{
int z;
for (z = 0; z < n; z++) {
asm("movq %0,%%rax\n"
"mulq (%1)\n"
"addq %%rax,(%2)\n"
"adcq %%rdx,8(%2)\n"
::"r"(x), "r"(y), "r"(out) : "%rax", "%rdx");
++out;
++y;
}
}
And it produces inefficient loop unrolling when ASM blocks are used...
I used "gcc -O3 -fomit-frame-pointer -funroll-loops -march=k8 -m64 -S test.c"
.file "test.c"
.text
.p2align 4,,15
.globl test
.type test, @function
test:
.LFB2:
movq a(%rip), %r10
movq b(%rip), %r8
xorl %ecx, %ecx
movq a+8(%rip), %rdi
movq b+8(%rip), %r9
leaq (%r10,%r8), %rax
leaq (%rdi,%r9), %rsi
cmpq %r10, %rax
movq %rax, a(%rip)
setb %cl
leaq (%rcx,%rsi), %rdx
movq %rdx, a+8(%rip)
ret
.LFE2:
.size test, .-test
.p2align 4,,15
.globl test2
.type test2, @function
test2:
.LFB3:
movq %r13, -24(%rsp)
.LCFI0:
movq %r14, -16(%rsp)
.LCFI1:
movq %rdi, %r11
movq %r15, -8(%rsp)
.LCFI2:
movq %rbx, -48(%rsp)
.LCFI3:
movq %rsi, %r13
movq %rbp, -40(%rsp)
.LCFI4:
movq %r12, -32(%rsp)
.LCFI5:
subq $64, %rsp
.LCFI6:
testl %ecx, %ecx
movq %rdx, %r14
movl %ecx, %r15d
jle .L8
movq %rsi, %rax
movq (%rdi), %r12
movq 8(%rdi), %rdi
mulq (%rdx)
leal -1(%r15), %r10d
xorl %ecx, %ecx
leaq 8(%r14), %rbp
movl %r10d, %ebx
andl $3, %ebx
movq %rdx, %r9
leaq (%r12,%rax), %rdx
leaq (%rdi,%r9), %rsi
cmpq %r12, %rdx
movq %rdx, -8(%rsp)
movq -8(%rsp), %rax
setb %cl
movq %rsi, (%rsp)
addq %rcx, (%rsp)
movq (%rsp), %rdx
movl %r10d, %r12d
movl $16, %r10d
testl %r12d, %r12d
movq %rax, (%r11)
movq %rdx, 8(%r11)
je .L8
testl %ebx, %ebx
je .L6
cmpl $1, %ebx
je .L23
cmpl $2, %ebx
.p2align 4,,5
je .L24
movq %r13, %rax
movq 16(%r11), %rsi
movq 24(%r11), %rdi
mulq 8(%r14)
leaq 16(%r14), %rbp
movb $32, %r10b
leaq (%rsi,%rax), %r12
leaq (%rdi,%rdx), %rcx
xorl %eax, %eax
cmpq %rsi, %r12
movq %rcx, -80(%rsp)
movq %r12, -88(%rsp)
setb %al
addq %rax, -80(%rsp)
movq -88(%rsp), %r14
movq -80(%rsp), %rbx
leal -2(%r15), %r12d
movq %r14, 16(%r11)
movq %rbx, 24(%r11)
.L24:
movq %r13, %rax
movq (%r10,%r11), %rcx
xorl %r8d, %r8d
mulq (%rbp)
addq $8, %rbp
movq %rax, %rdi
movq 8(%r10,%r11), %rax
leaq (%rcx,%rdi), %r9
leaq (%rax,%rdx), %rbx
cmpq %rcx, %r9
movq %r9, -104(%rsp)
setb %r8b
movq -104(%rsp), %rdx
decl %r12d
movq %rbx, -96(%rsp)
addq %r8, -96(%rsp)
movq -96(%rsp), %r15
movq %rdx, (%r10,%r11)
movq %r15, 8(%r10,%r11)
addq $16, %r10
.L23:
movq %r13, %rax
movq 8(%r10,%r11), %r14
xorl %r8d, %r8d
mulq (%rbp)
addq $8, %rbp
movq %rax, %r9
movq (%r10,%r11), %rax
leaq (%r14,%rdx), %rdx
movq %rdx, -112(%rsp)
leaq (%rax,%r9), %rcx
cmpq %rax, %rcx
movq %rcx, -120(%rsp)
movq -120(%rsp), %r15
setb %r8b
addq %r8, -112(%rsp)
movq -112(%rsp), %rsi
movq %r15, (%r10,%r11)
movq %rsi, 8(%r10,%r11)
addq $16, %r10
decl %r12d
je .L8
.p2align 4,,7
.L6:
movq %r13, %rax
movq (%r10,%r11), %rbx
movq (%r10,%r11), %r15
mulq (%rbp)
movq 8(%r10,%r11), %rsi
xorl %r9d, %r9d
movq 16(%r10,%r11), %r8
movq 32(%r10,%r11), %r14
addq %rax, %rbx
movq %r13, %rax
cmpq %r15, %rbx
movq 24(%r10,%r11), %r15
movq %rbx, -24(%rsp)
setb %r9b
addq %rdx, %rsi
movq -24(%rsp), %rcx
movq %rsi, -16(%rsp)
addq %r9, -16(%rsp)
xorl %esi, %esi
movq -16(%rsp), %rdx
movq %rcx, (%r10,%r11)
movq %rdx, 8(%r10,%r11)
mulq 8(%rbp)
addq %rax, %r8
movq 16(%r10,%r11), %rax
movq %r8, -40(%rsp)
movq -40(%rsp), %rdi
cmpq %rax, %r8
movq %r13, %rax
setb %sil
addq %rdx, %r15
movq %rdi, 16(%r10,%r11)
mulq 16(%rbp)
movq %r15, -32(%rsp)
movq 40(%r10,%r11), %r15
addq %rsi, -32(%rsp)
movq -32(%rsp), %r9
movq %r9, 24(%r10,%r11)
movq %rdx, %rbx
movq 32(%r10,%r11), %rdx
movq %rax, %rcx
addq %rcx, %rdx
cmpq %r14, %rdx
movq %rdx, -56(%rsp)
movq -56(%rsp), %rdi
setb %r8b
addq %rbx, %r15
movl %r8d, %eax
movq %r15, -48(%rsp)
xorl %r15d, %r15d
movzbl %al, %esi
addq %rsi, -48(%rsp)
movq %r13, %rax
movq -48(%rsp), %r9
movq %rdi, 32(%r10,%r11)
mulq 24(%rbp)
movq 56(%r10,%r11), %r14
addq $32, %rbp
movq %r9, 40(%r10,%r11)
movq %rax, %rcx
movq 48(%r10,%r11), %rax
movq %rdx, %rbx
leaq (%r14,%rbx), %r8
leaq (%rax,%rcx), %rdx
movq %r8, -64(%rsp)
cmpq %rax, %rdx
movq %rdx, -72(%rsp)
movq -72(%rsp), %rsi
setb %r15b
addq %r15, -64(%rsp)
movq -64(%rsp), %rdi
movq %rsi, 48(%r10,%r11)
movq %rdi, 56(%r10,%r11)
addq $64, %r10
subl $4, %r12d
jne .L6
.p2align 4,,7
.L8:
movq 16(%rsp), %rbx
movq 24(%rsp), %rbp
movq 32(%rsp), %r12
movq 40(%rsp), %r13
movq 48(%rsp), %r14
movq 56(%rsp), %r15
addq $64, %rsp
ret
.LFE3:
.size test2, .-test2
.p2align 4,,15
.globl test3
.type test3, @function
test3:
.LFB4:
pushq %rbp
.LCFI7:
testl %ecx, %ecx
movq %rsi, %r10
movl %ecx, %ebp
pushq %rbx
.LCFI8:
movq %rdi, %rbx
movq %rdx, %rdi
jle .L33
leal -1(%rbp), %ecx
movl %ecx, %esi
andl $7, %esi
#APP
movq %r10,%rax
mulq (%rdi)
addq %rax,(%rbx)
adcq %rdx,8(%rbx)
#NO_APP
testl %ecx, %ecx
leaq 16(%rbx), %r9
leaq 8(%rdi), %r8
movl %ecx, %r11d
je .L33
testl %esi, %esi
je .L31
cmpl $1, %esi
je .L61
cmpl $2, %esi
.p2align 4,,5
je .L62
cmpl $3, %esi
.p2align 4,,5
je .L63
cmpl $4, %esi
.p2align 4,,5
je .L64
cmpl $5, %esi
.p2align 4,,5
je .L65
cmpl $6, %esi
.p2align 4,,5
je .L66
#APP
movq %r10,%rax
mulq (%r8)
addq %rax,(%r9)
adcq %rdx,8(%r9)
#NO_APP
leaq 32(%rbx), %r9
leaq 16(%rdi), %r8
leal -2(%rbp), %r11d
.L66:
#APP
movq %r10,%rax
mulq (%r8)
addq %rax,(%r9)
adcq %rdx,8(%r9)
#NO_APP
addq $16, %r9
addq $8, %r8
decl %r11d
.L65:
#APP
movq %r10,%rax
mulq (%r8)
addq %rax,(%r9)
adcq %rdx,8(%r9)
#NO_APP
addq $16, %r9
addq $8, %r8
decl %r11d
.L64:
#APP
movq %r10,%rax
mulq (%r8)
addq %rax,(%r9)
adcq %rdx,8(%r9)
#NO_APP
addq $16, %r9
addq $8, %r8
decl %r11d
.L63:
#APP
movq %r10,%rax
mulq (%r8)
addq %rax,(%r9)
adcq %rdx,8(%r9)
#NO_APP
addq $16, %r9
addq $8, %r8
decl %r11d
.L62:
#APP
movq %r10,%rax
mulq (%r8)
addq %rax,(%r9)
adcq %rdx,8(%r9)
#NO_APP
addq $16, %r9
addq $8, %r8
decl %r11d
.L61:
#APP
movq %r10,%rax
mulq (%r8)
addq %rax,(%r9)
adcq %rdx,8(%r9)
#NO_APP
addq $16, %r9
addq $8, %r8
decl %r11d
je .L33
.L31:
#APP
movq %r10,%rax
mulq (%r8)
addq %rax,(%r9)
adcq %rdx,8(%r9)
#NO_APP
leaq 16(%r9), %rsi
leaq 8(%r8), %rbp
#APP
movq %r10,%rax
mulq (%rbp)
addq %rax,(%rsi)
adcq %rdx,8(%rsi)
#NO_APP
leaq 32(%r9), %rdi
leaq 16(%r8), %rbx
#APP
movq %r10,%rax
mulq (%rbx)
addq %rax,(%rdi)
adcq %rdx,8(%rdi)
#NO_APP
leaq 48(%r9), %rcx
leaq 24(%r8), %rbp
#APP
movq %r10,%rax
mulq (%rbp)
addq %rax,(%rcx)
adcq %rdx,8(%rcx)
#NO_APP
leaq 64(%r9), %rsi
leaq 32(%r8), %rdi
#APP
movq %r10,%rax
mulq (%rdi)
addq %rax,(%rsi)
adcq %rdx,8(%rsi)
#NO_APP
leaq 80(%r9), %rbx
leaq 40(%r8), %rcx
#APP
movq %r10,%rax
mulq (%rcx)
addq %rax,(%rbx)
adcq %rdx,8(%rbx)
#NO_APP
leaq 96(%r9), %rbp
leaq 48(%r8), %rdi
#APP
movq %r10,%rax
mulq (%rdi)
addq %rax,(%rbp)
adcq %rdx,8(%rbp)
#NO_APP
leaq 112(%r9), %rsi
leaq 56(%r8), %rbx
#APP
movq %r10,%rax
mulq (%rbx)
addq %rax,(%rsi)
adcq %rdx,8(%rsi)
#NO_APP
subq $-128, %r9
addq $64, %r8
subl $8, %r11d
jne .L31
.L33:
popq %rbx
popq %rbp
ret
.LFE4:
.size test3, .-test3
.comm a,16,16
.comm b,16,16
.section .eh_frame,"a",@progbits
.Lframe1:
.long .LECIE1-.LSCIE1
.LSCIE1:
.long 0x0
.byte 0x1
.string ""
.uleb128 0x1
.sleb128 -8
.byte 0x10
.byte 0xc
.uleb128 0x7
.uleb128 0x8
.byte 0x90
.uleb128 0x1
.align 8
.LECIE1:
.LSFDE1:
.long .LEFDE1-.LASFDE1
.LASFDE1:
.long .LASFDE1-.Lframe1
.quad .LFB2
.quad .LFE2-.LFB2
.align 8
.LEFDE1:
.LSFDE3:
.long .LEFDE3-.LASFDE3
.LASFDE3:
.long .LASFDE3-.Lframe1
.quad .LFB3
.quad .LFE3-.LFB3
.byte 0x4
.long .LCFI3-.LFB3
.byte 0x83
.uleb128 0x7
.byte 0x8f
.uleb128 0x2
.byte 0x8e
.uleb128 0x3
.byte 0x8d
.uleb128 0x4
.byte 0x4
.long .LCFI6-.LCFI3
.byte 0xe
.uleb128 0x48
.byte 0x8c
.uleb128 0x5
.byte 0x86
.uleb128 0x6
.align 8
.LEFDE3:
.LSFDE5:
.long .LEFDE5-.LASFDE5
.LASFDE5:
.long .LASFDE5-.Lframe1
.quad .LFB4
.quad .LFE4-.LFB4
.byte 0x4
.long .LCFI7-.LFB4
.byte 0xe
.uleb128 0x10
.byte 0x86
.uleb128 0x2
.byte 0x4
.long .LCFI8-.LCFI7
.byte 0xe
.uleb128 0x18
.byte 0x83
.uleb128 0x3
.align 8
.LEFDE5:
.section .note.GNU-stack,"",@progbits
.ident "GCC: (GNU) 3.4.1 (Gentoo Linux 3.4.1, ssp-3.4-2,
pie-8.7.6.3)"
--
Summary: loop unrolling with x86-64 asm not efficient
Product: gcc
Version: 3.4.1
Status: UNCONFIRMED
Severity: normal
Priority: P2
Component: c
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: tomstdenis at iahu dot ca
CC: gcc-bugs at gcc dot gnu dot org
GCC build triplet: gcc version 3.4.1 (Gentoo Linux 3.4.1, ssp-3.4-2, pie-
8.7.6.3)
GCC host triplet: Linux timmy 2.6.7-gentoo-r11 #1 Thu Aug 5 01:49:49 UTC
2004 x86_
GCC target triplet: gcc version 3.4.1 (Gentoo Linux 3.4.1, ssp-3.4-2, pie-
8.7.6.3)
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16962
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Bug tree-optimization/16962] loop unrolling with x86-64 asm not efficient
2004-08-10 13:42 [Bug c/16962] New: loop unrolling with x86-64 asm not efficient tomstdenis at iahu dot ca
@ 2004-08-10 14:09 ` falk at debian dot org
2004-08-10 14:10 ` falk at debian dot org
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: falk at debian dot org @ 2004-08-10 14:09 UTC (permalink / raw)
To: gcc-bugs
------- Additional Comments From falk at debian dot org 2004-08-10 14:08 -------
Please use the attachment function for large files in the future. Note also
we don't want assembly output anyway (as stated on http://gcc.gnu.org/bugs.html).
Loop work is currently only being done at the lno-branch. It would be nice
if you could test it there.
Also, this way of unrolling loops doesn't seem fundamentally wrong to me.
Can you provide performance numbers that show that it is worse?
--
What |Removed |Added
----------------------------------------------------------------------------
Component|c |tree-optimization
GCC build triplet|gcc version 3.4.1 (Gentoo |x86_86-linux
|Linux 3.4.1, ssp-3.4-2, pie-|
|8.7.6.3) |
GCC host triplet|Linux timmy 2.6.7-gentoo-r11|x86_86-linux
|#1 Thu Aug 5 01:49:49 UTC |
|2004 x86_ |
GCC target triplet|gcc version 3.4.1 (Gentoo |x86_86-linux
|Linux 3.4.1, ssp-3.4-2, pie-|
|8.7.6.3) |
Keywords| |missed-optimization
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16962
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Bug tree-optimization/16962] loop unrolling with x86-64 asm not efficient
2004-08-10 13:42 [Bug c/16962] New: loop unrolling with x86-64 asm not efficient tomstdenis at iahu dot ca
2004-08-10 14:09 ` [Bug tree-optimization/16962] " falk at debian dot org
@ 2004-08-10 14:10 ` falk at debian dot org
2004-08-10 14:13 ` tomstdenis at iahu dot ca
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: falk at debian dot org @ 2004-08-10 14:10 UTC (permalink / raw)
To: gcc-bugs
------- Additional Comments From falk at debian dot org 2004-08-10 14:10 -------
Whoops.
--
What |Removed |Added
----------------------------------------------------------------------------
GCC build triplet|x86_86-linux |x86_64-linux
GCC host triplet|x86_86-linux |x86_64-linux
GCC target triplet|x86_86-linux |x86_64-linux
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16962
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Bug tree-optimization/16962] loop unrolling with x86-64 asm not efficient
2004-08-10 13:42 [Bug c/16962] New: loop unrolling with x86-64 asm not efficient tomstdenis at iahu dot ca
2004-08-10 14:09 ` [Bug tree-optimization/16962] " falk at debian dot org
2004-08-10 14:10 ` falk at debian dot org
@ 2004-08-10 14:13 ` tomstdenis at iahu dot ca
2004-08-10 14:23 ` pinskia at gcc dot gnu dot org
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: tomstdenis at iahu dot ca @ 2004-08-10 14:13 UTC (permalink / raw)
To: gcc-bugs
------- Additional Comments From tomstdenis at iahu dot ca 2004-08-10 14:13 -------
(In reply to comment #1)
> Please use the attachment function for large files in the future. Note also
> we don't want assembly output anyway (as stated on
http://gcc.gnu.org/bugs.html).
Ooops sorry.
> Loop work is currently only being done at the lno-branch. It would be nice
> if you could test it there.
I'll have to ask. I'm using someone else's box atm.
> Also, this way of unrolling loops doesn't seem fundamentally wrong to me
> Can you provide performance numbers that show that it is worse?
No because I can't get it to compile the other way ;-( that's the point!
From what I can see the bug [maybe in my code]? Is that I do things like
mulq (%1)
So GCC doesn't realize it can mod that and do
mulq 0(%1)
...next iteration
mulq 8(%1)
...next iteration
mulq 16(%1)
So instead it does
mulq 0(%1)
...
lea 8(%some_register),%some_other
mulq (%some_other)
...
Is my ASM code just wrong (in that I mean I'm not making best use of it?) or
is this a legit chance for GCC to optimize better?
>
--
What |Removed |Added
----------------------------------------------------------------------------
GCC build triplet|x86_64-linux |x86_86-linux
GCC host triplet|x86_64-linux |x86_86-linux
GCC target triplet|x86_64-linux |x86_86-linux
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16962
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Bug tree-optimization/16962] loop unrolling with x86-64 asm not efficient
2004-08-10 13:42 [Bug c/16962] New: loop unrolling with x86-64 asm not efficient tomstdenis at iahu dot ca
` (2 preceding siblings ...)
2004-08-10 14:13 ` tomstdenis at iahu dot ca
@ 2004-08-10 14:23 ` pinskia at gcc dot gnu dot org
2004-08-10 14:55 ` falk at debian dot org
2004-08-24 21:06 ` falk at debian dot org
5 siblings, 0 replies; 7+ messages in thread
From: pinskia at gcc dot gnu dot org @ 2004-08-10 14:23 UTC (permalink / raw)
To: gcc-bugs
------- Additional Comments From pinskia at gcc dot gnu dot org 2004-08-10 14:23 -------
Invalid as there is no way we can schedule instructions inside an asm block.
--
What |Removed |Added
----------------------------------------------------------------------------
Status|UNCONFIRMED |RESOLVED
Resolution| |INVALID
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16962
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Bug tree-optimization/16962] loop unrolling with x86-64 asm not efficient
2004-08-10 13:42 [Bug c/16962] New: loop unrolling with x86-64 asm not efficient tomstdenis at iahu dot ca
` (3 preceding siblings ...)
2004-08-10 14:23 ` pinskia at gcc dot gnu dot org
@ 2004-08-10 14:55 ` falk at debian dot org
2004-08-24 21:06 ` falk at debian dot org
5 siblings, 0 replies; 7+ messages in thread
From: falk at debian dot org @ 2004-08-10 14:55 UTC (permalink / raw)
To: gcc-bugs
------- Additional Comments From falk at debian dot org 2004-08-10 14:55 -------
Well, the fundamental problem is independent of assembly. Here's a much simpler
example:
void f(int *p, int l) {
int i;
for (i = 0; i < l; ++i)
p[i] = 0;
}
With 3.4 and also with an oldish lno, on Alpha I get
$L36:
stq $31,0($3)
lda $5,1($5)
lda $3,8($3)
$L35:
stq $31,0($3)
lda $5,1($5)
lda $3,8($3)
...
while the offsets could easily be constant folded if there were no jumps into
the loop.
--
What |Removed |Added
----------------------------------------------------------------------------
Status|RESOLVED |UNCONFIRMED
Resolution|INVALID |
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16962
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Bug tree-optimization/16962] loop unrolling with x86-64 asm not efficient
2004-08-10 13:42 [Bug c/16962] New: loop unrolling with x86-64 asm not efficient tomstdenis at iahu dot ca
` (4 preceding siblings ...)
2004-08-10 14:55 ` falk at debian dot org
@ 2004-08-24 21:06 ` falk at debian dot org
5 siblings, 0 replies; 7+ messages in thread
From: falk at debian dot org @ 2004-08-24 21:06 UTC (permalink / raw)
To: gcc-bugs
------- Additional Comments From falk at debian dot org 2004-08-24 21:06 -------
Looking more closely, my Alpha example actually generates reasonable code
now. As to your assembly: I can't really read i386 assembly, but it seems it
modifies something without telling gcc, so it is invalid. To improve
performance, you should avoid (%x) and use a "m" constraint, and not clobber
hard regs but use "=r" on temp vars. Something like:
asm("..." : "=m"(*out) :"r"(x), "m"(*y));
With this, I get entirely reasonable code on the lno-branch, so closing.
--
What |Removed |Added
----------------------------------------------------------------------------
Status|UNCONFIRMED |RESOLVED
Resolution| |INVALID
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16962
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2004-08-24 21:06 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-08-10 13:42 [Bug c/16962] New: loop unrolling with x86-64 asm not efficient tomstdenis at iahu dot ca
2004-08-10 14:09 ` [Bug tree-optimization/16962] " falk at debian dot org
2004-08-10 14:10 ` falk at debian dot org
2004-08-10 14:13 ` tomstdenis at iahu dot ca
2004-08-10 14:23 ` pinskia at gcc dot gnu dot org
2004-08-10 14:55 ` falk at debian dot org
2004-08-24 21:06 ` falk at debian dot org
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).