public inbox for gcc-bugs@sourceware.org help / color / mirror / Atom feed
From: "tlm at daimi dot au dot dk" <gcc-bugzilla@gcc.gnu.org> To: gcc-bugs@gcc.gnu.org Subject: [Bug rtl-optimization/21827] unroll misses simple elimination - works with manual unroll Date: Tue, 19 Jul 2005 17:34:00 -0000 [thread overview] Message-ID: <20050719170239.23509.qmail@sourceware.org> (raw) In-Reply-To: <20050530183749.21827.tlm@daimi.au.dk> [-- Warning: decoded text below may be mangled, UTF-8 assumed --] [-- Attachment #1: Type: text/plain, Size: 6869 bytes --] ------- Additional Comments From tlm at daimi dot au dot dk 2005-07-19 17:02 ------- (In reply to comment #1) > The first testcase is fixed in 4.0.0. (Though there is a regression on the mainline). I have not looked > into the full testcase. There have not been more reactions on this bug / request, so I give a bit more information (and hopefully motivation) to move forward to a solution of it. I have written the following code : auto_unrolled_knight_count8 and t_auto_unrolled_knight_count9 only have one difference. The first loop goes to 8; the second loop goes to 9. If I manually unroll (meaning replacing with constants up to 64 - it is a chess problem - the code is exactly like the code generated in the up to eight example.) The code generated for the 9 example is in my opinion quite bad. (It does work - but I consider unrolling's finest task to be to eliminate what is (easily known to be) impossible at compile time). The code is normally at least 4-5 times slower than the above code! 
The source is like this : #define WHITE_KNIGHT 5 int auto_unrolled_knight_count8(unsigned char* board) { int count = 0; for (int bp=0;bp<8;++bp) { if (board[bp]==WHITE_KNIGHT) { if (bp%8>1 && bp/8>0) count++; if (bp%8>0 && bp/8>1) count++; if (bp%8<6 && bp/8>0) count++; if (bp%8<7 && bp/8>1) count++; if (bp%8>1 && bp/8<7) count++; if (bp%8>0 && bp/8<6) count++; if (bp%8<6 && bp/8<7) count++; if (bp%8<7 && bp/8<6) count++; } } return count; } int t_auto_unrolled_knight_count9(unsigned char* board) { int count = 0; for (int bp=0;bp<9;++bp) { if (board[bp]==WHITE_KNIGHT) { if (bp%8>1 && bp/8>0) count++; if (bp%8>0 && bp/8>1) count++; if (bp%8<6 && bp/8>0) count++; if (bp%8<7 && bp/8>1) count++; if (bp%8>1 && bp/8<7) count++; if (bp%8>0 && bp/8<6) count++; if (bp%8<6 && bp/8<7) count++; if (bp%8<7 && bp/8<6) count++; } } return count; } Assembly : (Compiled with -O3 and -funroll-loops) .file "all_in_one.cpp" .text .align 2 .p2align 4,,15 .globl _Z27auto_unrolled_knight_count8Ph .type _Z27auto_unrolled_knight_count8Ph, @function _Z27auto_unrolled_knight_count8Ph: .LFB2: pushl %ebp .LCFI0: xorl %eax, %eax movl %esp, %ebp .LCFI1: movl 8(%ebp), %edx cmpb $5, (%edx) je .L22 .L6: cmpb $5, 1(%edx) je .L23 .L8: cmpb $5, 2(%edx) je .L24 .L10: cmpb $5, 3(%edx) .p2align 4,,5 je .L25 .L12: cmpb $5, 4(%edx) .p2align 4,,5 je .L26 .L14: cmpb $5, 5(%edx) .p2align 4,,5 je .L27 .L16: cmpb $5, 6(%edx) .p2align 4,,5 je .L28 .L18: cmpb $5, 7(%edx) .p2align 4,,5 je .L29 popl %ebp .p2align 4,,6 ret .p2align 4,,7 .L29: popl %ebp addl $2, %eax .p2align 4,,6 ret .p2align 4,,7 .L28: addl $3, %eax .p2align 4,,7 jmp .L18 .p2align 4,,7 .L27: addl $4, %eax .p2align 4,,5 jmp .L16 .p2align 4,,7 .L26: addl $4, %eax .p2align 4,,5 jmp .L14 .p2align 4,,7 .L25: addl $4, %eax .p2align 4,,5 jmp .L12 .p2align 4,,7 .L24: addl $4, %eax .p2align 4,,5 jmp .L10 .p2align 4,,7 .L23: addl $3, %eax .p2align 4,,5 jmp .L8 .p2align 4,,7 .L22: movl $2, %eax .p2align 4,,5 jmp .L6 .LFE2: .size 
_Z27auto_unrolled_knight_count8Ph, .-_Z27auto_unrolled_knight_count8Ph ----------------------- End of "nice" code ---------------------- .align 2 .p2align 4,,15 .globl _Z29t_auto_unrolled_knight_count9Ph .type _Z29t_auto_unrolled_knight_count9Ph, @function _Z29t_auto_unrolled_knight_count9Ph: .LFB3: pushl %ebp .LCFI2: movl %esp, %ebp .LCFI3: pushl %edi .LCFI4: xorl %edi, %edi pushl %esi .LCFI5: xorl %esi, %esi pushl %ebx .LCFI6: subl $8, %esp .LCFI7: jmp .L31 .p2align 4,,7 .L32: incl %esi movl %esi, -20(%ebp) cmpb $5, (%eax,%esi) je .L64 .L52: incl %esi cmpb $5, (%eax,%esi) je .L60 .L54: movl -20(%ebp), %esi addl $2, %esi cmpl $9, %esi je .L65 .L31: movl 8(%ebp), %eax cmpb $5, (%eax,%esi) jne .L32 movl %esi, %eax cltd shrl $29, %edx leal (%esi,%edx), %ecx andl $7, %ecx subl %edx, %ecx cmpl $1, %ecx setg -15(%ebp) cmpl $7, %esi movzbl -15(%ebp), %edx setg %bl andb %bl, %dl cmpb $1, %dl sbbl $-1, %edi testl %ecx, %ecx setg -14(%ebp) cmpl $15, %esi movzbl -14(%ebp), %edx setg %al andb %al, %dl cmpb $1, %dl sbbl $-1, %edi cmpl $5, %ecx setle -13(%ebp) andb -13(%ebp), %bl cmpb $1, %bl sbbl $-1, %edi cmpl $6, %ecx setle %bl andb %bl, %al cmpb $1, %al movl 8(%ebp), %eax sbbl $-1, %edi cmpl $55, %esi setle %cl andb %cl, -15(%ebp) cmpb $1, -15(%ebp) sbbl $-1, %edi cmpl $47, %esi setle %dl andb %dl, -14(%ebp) cmpb $1, -14(%ebp) sbbl $-1, %edi andb %cl, -13(%ebp) cmpb $1, -13(%ebp) sbbl $-1, %edi andb %dl, %bl cmpb $1, %bl sbbl $-1, %edi incl %esi movl %esi, -20(%ebp) cmpb $5, (%eax,%esi) jne .L52 .L64: movl %esi, %eax cltd shrl $29, %edx leal (%esi,%edx), %ecx andl $7, %ecx subl %edx, %ecx cmpl $1, %ecx setg -15(%ebp) cmpl $7, %esi movzbl -15(%ebp), %edx setg %bl andb %bl, %dl cmpb $1, %dl sbbl $-1, %edi testl %ecx, %ecx setg -14(%ebp) cmpl $15, %esi movzbl -14(%ebp), %edx setg %al andb %al, %dl cmpb $1, %dl sbbl $-1, %edi cmpl $5, %ecx setle -13(%ebp) andb -13(%ebp), %bl cmpb $1, %bl sbbl $-1, %edi cmpl $6, %ecx setle %bl andb %bl, %al cmpb $1, %al movl 8(%ebp), %eax sbbl 
$-1, %edi cmpl $55, %esi setle %cl andb %cl, -15(%ebp) cmpb $1, -15(%ebp) sbbl $-1, %edi cmpl $47, %esi setle %dl andb %dl, -14(%ebp) cmpb $1, -14(%ebp) sbbl $-1, %edi andb %cl, -13(%ebp) cmpb $1, -13(%ebp) sbbl $-1, %edi andb %dl, %bl cmpb $1, %bl sbbl $-1, %edi incl %esi cmpb $5, (%eax,%esi) jne .L54 .L60: movl %esi, %eax cltd shrl $29, %edx leal (%esi,%edx), %ecx andl $7, %ecx subl %edx, %ecx cmpl $1, %ecx setg -15(%ebp) cmpl $7, %esi movzbl -15(%ebp), %edx setg %bl andb %bl, %dl cmpb $1, %dl sbbl $-1, %edi testl %ecx, %ecx setg -14(%ebp) cmpl $15, %esi movzbl -14(%ebp), %edx setg %al andb %al, %dl cmpb $1, %dl sbbl $-1, %edi cmpl $5, %ecx setle -13(%ebp) andb -13(%ebp), %bl cmpb $1, %bl sbbl $-1, %edi cmpl $6, %ecx setle %bl andb %bl, %al cmpb $1, %al sbbl $-1, %edi cmpl $55, %esi setle %cl andb %cl, -15(%ebp) cmpb $1, -15(%ebp) sbbl $-1, %edi cmpl $47, %esi movl -20(%ebp), %esi setle %dl andb %dl, -14(%ebp) cmpb $1, -14(%ebp) sbbl $-1, %edi andb %cl, -13(%ebp) cmpb $1, -13(%ebp) sbbl $-1, %edi andb %dl, %bl cmpb $1, %bl sbbl $-1, %edi addl $2, %esi cmpl $9, %esi jne .L31 .L65: addl $8, %esp movl %edi, %eax popl %ebx popl %esi popl %edi popl %ebp ret .LFE3: .size _Z29t_auto_unrolled_knight_count9Ph, .-_Z29t_auto_unrolled_knight_count9Ph .ident "GCC: (GNU) 4.0.0" .section .note.GNU-stack,"",@progbits I hope you will confirm the problem (so it can be solved). It would really improve gcc. Regards Thorbjørn -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21827
next prev parent reply other threads:[~2005-07-19 17:02 UTC|newest] Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top 2005-05-30 18:45 [Bug rtl-optimization/21827] New: " tlm at daimi dot au dot dk 2005-05-30 18:55 ` [Bug rtl-optimization/21827] " pinskia at gcc dot gnu dot org 2005-05-30 19:06 ` pinskia at gcc dot gnu dot org 2005-05-31 7:38 ` tlm at daimi dot au dot dk 2005-05-31 20:49 ` tlm at daimi dot au dot dk 2005-07-19 17:34 ` tlm at daimi dot au dot dk [this message] 2005-07-21 18:08 ` pinskia at gcc dot gnu dot org
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20050719170239.23509.qmail@sourceware.org \ --to=gcc-bugzilla@gcc.gnu.org \ --cc=gcc-bugs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).