From: Tom de Vries <Tom_deVries@mentor.com>
To: Jakub Jelinek <jakub@redhat.com>
Cc: "gcc-patches@gnu.org" <gcc-patches@gnu.org>
Subject: Re: [patch] Add counter inits to zero_iter_bb in expand_omp_for_init_counts
Date: Thu, 01 Oct 2015 13:38:00 -0000 [thread overview]
Message-ID: <560D3716.9020804@mentor.com> (raw)
In-Reply-To: <20151001124933.GB28276@tucnak.redhat.com>
[-- Attachment #1: Type: text/plain, Size: 1353 bytes --]
On 01/10/15 14:49, Jakub Jelinek wrote:
> On Thu, Oct 01, 2015 at 02:46:01PM +0200, Tom de Vries wrote:
>> this patch adds initialization in zero_iter_bb of counters introduced in
>> expand_omp_for_init_counts.
>>
>> This removes the need to set TREE_NO_WARNING on those counters.
>
> Why do you think it is a good idea?
In replace_ssa_name, I've recently added the assert:
...
gcc_assert (!SSA_NAME_IS_DEFAULT_DEF (name));
...
On the gomp-4_0-branch, this assert triggers for a collapsed acc loop,
which uses expand_omp_for_generic for omp-expansion. The assert
triggers because (some of) the counters added by
expand_omp_for_init_counts are not initialized on all paths.
On trunk, for the test-case in the patch, this assert doesn't trigger
because the omp function is split off before ssa.
> I'd be afraid it slows things down unnecessarily.
I think zero_iter_bb is a block that is expected not to be executed
frequently.
I've attached an sdiff of x86_64 assembly for the test-case (before
left, after right). AFAICT, this patch has the effect that it speeds up
the frequent path with one instruction.
> Furthermore, I'd prefer not to change this area of code before
> gomp-4_1-branch is merged, as it will be a nightmare for the merge
> otherwise.
Committing to gomp-4_0-branch for now would work for me.
Thanks,
- Tom
[-- Attachment #2: DIFF --]
[-- Type: text/plain, Size: 3541 bytes --]
.file "collapse.c" .file "collapse.c"
.text .text
.p2align 4,,15 .p2align 4,,15
.type foo._omp_fn.0, @funct .type foo._omp_fn.0, @funct
foo._omp_fn.0: foo._omp_fn.0:
.LFB12: .LFB12:
.cfi_startproc .cfi_startproc
pushq %rbp pushq %rbp
.cfi_def_cfa_offset 16 .cfi_def_cfa_offset 16
.cfi_offset 6, -16 .cfi_offset 6, -16
pushq %rbx pushq %rbx
.cfi_def_cfa_offset 24 .cfi_def_cfa_offset 24
.cfi_offset 3, -24 .cfi_offset 3, -24
xorl %esi, %esi <
subq $24, %rsp subq $24, %rsp
.cfi_def_cfa_offset 48 .cfi_def_cfa_offset 48
movl (%rdi), %eax movl (%rdi), %eax
movl 4(%rdi), %ebp movl 4(%rdi), %ebp
testl %eax, %eax testl %eax, %eax
jle .L8 | jle .L9
testl %ebp, %ebp testl %ebp, %ebp
jle .L8 | jle .L9
movslq %ebp, %rbx movslq %ebp, %rbx
movslq %eax, %rsi movslq %eax, %rsi
imulq %rbx, %rsi imulq %rbx, %rsi
.L8: .L8:
leaq 8(%rsp), %r8 leaq 8(%rsp), %r8
xorl %edi, %edi xorl %edi, %edi
movq %rsp, %rcx movq %rsp, %rcx
movl $1, %edx movl $1, %edx
call GOMP_loop_runtime_sta call GOMP_loop_runtime_sta
testb %al, %al testb %al, %al
jne .L10 jne .L10
.L6: .L6:
call GOMP_loop_end_nowait call GOMP_loop_end_nowait
addq $24, %rsp addq $24, %rsp
.cfi_remember_state .cfi_remember_state
.cfi_def_cfa_offset 24 .cfi_def_cfa_offset 24
popq %rbx popq %rbx
.cfi_def_cfa_offset 16 .cfi_def_cfa_offset 16
popq %rbp popq %rbp
.cfi_def_cfa_offset 8 .cfi_def_cfa_offset 8
ret ret
.p2align 4,,10 .p2align 4,,10
.p2align 3 .p2align 3
.L15: .L15:
.cfi_restore_state .cfi_restore_state
leaq 8(%rsp), %rsi leaq 8(%rsp), %rsi
movq %rsp, %rdi movq %rsp, %rdi
call GOMP_loop_runtime_nex call GOMP_loop_runtime_nex
testb %al, %al testb %al, %al
je .L6 je .L6
.L10: .L10:
movq (%rsp), %rsi movq (%rsp), %rsi
movq 8(%rsp), %r9 movq 8(%rsp), %r9
movq %rsi, %rax movq %rsi, %rax
cqto cqto
idivq %rbx idivq %rbx
movslq %eax, %r8 movslq %eax, %r8
.p2align 4,,10 .p2align 4,,10
.p2align 3 .p2align 3
.L4: .L4:
leaq (%r8,%r8,4), %rdi leaq (%r8,%r8,4), %rdi
movslq %edx, %rcx movslq %edx, %rcx
addq $1, %rsi addq $1, %rsi
cmpq %rsi, %r9 cmpq %rsi, %r9
leaq (%rdi,%rdi,4), %rdi leaq (%rdi,%rdi,4), %rdi
leaq (%rcx,%rdi,4), %rcx leaq (%rcx,%rdi,4), %rcx
movl $1, a(,%rcx,4) movl $1, a(,%rcx,4)
jle .L15 jle .L15
addl $1, %edx addl $1, %edx
cmpl %edx, %ebp cmpl %edx, %ebp
jg .L4 jg .L4
addl $1, %eax addl $1, %eax
xorl %edx, %edx xorl %edx, %edx
movslq %eax, %r8 movslq %eax, %r8
jmp .L4 jmp .L4
> .L9:
> xorl %ebx, %ebx
> xorl %esi, %esi
> jmp .L8
.cfi_endproc .cfi_endproc
.LFE12: .LFE12:
.size foo._omp_fn.0, .-foo. .size foo._omp_fn.0, .-foo.
.p2align 4,,15 .p2align 4,,15
.globl foo .globl foo
.type foo, @function .type foo, @function
foo: foo:
.LFB10: .LFB10:
.cfi_startproc .cfi_startproc
subq $24, %rsp subq $24, %rsp
.cfi_def_cfa_offset 32 .cfi_def_cfa_offset 32
xorl %ecx, %ecx xorl %ecx, %ecx
xorl %edx, %edx xorl %edx, %edx
movl %edi, (%rsp) movl %edi, (%rsp)
movl %esi, 4(%rsp) movl %esi, 4(%rsp)
movl $foo._omp_fn.0, %edi movl $foo._omp_fn.0, %edi
movq %rsp, %rsi movq %rsp, %rsi
call GOMP_parallel call GOMP_parallel
addq $24, %rsp addq $24, %rsp
.cfi_def_cfa_offset 8 .cfi_def_cfa_offset 8
ret ret
.cfi_endproc .cfi_endproc
.LFE10: .LFE10:
.size foo, .-foo .size foo, .-foo
next prev parent reply other threads:[~2015-10-01 13:38 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-10-01 12:46 Tom de Vries
2015-10-01 12:49 ` Jakub Jelinek
2015-10-01 13:38 ` Tom de Vries [this message]
2015-10-08 8:33 ` [gomp4, committed] " Tom de Vries
2015-10-23 14:40 ` [patch] " Thomas Schwinge
2015-10-28 9:50 ` Tom de Vries
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=560D3716.9020804@mentor.com \
--to=tom_devries@mentor.com \
--cc=gcc-patches@gnu.org \
--cc=jakub@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).