public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Tom de Vries <Tom_deVries@mentor.com>
To: Jakub Jelinek <jakub@redhat.com>
Cc: "gcc-patches@gnu.org" <gcc-patches@gnu.org>
Subject: Re: [patch] Add counter inits to zero_iter_bb in expand_omp_for_init_counts
Date: Thu, 01 Oct 2015 13:38:00 -0000	[thread overview]
Message-ID: <560D3716.9020804@mentor.com> (raw)
In-Reply-To: <20151001124933.GB28276@tucnak.redhat.com>

[-- Attachment #1: Type: text/plain, Size: 1353 bytes --]

On 01/10/15 14:49, Jakub Jelinek wrote:
> On Thu, Oct 01, 2015 at 02:46:01PM +0200, Tom de Vries wrote:
>> this patch adds initialization in zero_iter_bb of counters introduced in
>> expand_omp_for_init_counts.
>>
>> This removes the need to set TREE_NO_WARNING on those counters.
>
> Why do you think it is a good idea?

In replace_ssa_name, I've recently added the assert:
...
           gcc_assert (!SSA_NAME_IS_DEFAULT_DEF (name));
...

On the gomp-4_0-branch, this assert triggers for a collapsed acc loop, 
which uses expand_omp_for_generic for omp-expansion.  The assert 
triggers because (some of) the counters added by 
expand_omp_for_init_counts are not initialized on all paths.

On trunk, for the test-case in the patch, this assert doesn't trigger 
because the omp function is split off before ssa.

> I'd be afraid it slows things down unnecessarily.

I think zero_iter_bb is a block that is expected not to be executed 
frequently.

I've attached an sdiff of x86_64 assembly for the test-case (before 
left, after right). AFAICT, this patch has the effect that it speeds up 
the frequent path with one instruction.

>  Furthermore, I'd prefer not to change this area of code before
> gomp-4_1-branch is merged, as it will be a nightmare for the merge
> otherwise.

Committing to gomp-4_0-branch for now would work for me.

Thanks,
- Tom


[-- Attachment #2: DIFF --]
[-- Type: text/plain, Size: 3541 bytes --]

	.file	"collapse.c"			.file	"collapse.c"
	.text					.text
	.p2align 4,,15				.p2align 4,,15
	.type	foo._omp_fn.0, @funct		.type	foo._omp_fn.0, @funct
foo._omp_fn.0:				foo._omp_fn.0:
.LFB12:					.LFB12:
	.cfi_startproc				.cfi_startproc
	pushq	%rbp				pushq	%rbp
	.cfi_def_cfa_offset 16			.cfi_def_cfa_offset 16
	.cfi_offset 6, -16			.cfi_offset 6, -16
	pushq	%rbx				pushq	%rbx
	.cfi_def_cfa_offset 24			.cfi_def_cfa_offset 24
	.cfi_offset 3, -24			.cfi_offset 3, -24
	xorl	%esi, %esi	      <
	subq	$24, %rsp			subq	$24, %rsp
	.cfi_def_cfa_offset 48			.cfi_def_cfa_offset 48
	movl	(%rdi), %eax			movl	(%rdi), %eax
	movl	4(%rdi), %ebp			movl	4(%rdi), %ebp
	testl	%eax, %eax			testl	%eax, %eax
	jle	.L8		      |		jle	.L9
	testl	%ebp, %ebp			testl	%ebp, %ebp
	jle	.L8		      |		jle	.L9
	movslq	%ebp, %rbx			movslq	%ebp, %rbx
	movslq	%eax, %rsi			movslq	%eax, %rsi
	imulq	%rbx, %rsi			imulq	%rbx, %rsi
.L8:					.L8:
	leaq	8(%rsp), %r8			leaq	8(%rsp), %r8
	xorl	%edi, %edi			xorl	%edi, %edi
	movq	%rsp, %rcx			movq	%rsp, %rcx
	movl	$1, %edx			movl	$1, %edx
	call	GOMP_loop_runtime_sta		call	GOMP_loop_runtime_sta
	testb	%al, %al			testb	%al, %al
	jne	.L10				jne	.L10
.L6:					.L6:
	call	GOMP_loop_end_nowait		call	GOMP_loop_end_nowait
	addq	$24, %rsp			addq	$24, %rsp
	.cfi_remember_state			.cfi_remember_state
	.cfi_def_cfa_offset 24			.cfi_def_cfa_offset 24
	popq	%rbx				popq	%rbx
	.cfi_def_cfa_offset 16			.cfi_def_cfa_offset 16
	popq	%rbp				popq	%rbp
	.cfi_def_cfa_offset 8			.cfi_def_cfa_offset 8
	ret					ret
	.p2align 4,,10				.p2align 4,,10
	.p2align 3				.p2align 3
.L15:					.L15:
	.cfi_restore_state			.cfi_restore_state
	leaq	8(%rsp), %rsi			leaq	8(%rsp), %rsi
	movq	%rsp, %rdi			movq	%rsp, %rdi
	call	GOMP_loop_runtime_nex		call	GOMP_loop_runtime_nex
	testb	%al, %al			testb	%al, %al
	je	.L6				je	.L6
.L10:					.L10:
	movq	(%rsp), %rsi			movq	(%rsp), %rsi
	movq	8(%rsp), %r9			movq	8(%rsp), %r9
	movq	%rsi, %rax			movq	%rsi, %rax
	cqto					cqto
	idivq	%rbx				idivq	%rbx
	movslq	%eax, %r8			movslq	%eax, %r8
	.p2align 4,,10				.p2align 4,,10
	.p2align 3				.p2align 3
.L4:					.L4:
	leaq	(%r8,%r8,4), %rdi		leaq	(%r8,%r8,4), %rdi
	movslq	%edx, %rcx			movslq	%edx, %rcx
	addq	$1, %rsi			addq	$1, %rsi
	cmpq	%rsi, %r9			cmpq	%rsi, %r9
	leaq	(%rdi,%rdi,4), %rdi		leaq	(%rdi,%rdi,4), %rdi
	leaq	(%rcx,%rdi,4), %rcx		leaq	(%rcx,%rdi,4), %rcx
	movl	$1, a(,%rcx,4)			movl	$1, a(,%rcx,4)
	jle	.L15				jle	.L15
	addl	$1, %edx			addl	$1, %edx
	cmpl	%edx, %ebp			cmpl	%edx, %ebp
	jg	.L4				jg	.L4
	addl	$1, %eax			addl	$1, %eax
	xorl	%edx, %edx			xorl	%edx, %edx
	movslq	%eax, %r8			movslq	%eax, %r8
	jmp	.L4				jmp	.L4
				      >	.L9:
				      >		xorl	%ebx, %ebx
				      >		xorl	%esi, %esi
				      >		jmp	.L8
	.cfi_endproc				.cfi_endproc
.LFE12:					.LFE12:
	.size	foo._omp_fn.0, .-foo.		.size	foo._omp_fn.0, .-foo.
	.p2align 4,,15				.p2align 4,,15
	.globl	foo				.globl	foo
	.type	foo, @function			.type	foo, @function
foo:					foo:
.LFB10:					.LFB10:
	.cfi_startproc				.cfi_startproc
	subq	$24, %rsp			subq	$24, %rsp
	.cfi_def_cfa_offset 32			.cfi_def_cfa_offset 32
	xorl	%ecx, %ecx			xorl	%ecx, %ecx
	xorl	%edx, %edx			xorl	%edx, %edx
	movl	%edi, (%rsp)			movl	%edi, (%rsp)
	movl	%esi, 4(%rsp)			movl	%esi, 4(%rsp)
	movl	$foo._omp_fn.0, %edi		movl	$foo._omp_fn.0, %edi
	movq	%rsp, %rsi			movq	%rsp, %rsi
	call	GOMP_parallel			call	GOMP_parallel
	addq	$24, %rsp			addq	$24, %rsp
	.cfi_def_cfa_offset 8			.cfi_def_cfa_offset 8
	ret					ret
	.cfi_endproc				.cfi_endproc
.LFE10:					.LFE10:
	.size	foo, .-foo			.size	foo, .-foo

  reply	other threads:[~2015-10-01 13:38 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-10-01 12:46 Tom de Vries
2015-10-01 12:49 ` Jakub Jelinek
2015-10-01 13:38   ` Tom de Vries [this message]
2015-10-08  8:33     ` [gomp4, committed] " Tom de Vries
2015-10-23 14:40     ` [patch] " Thomas Schwinge
2015-10-28  9:50       ` Tom de Vries

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=560D3716.9020804@mentor.com \
    --to=tom_devries@mentor.com \
    --cc=gcc-patches@gnu.org \
    --cc=jakub@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).