public inbox for gcc-help@gcc.gnu.org
 help / color / mirror / Atom feed
* how to correctly pass volatile pointer to _mm_loadu_ps?
@ 2011-01-23 14:36 Paweł Sikora
  0 siblings, 0 replies; only message in thread
From: Paweł Sikora @ 2011-01-23 14:36 UTC (permalink / raw)
  To: gcc-help

[-- Attachment #1: Type: Text/Plain, Size: 1119 bytes --]

hi,

i'm trying to read 128-bits of unaligned data in one atomic move using 'movups' opcode.
the protoype with _mm_loadu_ps was optimized out by the compiler.

$ gcc46 hw_reg.c -Wall -c -O2 -m64 --save-temps
hw_reg.c: In function 'read_mapped_register_128':
hw_reg.c:19:2: warning: passing argument 1 of '_mm_loadu_ps' from incompatible pointer type [enabled by default]
/opt/gcc46/lib/gcc/x86_64-unknown-linux-gnu/4.6.0/include/xmmintrin.h:904:1: note: expected 'const float *' but argument is of type 'const volatile __vector(4) float *'

i've tested another idea with intermediate volatile value:

static __m128 read_mapped_register_128( __m128 volatile const* address, ptrdiff_t index )
{
        __m128 volatile const* p = address + index;
        __m128 volatile const v = _mm_loadu_ps( p );
        return v;
}

but it generates 3 moves while one is enough:

unused_read_128_with_side_effects:
        salq    $4, %rsi
        movups  (%rdi,%rsi), %xmm0
        movaps  %xmm0, -24(%rsp)
        movaps  -24(%rsp), %xmm0
        ret

is there a nice solution in C language for such (one-move) access?

BR,
Pawel.

[-- Attachment #2: hw_reg.c --]
[-- Type: text/x-csrc, Size: 926 bytes --]

#include <stddef.h>
#include <xmmintrin.h>

static unsigned read_mapped_register_32( unsigned volatile const* address, ptrdiff_t index )
{
	unsigned volatile const* p = address + index;
	return *p;
}

static __m64 read_mapped_register_64( __m64 volatile const* address, ptrdiff_t index )
{
	__m64 volatile const* p = address + index;
	return *p;
}

static __m128 read_mapped_register_128( __m128 volatile const* address, ptrdiff_t index )
{
	__m128 volatile const* p = address + index;
	return _mm_loadu_ps( p );
}

void unused_read_32_with_side_effects( unsigned volatile const* address, ptrdiff_t index )
{ read_mapped_register_32( address, index ); }
void unused_read_64_with_side_effects( __m64 volatile const* address, ptrdiff_t index )
{ read_mapped_register_64( address, index );}
void unused_read_128_with_side_effects( __m128 volatile const* address, ptrdiff_t index )
{ read_mapped_register_128( address, index ); }

[-- Attachment #3: hw_reg.s --]
[-- Type: text/plain, Size: 1028 bytes --]

	.file	"hw_reg.c"
	.text
	.p2align 4,,15
	.globl	unused_read_32_with_side_effects
	.type	unused_read_32_with_side_effects, @function
unused_read_32_with_side_effects:
.LFB519:
	.cfi_startproc
	leaq	(%rdi,%rsi,4), %rax
	movl	(%rax), %eax
	ret
	.cfi_endproc
.LFE519:
	.size	unused_read_32_with_side_effects, .-unused_read_32_with_side_effects
	.p2align 4,,15
	.globl	unused_read_64_with_side_effects
	.type	unused_read_64_with_side_effects, @function
unused_read_64_with_side_effects:
.LFB520:
	.cfi_startproc
	leaq	(%rdi,%rsi,8), %rax
	movq	(%rax), %rax
	ret
	.cfi_endproc
.LFE520:
	.size	unused_read_64_with_side_effects, .-unused_read_64_with_side_effects
	.p2align 4,,15
	.globl	unused_read_128_with_side_effects
	.type	unused_read_128_with_side_effects, @function
unused_read_128_with_side_effects:
.LFB521:
	.cfi_startproc
	rep
	ret
	.cfi_endproc
.LFE521:
	.size	unused_read_128_with_side_effects, .-unused_read_128_with_side_effects
	.ident	"GCC: (GNU) 4.6.0 20110122 (experimental)"
	.section	.note.GNU-stack,"",@progbits

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2011-01-23 14:36 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-01-23 14:36 how to correctly pass volatile pointer to _mm_loadu_ps? Paweł Sikora

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).