public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug c/31661] New: mmintrin calls are slower than plain C
@ 2007-04-23 2:01 effbiae at gmail dot com
2007-04-23 2:04 ` [Bug target/31661] " pinskia at gcc dot gnu dot org
0 siblings, 1 reply; 2+ messages in thread
From: effbiae at gmail dot com @ 2007-04-23 2:01 UTC (permalink / raw)
To: gcc-bugs
hi,
this thread:
http://gcc.gnu.org/ml/gcc-help/2007-04/msg00201.html
details my problems. (duplicated here)
i want to sum an array of longs using mmx. i use the functions:
_mm_set_pi32 and _m_paddd
but the resultant binary contains significantly less efficient code
than inline asm or even plain C ( for(i=0;i<n;i++)total+=a[i]; ).
here's the relevant function:
simd_mmintrin(n, is)
I *is;
{ __m64 q,r;
I i;
_m_empty();
q=_m_from_int(0);
for (i=0; i < n; i+=W) {
r=_mm_set_pi32(is[i],is[i+1]);
q=_m_paddd(q,r);
}
union {long a[2];__m64 m;}u;
u.m=q;
return u.a[0]+u.a[1];
}
i have a script RUNME.sh:
$ sh RUNME.sh
---
expect: 199990000
impl: C (SISD)
199990000
real 0m0.604s
user 0m0.580s
sys 0m0.004s
---
expect: 199990000
impl: ASM (SIMD)
199990000
real 0m0.377s
user 0m0.360s
sys 0m0.008s
---
expect: 199990000
impl: MMINTRIN (SIMD)
199990000
real 0m1.235s
user 0m1.228s
sys 0m0.004s
$ cat RUNME.sh
#!/bin/sh
repeats=4000 # number of times to repeat the test
vectorsize=10000 # size of the vector in 32 bit ints
gcc -O -mmmx v.c -o v
for which in 0 1 2; do time ./v $repeats $vectorsize $which; done
$ cat v.c
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <mmintrin.h>
typedef long I;typedef unsigned long J;
typedef char C;
#define IZ sizeof(I)
#define W 2
simd_mmintrin(n, is)
I *is;
{ __v2si q,r;
I i;
_m_empty();
q=_m_from_int(0);
for (i=0; i < n; i+=W) {
memcpy(&r,is+i,IZ*W);
q=_m_paddd(q,r);
}
I*qq=(I*)&q;
return qq[0]+qq[1];
}
simd_asm(n, is)
I *is;
{ I i,*r=malloc(IZ*W*8);
asm("emms");
asm("pxor %mm0,%mm0");
for (i=0; i < n; i+=W) {
asm("movq %0,%%mm1\n\t"
"paddd %%mm1,%%mm0"
:
:"m"(is[i]) );
}
asm("movq %%mm0,%0":"=m"(*(__m64*)r));
return r[0]+r[1];
}
sisd(n, is)
I *is;
{
I i = 0, j = 0;
for (i = 0; i < n; i++)
j += is[i];
return j;
}
main(c, v)
C **v;
{
I n=atol(v[1]), z=atol(v[2]), m=atol(v[3]);
I result, *is=malloc(IZ*(z*=2)), i;
int(*fs[])()={sisd,simd_asm,simd_mmintrin,0};
C*ss[]={"C (SISD)","ASM (SIMD)","MMINTRIN (SIMD)"};
for(i=0;i<z;i++)is[i]=i;
printf("\n\n---\nexpect: %d\n",(z)*(z-1)/2);
printf("impl: %s\n",ss[m]);
while (n--)
result=fs[m](z, is);
printf("%d\n",result);
}
[jack@fedora i]$ gcc -v
Using built-in specs.
Target: i386-redhat-linux
Configured with: ../configure --prefix=/usr --mandir=/usr/share/man
--infodir=/u
sr/share/info --enable-shared --enable-threads=posix --enable-checking=release
-
-with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions
--enable-
libgcj-multifile --enable-languages=c,c++,objc,obj-c++,java,fortran,ada
--enable
-java-awt=gtk --disable-dssi
--with-java-home=/usr/lib/jvm/java-1.4.2-gcj-1.4.2.
0/jre --with-cpu=generic --host=i386-redhat-linux
Thread model: posix
gcc version 4.1.0 20060304 (Red Hat 4.1.0-3)
--
Summary: mmintrin calls are slower than plain C
Product: gcc
Version: 4.1.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: effbiae at gmail dot com
GCC host triplet: fedora core 5; pentium III
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31661
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2007-04-23 2:04 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-04-23 2:01 [Bug c/31661] New: mmintrin calls are slower than plain C effbiae at gmail dot com
2007-04-23 2:04 ` [Bug target/31661] " pinskia at gcc dot gnu dot org
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).