public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug c/31661] New: mmintrin calls are slower than plain C
@ 2007-04-23 2:01 effbiae at gmail dot com
2007-04-23 2:04 ` [Bug target/31661] " pinskia at gcc dot gnu dot org
0 siblings, 1 reply; 2+ messages in thread
From: effbiae at gmail dot com @ 2007-04-23 2:01 UTC (permalink / raw)
To: gcc-bugs
hi,
this thread:
http://gcc.gnu.org/ml/gcc-help/2007-04/msg00201.html
details my problems. (duplicated here)
i want to sum an array of longs using mmx. i use the functions:
_mm_set_pi32 and _m_paddd
but the resultant binary contains significantly less efficient code
than inline asm or even plain C ( for(i=0;i<n;i++)total+=a[i]; ).
here's the relevant function:
/*
 * Sum is[0..n-1] using MMX intrinsics.
 * Each loop iteration packs two consecutive elements into r and adds them
 * lane-wise into the accumulator q; the two lanes are added on return.
 * Old-style definition: n is not declared, so it is an int, and the
 * return type is the implicit int.
 */
simd_mmintrin(n, is)
I *is;
{ __m64 q,r;
I i;
_m_empty();
q=_m_from_int(0); /* accumulator starts at zero */
for (i=0; i < n; i+=W) {
/* reads is[i] and is[i+1], so n is expected to be a multiple of 2 */
r=_mm_set_pi32(is[i],is[i+1]);
q=_m_paddd(q,r);
}
/* Read the packed result back as two longs through a union. */
/* NOTE(review): long a[2] only overlays the 8-byte __m64 exactly when long is 32 bits -- confirm for the target. */
union {long a[2];__m64 m;}u;
u.m=q;
return u.a[0]+u.a[1];
}
i have a script RUNME.sh:
$ sh RUNME.sh
---
expect: 199990000
impl: C (SISD)
199990000
real 0m0.604s
user 0m0.580s
sys 0m0.004s
---
expect: 199990000
impl: ASM (SIMD)
199990000
real 0m0.377s
user 0m0.360s
sys 0m0.008s
---
expect: 199990000
impl: MMINTRIN (SIMD)
199990000
real 0m1.235s
user 0m1.228s
sys 0m0.004s
$ cat RUNME.sh
#!/bin/sh
# Build v.c and time each of its three summation implementations.
repeats=4000 # number of times to repeat the test
vectorsize=10000 # size of the vector in 32 bit ints
# Stop if the build fails so that a stale or missing ./v is never timed.
gcc -O -mmmx v.c -o v || exit 1
# $which selects the implementation: 0 = plain C, 1 = inline asm, 2 = mmintrin.
for which in 0 1 2; do time ./v "$repeats" "$vectorsize" "$which"; done
$ cat v.c
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <mmintrin.h>
typedef long I;typedef unsigned long J;
typedef char C;
#define IZ sizeof(I)
#define W 2
/*
 * Sum is[0..n-1] with the MMX packed-add intrinsic, W elements per
 * iteration, and return the sum of the first two lanes of the accumulator.
 * Old-style definition: n is not declared, so it is an int, and the
 * return type is the implicit int.
 */
simd_mmintrin(n, is)
I *is;
{ __v2si q,r;
I i;
_m_empty();
q=_m_from_int(0); /* accumulator starts at zero */
for (i=0; i < n; i+=W) {
/* Copy the next W elements (IZ*W bytes) from the array into r. */
/* NOTE(review): r is an 8-byte vector, so this only fits when IZ is 4 (32-bit long) -- confirm for the target. */
memcpy(&r,is+i,IZ*W);
q=_m_paddd(q,r);
}
/* View the accumulator's storage as an array of I to add the lanes. */
/* NOTE(review): this reads a vector object through an I pointer; presumably relies on the compiler not enforcing strict aliasing at -O -- verify. */
I*qq=(I*)&q;
return qq[0]+qq[1];
}
simd_asm(n, is)
I *is;
{ I i,*r=malloc(IZ*W*8);
asm("emms");
asm("pxor %mm0,%mm0");
for (i=0; i < n; i+=W) {
asm("movq %0,%%mm1\n\t"
"paddd %%mm1,%%mm0"
:
:"m"(is[i]) );
}
asm("movq %%mm0,%0":"=m"(*(__m64*)r));
return r[0]+r[1];
}
/*
 * Plain C reference implementation: add up is[0..n-1] one element at a time.
 * Old-style definition: n is not declared, so it is an int, and the sum is
 * returned through the implicit int return type.
 */
sisd(n, is)
I *is;
{
    I total = 0;
    I k = 0;

    while (k < n) {
        total += is[k];
        k++;
    }
    return total;
}
/*
 * Benchmark driver.
 *
 * usage: v REPEATS VECTORSIZE WHICH
 *   REPEATS     how many times the selected routine is called
 *   VECTORSIZE  half the number of elements summed (the array holds twice
 *               this many, so the length is always even and the routines
 *               that consume two elements per step never read past the end)
 *   WHICH       0 = sisd, 1 = simd_asm, 2 = simd_mmintrin
 *
 * Fills the array with 0..z-1, prints the expected sum and the name of the
 * chosen implementation, runs it REPEATS times and prints the last result.
 * Returns 0 on success and 1 on bad arguments or allocation failure.
 */
int main(int c, C **v)
{
    int (*fs[])() = {sisd, simd_asm, simd_mmintrin, 0};
    C *ss[] = {"C (SISD)", "ASM (SIMD)", "MMINTRIN (SIMD)"};
    const I nimpl = sizeof ss / sizeof ss[0];
    I n, z, m, i, *is;
    I result = 0; /* defined even when REPEATS is 0 */

    if (c < 4) {
        fprintf(stderr, "usage: %s repeats vectorsize which\n",
                c > 0 ? v[0] : "v");
        return 1;
    }
    n = atol(v[1]);
    z = atol(v[2]);
    m = atol(v[3]);
    if (z < 0 || m < 0 || m >= nimpl) {
        fprintf(stderr, "vectorsize must be >= 0 and which must be 0..%ld\n",
                (long)(nimpl - 1));
        return 1;
    }

    z *= 2;
    is = malloc(IZ * z);
    if (is == NULL && z > 0) {
        fprintf(stderr, "out of memory\n");
        return 1;
    }
    for (i = 0; i < z; i++)
        is[i] = i;

    /* I is long, so print with %ld rather than %d. */
    printf("\n\n---\nexpect: %ld\n", (long)(z * (z - 1) / 2));
    printf("impl: %s\n", ss[m]);
    while (n-- > 0) {
        /* The routines take their count as an old-style int parameter. */
        result = fs[m]((int)z, is);
    }
    printf("%ld\n", (long)result);

    free(is);
    return 0;
}
[jack@fedora i]$ gcc -v
Using built-in specs.
Target: i386-redhat-linux
Configured with: ../configure --prefix=/usr --mandir=/usr/share/man
--infodir=/usr/share/info --enable-shared --enable-threads=posix
--enable-checking=release --with-system-zlib --enable-__cxa_atexit
--disable-libunwind-exceptions --enable-libgcj-multifile
--enable-languages=c,c++,objc,obj-c++,java,fortran,ada
--enable-java-awt=gtk --disable-dssi
--with-java-home=/usr/lib/jvm/java-1.4.2-gcj-1.4.2.0/jre
--with-cpu=generic --host=i386-redhat-linux
Thread model: posix
gcc version 4.1.0 20060304 (Red Hat 4.1.0-3)
--
Summary: mmintrin calls are slower than plain C
Product: gcc
Version: 4.1.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: effbiae at gmail dot com
GCC host triplet: fedora core 5; pentium III
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31661
^ permalink raw reply [flat|nested] 2+ messages in thread
* [Bug target/31661] mmintrin calls are slower than plain C
2007-04-23 2:01 [Bug c/31661] New: mmintrin calls are slower than plain C effbiae at gmail dot com
@ 2007-04-23 2:04 ` pinskia at gcc dot gnu dot org
0 siblings, 0 replies; 2+ messages in thread
From: pinskia at gcc dot gnu dot org @ 2007-04-23 2:04 UTC (permalink / raw)
To: gcc-bugs
------- Comment #1 from pinskia at gcc dot gnu dot org 2007-04-23 03:04 -------
The main reason it is slower is that taking the address messes up lots of
stuff.
Also, MMX is slower because we try not to reload it, so this is a target
issue.
--
pinskia at gcc dot gnu dot org changed:
What |Removed |Added
----------------------------------------------------------------------------
Severity|normal |enhancement
Component|c |target
GCC host triplet|fedora core 5; pentium III |
GCC target triplet| |i686-linux-gnu
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31661
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2007-04-23 2:04 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-04-23 2:01 [Bug c/31661] New: mmintrin calls are slower than plain C effbiae at gmail dot com
2007-04-23 2:04 ` [Bug target/31661] " pinskia at gcc dot gnu dot org
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).