From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 10685 invoked by alias); 12 Aug 2002 23:16:03 -0000 Mailing-List: contact gcc-prs-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Archive: List-Post: List-Help: Sender: gcc-prs-owner@gcc.gnu.org Received: (qmail 10665 invoked by uid 71); 12 Aug 2002 23:16:02 -0000 Resent-Date: 12 Aug 2002 23:16:02 -0000 Resent-Message-ID: <20020812231602.10664.qmail@sources.redhat.com> Resent-From: gcc-gnats@gcc.gnu.org (GNATS Filer) Resent-To: nobody@gcc.gnu.org Resent-Cc: gcc-prs@gcc.gnu.org, gcc-bugs@gcc.gnu.org Resent-Reply-To: gcc-gnats@gcc.gnu.org, dholm@telia.com Received: (qmail 7452 invoked by uid 61); 12 Aug 2002 23:07:49 -0000 Message-Id: <20020812230749.7451.qmail@sources.redhat.com> Date: Mon, 12 Aug 2002 16:36:00 -0000 From: dholm@telia.com Reply-To: dholm@telia.com To: gcc-gnats@gcc.gnu.org X-Send-Pr-Version: gnatsweb-2.9.3 (1.1.1.1.2.31) Subject: c++/7582: Intel intrinsics cause segfault with gcc 3.1.1 and 3.2 X-SW-Source: 2002-08/txt/msg00237.txt.bz2 List-Id: >Number: 7582 >Category: c++ >Synopsis: Intel intrinsics cause segfault with gcc 3.1.1 and 3.2 >Confidential: no >Severity: critical >Priority: medium >Responsible: unassigned >State: open >Class: sw-bug >Submitter-Id: net >Arrival-Date: Mon Aug 12 16:16:01 PDT 2002 >Closed-Date: >Last-Modified: >Originator: David Holm >Release: gcc version 3.2 2002-07-26 (prerelease) >Organization: >Environment: Gentoo Linux 1.4, Pentium 3 (Coppermine) >Description: The following code executes perfectly when compiled with the Intel C++ Compiler v6.0 but segfaults when compiled with gcc 3.1.1 or 3.2 (2002-07-26). It's compiled with "g++ (-g3) -Wall -msse intrin.cpp -o intrin" and runs without any output. g++ gives no warnings during compilation. intrin segfaults on this line "_mm_stream_ps((float*) dst, xmm0);" "g++ -v" returns: Reading specs from /usr/lib/gcc-lib/i686-pc-linux-gnu/3.2/specs Configured with: /var/tmp/portage/gcc-3.2_pre/work/gcc-3.2/configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --enable-shared --host=i686-pc-linux-gnu --build=i686-pc-linux-gnu --target=i686-pc-linux-gnu --with-system-zlib --enable-languages=c,c++,ada,f77,objc,java --enable-threads=posix --enable-long-long --disable-checking --enable-cstdio=stdio --enable-clocale=generic --enable-version-specific-runtime-libs --with-gxx-include-dir=/usr/include/g++-v32 --with-local-prefix=/usr/local --enable-shared --enable-nls --without-included-gettext Thread model: posix gcc version 3.2 2002-07-26 (prerelease) I haven't got 3.1.1 anymore, so I can't give you the -v output from it. >How-To-Repeat: #include #include #include #include #include #define small_memcpy(dst,src,n) \ register unsigned long int dummy; \ asm volatile ( \ "rep; movsb\n\t" \ :"=&D"(dst), "=&S"(src), "=&c"(dummy) \ :"0" (dst), "1" (src),"2" (n) \ : "memory"); /** * SIMD Optimized memcpy's are graciously borrowed from DirectFB. */ # define SSE_MMREG_SIZE 16 # define MIN_LEN 0x40 /* 64-byte blocks */ void *memcpy_sse( void *dst, const void *src, size_t len ) { void *retval = dst; size_t i; _mm_prefetch((char*) src, _MM_HINT_NTA); _mm_prefetch((char*) src + 64, _MM_HINT_NTA); _mm_prefetch((char*) src + 128, _MM_HINT_NTA); _mm_prefetch((char*) src + 192, _MM_HINT_NTA); _mm_prefetch((char*) src + 256, _MM_HINT_NTA); if (len >= MIN_LEN) { register unsigned long int delta; delta = ((unsigned long int) dst) & (SSE_MMREG_SIZE - 1); if (delta) { delta = SSE_MMREG_SIZE - delta; len -= delta; small_memcpy(dst, src, delta); } i = len >> 6; len &= 63; if (((unsigned long) src) & 15) for (; i > 0; i--) { __m128 xmm0, xmm1, xmm2, xmm3; _mm_prefetch((char*) src + 320, _MM_HINT_NTA); xmm0 = _mm_loadu_ps((float*) src); xmm1 = _mm_loadu_ps((float*) src + 4); xmm2 = _mm_loadu_ps((float*) src + 8); xmm3 = _mm_loadu_ps((float*) src + 12); _mm_stream_ps((float*) dst, xmm0); _mm_stream_ps((float*) dst + 4, xmm1); _mm_stream_ps((float*) dst + 8, xmm2); _mm_stream_ps((float*) dst + 12, xmm3); #ifdef __GNUC__ (char*) src += 64; (char*) dst += 64; #else src += 64; dst += 64; #endif } else for (; i > 0; i--) { __m128 xmm0, xmm1, xmm2, xmm3; _mm_prefetch((char*) src + 320, _MM_HINT_NTA); xmm0 = _mm_load_ps((float*) src); xmm1 = _mm_load_ps((float*) src + 4); xmm2 = _mm_load_ps((float*) src + 8); xmm3 = _mm_load_ps((float*) src + 12); _mm_stream_ps((float*) dst, xmm0); _mm_stream_ps((float*) dst + 4, xmm1); _mm_stream_ps((float*) dst + 8, xmm2); _mm_stream_ps((float*) dst + 12, xmm3); #ifdef __GNUC__ (char*) src += 64; (char*) dst += 64; #else src += 64; dst += 64; #endif } } if (len) memcpy(dst, src, len); return retval; } int main(void) { char *tmp1, *tmp2; (void*) tmp1 = malloc(1024 * 1024 * 10); (void*) tmp2 = malloc(1024 * 1024 * 10); memcpy_sse(tmp1, tmp2, 1024 * 1024 * 10); free(tmp1); free(tmp2); return 0; } >Fix: >Release-Note: >Audit-Trail: >Unformatted: