public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug c/23570] New: Internal compiler error
@ 2005-08-26 2:32 chen at sys dot wakayama-u dot ac dot jp
2005-08-26 2:50 ` [Bug target/23570] " pinskia at gcc dot gnu dot org
` (9 more replies)
0 siblings, 10 replies; 11+ messages in thread
From: chen at sys dot wakayama-u dot ac dot jp @ 2005-08-26 2:32 UTC (permalink / raw)
To: gcc-bugs
The compiler gives an internal compiler error when I try to compile my program with
-O2.
If I compile with -O1, it's OK.
% gcc -O2 -msse2 a.c
a.c: In function 'ludcompf':
a.c:505: internal compiler error: in merge_assigned_reloads, at reload1.c:6091
Please submit a full bug report,
with preprocessed source if appropriate.
See <URL:http://gcc.gnu.org/bugs.html> for instructions.
gcc -v
Using built-in specs.
Target: i686-pc-linux-gnu
Configured with: ../gcc-4.0.2/configure --prefix=/usr --libexecdir=/usr/lib
--enable-shared --enable-threads=posix --enable-__cxa_atexit
--enable-clocale=gnu --enable-libada
--enable-languages=c,ada,c++,f95,java,objc,treelang
Thread model: posix
gcc version 4.0.2 20050825 (prerelease)
/* a.c */
extern int printf (__const char *__restrict __format, ...);
extern double fabs (double __x) __attribute__ ((__nothrow__)) __attribute__
((__const__)); extern double __fabs (double __x) __attribute__ ((__nothrow__))
__attribute__ ((__const__));
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
typedef float __m128 __attribute__ ((__vector_size__ (16)));
/* Returns a 4-float vector with every lane set to 0.0f. */
static __inline __m128
_mm_setzero_ps (void)
{
return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
}
/* Forwards both operands to __builtin_ia32_maxps (packed-float maximum, one result per lane). */
static __inline __m128
_mm_max_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_maxps ((__v4sf)__A, (__v4sf)__B);
}
/*
 * Forwards to __builtin_ia32_cmpeqps (packed-float equality compare).
 * Callers in this file cast the result to __m128i and use it as a per-lane
 * bit mask for and/andnot/or blending.
 */
static __inline __m128
_mm_cmpeq_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B);
}
/* Returns a 4-float vector with __F replicated into all four lanes. */
static __inline __m128
_mm_set1_ps (float __F)
{
return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F };
}
/* Forwards to __builtin_ia32_andps (bitwise AND of two 128-bit float vectors). */
static __inline __m128
_mm_and_ps (__m128 __A, __m128 __B)
{
return __builtin_ia32_andps (__A, __B);
}
/* Loads four consecutive floats starting at __P via __builtin_ia32_loadups (unaligned load). */
static __inline __m128
_mm_loadu_ps (float const *__P)
{
return (__m128) __builtin_ia32_loadups (__P);
}
/*
 * Builds a 4-float vector from scalars in lane order:
 * __Z goes to lane 0, __Y to lane 1, __X to lane 2, __W to lane 3.
 */
static __inline __m128
_mm_setr_ps (float __Z, float __Y, float __X, float __W)
{
return __extension__ (__m128)(__v4sf){ __Z, __Y, __X, __W };
}
/* Stores the four floats of __A to __P via __builtin_ia32_storeups (unaligned store). */
static __inline void
_mm_storeu_ps (float *__P, __m128 __A)
{
__builtin_ia32_storeups (__P, (__v4sf)__A);
}
/* Forwards to __builtin_ia32_addps (packed-float addition, lane by lane). */
static __inline __m128
_mm_add_ps (__m128 __A, __m128 __B)
{
return (__m128)__builtin_ia32_addps ((__v4sf)__A, (__v4sf)__B);
}
/* Forwards to __builtin_ia32_subps (packed-float subtraction, __A - __B lane by lane). */
static __inline __m128
_mm_sub_ps (__m128 __A, __m128 __B)
{
return (__m128)__builtin_ia32_subps ((__v4sf)__A, (__v4sf)__B);
}
/* Forwards to __builtin_ia32_mulps (packed-float multiplication, lane by lane). */
static __inline __m128
_mm_mul_ps (__m128 __A, __m128 __B)
{
return (__m128)__builtin_ia32_mulps ((__v4sf)__A, (__v4sf)__B);
}
typedef double __v2df __attribute__ ((__vector_size__ (16)));
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
typedef int __v4si __attribute__ ((__vector_size__ (16)));
typedef __v2di __m128i;
typedef __v2df __m128d;
/* Returns a 2-double vector with __F replicated into both lanes. */
static __inline __m128d
_mm_set1_pd (double __F)
{
return __extension__ (__m128d){ __F, __F };
}
/* Builds a 2-double vector in lane order: __W goes to lane 0, __X to lane 1. */
static __inline __m128d
_mm_setr_pd (double __W, double __X)
{
return __extension__ (__m128d){ __W, __X };
}
/* Loads two consecutive doubles starting at __P via __builtin_ia32_loadupd (unaligned load). */
static __inline __m128d
_mm_loadu_pd (double const *__P)
{
return __builtin_ia32_loadupd (__P);
}
/* Stores the two doubles of __A to __P via __builtin_ia32_storeupd (unaligned store). */
static __inline void
_mm_storeu_pd (double *__P, __m128d __A)
{
__builtin_ia32_storeupd (__P, __A);
}
/* Returns a 2-double vector with __F in lane 0 and zero in lane 1. */
static __inline __m128d
_mm_set_sd (double __F)
{
return __extension__ (__m128d){ __F, 0 };
}
/* Reads one double from __P and returns it in lane 0 with lane 1 zeroed (via _mm_set_sd). */
static __inline __m128d
_mm_load_sd (double const *__P)
{
return _mm_set_sd (*__P);
}
/* Forwards to __builtin_ia32_andpd (bitwise AND of two 128-bit double vectors). */
static __inline __m128d
_mm_and_pd (__m128d __A, __m128d __B)
{
return __builtin_ia32_andpd (__A, __B);
}
/*
 * ANDs `a` with a constant mask whose only cleared bit is bit 31 of 32-bit
 * word 1. On the little-endian x86 target of this report that is the sign bit
 * of the low double, so the result is { fabs(a[0]), a[1] }: the high lane is
 * passed through unchanged (its mask words are all ones).
 */
static __inline __m128d se2_abssd(__m128d a)
{
/* The mask is spelled as four 32-bit words and reinterpreted as a vector. */
static const union {
__m128d m;
unsigned int i[4];
} u = {
.i[0] = 0xffffffffUL, .i[1] = 0x7fffffffUL,
.i[2] = 0xffffffffUL, .i[3] = 0xffffffffUL
};
__m128d msk = u.m;
return (__m128d)_mm_and_pd(a, msk);
}
/* Forwards to __builtin_ia32_addpd (packed-double addition, lane by lane). */
static __inline __m128d
_mm_add_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
}
/* Forwards to __builtin_ia32_subpd (packed-double subtraction, __A - __B lane by lane). */
static __inline __m128d
_mm_sub_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
}
/* Forwards to __builtin_ia32_mulpd (packed-double multiplication, lane by lane). */
static __inline __m128d
_mm_mul_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
}
/* Forwards to __builtin_ia32_mulsd (scalar-double multiply on the low lane). */
static __inline __m128d
_mm_mul_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
}
/* Forwards to __builtin_ia32_maxpd (packed-double maximum, one result per lane). */
static __inline __m128d
_mm_max_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
}
/*
 * Forwards to __builtin_ia32_unpckhpd (UNPCKHPD), which interleaves the high
 * doubles of its operands: the result is { __A[1], __B[1] }.
 */
static __inline __m128d
_mm_unpackhi_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
}
/*
 * Forwards to __builtin_ia32_cmpeqpd (packed-double equality compare).
 * Callers in this file cast the result to __m128i and use it as a per-lane
 * bit mask for and/andnot/or blending.
 */
static __inline __m128d
_mm_cmpeq_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
}
/*
 * Forwards to __builtin_ia32_comisdlt and returns its int result
 * (COMISD-based "less than" test on the low doubles: nonzero when
 * __A[0] < __B[0]).
 */
static __inline int
_mm_comilt_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
}
/* Forwards to __builtin_ia32_paddd128: adds the operands as four 32-bit integer lanes. */
static __inline __m128i
_mm_add_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B);
}
/* Forwards to __builtin_ia32_pand128 (bitwise AND of two 128-bit integer vectors). */
static __inline __m128i
_mm_and_si128 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
}
/*
 * Forwards to __builtin_ia32_pandn128 (PANDN): the FIRST operand is the one
 * that gets inverted, i.e. the result is (~__A) & __B.
 */
static __inline __m128i
_mm_andnot_si128 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
}
/* Forwards to __builtin_ia32_por128 (bitwise OR of two 128-bit integer vectors). */
static __inline __m128i
_mm_or_si128 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
}
/*
 * 16-byte-aligned union giving typed views of one SSE register image: the
 * three vector types plus scalar arrays for each element width. The functions
 * below use it to build constant index vectors (.si[]) and to read individual
 * lanes back out (.si[], .sf[]).
 *
 * NOTE(review): udi[4] is four 8-byte elements (32 bytes) while every other
 * member is 16 bytes, so it doubles sizeof(um128). udi[2] was presumably
 * intended -- confirm before changing, since this file is an ICE reproducer.
 */
typedef union {
__m128 xmm;
__m128i xmmi;
__m128d xmmd;
long long di[2];
unsigned long long udi[4];
int si[4];
unsigned int usi[4];
short hi[8];
unsigned short uhi[8];
signed char qi[16];
unsigned char uqi[16];
double df[2];
float sf[4];
} __attribute__ ((aligned(16))) um128;
/*
 * Lane-wise absolute value of four floats: ANDs `a` with a constant mask
 * that has bit 31 (the float sign bit) cleared in each 32-bit word.
 */
static __inline __m128 se_absps(__m128 a)
{
/* The mask is spelled as four 32-bit words and reinterpreted as a vector. */
static const union {
__m128 m;
unsigned int i[4];
} u = {
.i[0] = 0x7fffffffUL, .i[1] = 0x7fffffffUL,
.i[2] = 0x7fffffffUL, .i[3] = 0x7fffffffUL
};
__m128 msk = u.m;
return (__m128)_mm_and_ps(a, msk);
}
/*
 * Lane-wise absolute value of two doubles: ANDs `a` with a constant mask
 * that has bit 31 cleared in 32-bit words 1 and 3. On the little-endian x86
 * target of this report those are the upper halves of the two doubles, i.e.
 * their sign bits, so the result is { fabs(a[0]), fabs(a[1]) }.
 */
static __inline __m128d se2_abspd(__m128d a)
{
/* The mask is spelled as four 32-bit words and reinterpreted as a vector. */
static const union {
__m128d m;
unsigned int i[4];
} u = {
.i[0] = 0xffffffffUL, .i[1] = 0x7fffffffUL,
.i[2] = 0xffffffffUL, .i[3] = 0x7fffffffUL
};
/*
 * The mask must have the same vector type as u.m and as _mm_and_pd's
 * parameters. It used to be declared __m128 (float vector), which only
 * compiles where implicit conversion between unrelated vector types is
 * permitted; se2_abssd already uses __m128d here.
 */
__m128d msk = u.m;
return (__m128d)_mm_and_pd(a, msk);
}
/*
 * Exchanges the two entries prow[n1] and prow[n2].
 * Both values are read before either is written, so n1 == n2 leaves the
 * array unchanged.
 */
static void swap_index(int *prow, int n1, int n2)
{
int *p1 = prow + n1;
int *p2 = prow + n2;
/* From here on n1/n2 are reused to hold the element values, not indices. */
n1 = *p1;
n2 = *p2;
*p1 = n2;
*p2 = n1;
}
/*
 * Returns the index k in [0, n) of an element v[k * step] with the largest
 * absolute value, scanning two elements per iteration.
 *
 * v    - first element of the strided sequence
 * step - distance, in doubles, between consecutive elements
 * n    - number of elements; 0 is returned when n <= 1
 *
 * Side effect: prints all n elements followed by a newline to stdout before
 * doing anything else.
 *
 * Lane 0 tracks the even-numbered elements and lane 1 the odd-numbered ones;
 * `mm` holds the running per-lane maximum and `mim` the element index that
 * produced it. The two lanes are compared against each other at the end.
 */
static int sse2_max_abs_index(double *v, int step, int n)
{
__m128d m1, mm;
__m128i mi1, mim, mi, msk;
um128 u;
double *v2end;
int step2, n2;
/* Initial candidate indices: 0 for lane 0 (si[0]) and 1 for lane 1 (si[2]). */
static const um128 i0i1 = {
.si[0]=0, .si[1]=0, .si[2]=1, .si[3] = 0
};
/* Per-iteration index increment: both lanes advance by two elements. */
static const um128 i1i1 = {
.si[0]=2, .si[1]=0, .si[2]=2, .si[3] = 0
};
/* Dump of the input column; this runs even when n <= 1. */
for (n2 = 0; n2 < n; ++n2) printf("%f ", v[step * n2]); printf("\n");
if (n <= 1) return 0;
step2 = step + step;
/* One past the last complete pair, computed before v is advanced. */
v2end = v + (n / 2) * step2;
/* Seed the running maximum with the first pair. */
mm = se2_abspd(_mm_setr_pd(v[0], v[step]));
v += step2;
mi1 = i1i1.xmmi;
mim = mi = i0i1.xmmi;
while (v < v2end) {
mi = _mm_add_epi32(mi, mi1);
m1 = se2_abspd(_mm_setr_pd(v[0], v[step]));
v += step2;
mm = _mm_max_pd(mm, m1);
/* msk is all ones in lanes where the new value is now the maximum (ties included). */
msk = (__m128i)_mm_cmpeq_pd(m1, mm);
/* Blend: take the new index where msk is set, keep the old one elsewhere. */
mim = _mm_or_si128(_mm_and_si128(msk, mi), _mm_andnot_si128(msk, mim));
}
if (n & 1) {
/* Odd element count: the last element goes in lane 0, lane 1 is fed 0.0. */
mi = _mm_add_epi32(mi, mi1);
m1 = se2_abssd(_mm_load_sd(v));
mm = _mm_max_pd(mm, m1);
msk = (__m128i)_mm_cmpeq_pd(m1, mm);
mim = _mm_or_si128(_mm_and_si128(msk, mi), _mm_andnot_si128(msk, mim));
}
/* Broadcast lane 1's maximum so it can be compared against lane 0's. */
m1 = _mm_unpackhi_pd(mm, mm);
u.xmmi = mim;
/* Lane 1 wins only when strictly greater; otherwise lane 0's index is returned. */
if (_mm_comilt_sd(mm, m1))
return u.si[2];
return u.si[0];
}
/*
 * Computes dst[i] += k * src[i] for i in [0, n).
 * Elements are processed two at a time with unaligned vector loads/stores;
 * a scalar statement handles the last element when n is odd.
 */
static void sse2_add_row(double *dst, double *src, double k, int n)
{
/* End of the part of dst that can be handled in whole pairs. */
double *dst2end = dst + (n / 2) * 2;
__m128d mk = _mm_set1_pd(k);
while (dst < dst2end) {
__m128d s = _mm_loadu_pd(src);
__m128d d = _mm_loadu_pd(dst);
s = _mm_mul_pd(s, mk);
d = _mm_add_pd(d, s);
_mm_storeu_pd(dst, d);
src += 2;
dst += 2;
}
if (n & 1) {
dst[0] += k * src[0];
}
}
/*
 * Exchanges the n doubles at r1 with the n doubles at r2.
 * Pairs are swapped with unaligned vector loads/stores; a scalar swap
 * handles the last element when n is odd.
 */
static void sse2_swap_row(double *r1, double *r2, int n)
{
/* End of the part of r1 that can be handled in whole pairs. */
double *r12end = r1 + (n / 2) * 2;
while (r1 < r12end) {
__m128d v1 = _mm_loadu_pd(r1);
__m128d v2 = _mm_loadu_pd(r2);
_mm_storeu_pd(r1, v2);
_mm_storeu_pd(r2, v1);
r1 += 2;
r2 += 2;
}
if (n & 1) {
double t = *r1;
*r1 = *r2;
*r2 = t;
}
}
/*
 * Float counterpart of sse2_max_abs_index: returns the index k in [0, n) of
 * an element v[k * step] with the largest absolute value, scanning four
 * elements per iteration.
 *
 * v    - first element of the strided sequence
 * step - distance, in floats, between consecutive elements
 * n    - number of elements; 0 is returned when n <= 1
 *
 * Lane j tracks the elements whose index is j modulo 4; `mm` holds the
 * running per-lane maximum and `mim` the element index that produced it.
 * The four lanes are reduced with scalar comparisons at the end.
 */
static int sse_max_abs_indexf(float *v, int step, int n)
{
__m128 m1, mm;
__m128i mi1, mim, mi, msk;
um128 u, ui;
float *v4end, t;
int n4, step2, step3, step4;
/* Initial candidate indices 0..3, one per lane. */
static const um128 i0123 = {
.si[0]=0, .si[1]=1, .si[2]=2, .si[3]=3
};
/* Per-iteration index increment: every lane advances by four elements. */
static const um128 i1111 = {
.si[0]=4, .si[1]=4, .si[2]=4, .si[3]=4
};
if (n <= 1) return 0;
/* Number of elements covered by complete groups of four. */
n4 = (n / 4) * 4;
mi1 = i1111.xmmi;
mim = mi = i0123.xmmi;
/* Absolute values are never negative, so zero is a safe initial maximum. */
mm = _mm_setzero_ps();
if (n4 > 0) {
/* step2/step3/step4 are only assigned, and only used, inside this branch. */
step2 = step + step;
step3 = step2 + step;
step4 = step2 + step2;
v4end = v + n4 * step;
/* Seed the running maximum with the first group of four. */
mm = se_absps(_mm_setr_ps(v[0], v[step], v[step2], v[step3]));
v += step4;
mi = _mm_add_epi32(mi, mi1);
while (v < v4end) {
m1 = se_absps(_mm_setr_ps(v[0], v[step], v[step2], v[step3]));
mm = _mm_max_ps(mm, m1);
/* msk is all ones in lanes where the new value is now the maximum (ties included). */
msk = (__m128i)_mm_cmpeq_ps(m1, mm);
/* Blend: take the new index where msk is set, keep the old one elsewhere. */
mim = _mm_or_si128(_mm_and_si128(msk, mi), _mm_andnot_si128(msk, mim));
v += step4;
mi = _mm_add_epi32(mi, mi1);
}
}
/* n4 now becomes the number of leftover elements (0..3). */
n4 = n - n4;
if (n4) {
int i;
/* Gather the leftovers into a zero-padded vector and fold it in like a full group. */
u.xmm = _mm_setzero_ps();
for (i = 0; i < n4; ++i) {
u.sf[i] = v[0];
v += step;
}
m1 = se_absps(u.xmm);
mm = _mm_max_ps(mm, m1);
msk = (__m128i)_mm_cmpeq_ps(m1, mm);
mim = _mm_or_si128(_mm_and_si128(msk, mi), _mm_andnot_si128(msk, mim));
}
/* Horizontal reduction: pick the lane with the largest maximum (first lane wins ties). */
ui.xmmi = mim;
u.xmm = mm;
t = u.sf[0];
/* n is reused as the winning lane number from here on. */
n = 0;
if (u.sf[1] > t) { t = u.sf[1]; n = 1; }
if (u.sf[2] > t) { t = u.sf[2]; n = 2; }
if (u.sf[3] > t) { t = u.sf[3]; n = 3; }
return ui.si[n];
}
/*
 * Computes dst[i] += k * src[i] for i in [0, n).
 * Elements are processed four at a time with unaligned vector loads/stores;
 * a scalar loop handles the 0..3 leftover elements.
 */
static void sse_add_rowf(float *dst, float *src, float k, int n)
{
/* Number of elements covered by complete groups of four. */
int n4 = (n / 4) * 4;
int i;
float *dst4end = dst + n4;
__m128 mk = _mm_set1_ps(k);
while (dst < dst4end) {
__m128 s = _mm_loadu_ps(src);
__m128 d = _mm_loadu_ps(dst);
s = _mm_mul_ps(s, mk);
d = _mm_add_ps(d, s);
_mm_storeu_ps(dst, d);
src += 4;
dst += 4;
}
/* n4 now becomes the leftover count; dst/src already point at the tail. */
n4 = n - n4;
for (i = 0; i < n4; ++i) {
dst[i] += k * src[i];
}
}
/*
 * Exchanges the n floats at r1 with the n floats at r2.
 * Groups of four are swapped with unaligned vector loads/stores; a scalar
 * loop swaps the 0..3 leftover elements.
 */
static void sse_swap_rowf(float *r1, float *r2, int n)
{
/* `i` is declared but not used in this function. */
int i;
/* Number of elements covered by complete groups of four. */
int n4 = (n / 4) * 4;
float *r14end = r1 + n4;
while (r1 < r14end) {
__m128 v1 = _mm_loadu_ps(r1);
__m128 v2 = _mm_loadu_ps(r2);
_mm_storeu_ps(r1, v2);
_mm_storeu_ps(r2, v1);
r1 += 4;
r2 += 4;
}
/* Reuse r14end as the end of the scalar tail (n - n4 elements past the vector part). */
r14end = r1 + n - n4;
while (r1 < r14end) {
float t = *r1;
*r1 = *r2;
*r2 = t;
r1++;
r2++;
}
}
/*
 * In-place elimination with row pivoting on an n x n matrix of doubles
 * (the name suggests LU decomposition).
 *
 * m    - matrix storage; row r starts at m + r * nw
 * nw   - row stride in doubles; row swaps exchange nw elements
 * prow - output array of n ints; initialised to 0..n-1 and permuted in step
 *        with every row swap
 * n    - matrix dimension
 *
 * Returns s, which starts at 0 and flips between 0 and 1 on every row swap,
 * i.e. the parity of the number of swaps performed.
 *
 * For each column i the row (from i downward) with the largest |entry| is
 * swapped into position i; then, for each row below, the multiplier
 * k = entry / pivot is stored in place of the eliminated entry and k times
 * the pivot row is subtracted from the remaining columns i+1 .. n-1.
 *
 * NOTE(review): the pivot r is not checked against zero before dividing.
 * Prints a banner line, and sse2_max_abs_index prints each pivot column.
 */
int
ludcompd(double *m, int nw, int *prow, int n)
{
int i, s = 0;
double *pm;
for (i = 0; i < n; ++i) prow[i] = i;
printf("ludcompd(): SSE2 code is used.\n");
/* pm always points at the start of row i. */
for (i = 0, pm = m; i < n - 1; ++i, pm += nw) {
/* vi is the pivot row's offset relative to row i (0 means no swap needed). */
int vi = sse2_max_abs_index(pm + i, nw, n - i);
double r, *pt;
int j;
if (vi != 0) {
sse2_swap_row(pm, pm + vi * nw, nw);
swap_index(prow, i, i + vi);
s = 1 - s;
}
r = pm[i];
/* pt walks the rows below the pivot row. */
for (j = i + 1, pt = pm + nw; j < n; ++j, pt += nw) {
double k = pt[i] / r;
/* Keep the multiplier in the slot that elimination would zero out. */
pt[i] = k;
sse2_add_row(pt + i + 1, pm + i + 1, -k, n - i - 1);
}
}
return s;
}
/*
 * Single-precision counterpart of ludcompd: in-place elimination with row
 * pivoting on an n x n matrix of floats. This is the function in which the
 * reported internal compiler error occurs at -O2.
 *
 * m    - matrix storage; row r starts at m + r * nw
 * nw   - row stride in floats; row swaps exchange nw elements
 * prow - output array of n ints; initialised to 0..n-1 and permuted in step
 *        with every row swap
 * n    - matrix dimension
 *
 * Returns s, which starts at 0 and flips between 0 and 1 on every row swap,
 * i.e. the parity of the number of swaps performed.
 *
 * NOTE(review): the pivot r is not checked against zero before dividing.
 * Prints a banner line to stdout.
 */
int
ludcompf(float *m, int nw, int *prow, int n)
{
int i, s = 0;
float *pm;
for (i = 0; i < n; ++i) prow[i] = i;
printf("ludcompf(): SSE2 code is used.\n");
/* pm always points at the start of row i. */
for (i = 0, pm = m; i < n - 1; ++i, pm += nw) {
/* vi is the pivot row's offset relative to row i (0 means no swap needed). */
int vi = sse_max_abs_indexf(pm + i, nw, n - i);
float r, *pt;
int j;
if (vi != 0) {
sse_swap_rowf(pm, pm + vi * nw, nw);
swap_index(prow, i, i + vi);
s = 1 - s;
}
r = pm[i];
/* pt walks the rows below the pivot row. */
for (j = i + 1, pt = pm + nw; j < n; ++j, pt += nw) {
float k = pt[i] / r;
/* Keep the multiplier in the slot that elimination would zero out. */
pt[i] = k;
sse_add_rowf(pt + i + 1, pm + i + 1, -k, n - i - 1);
}
}
return s;
}
/*
 * Runs ludcompd on a fixed 4x4 matrix and prints, in order: the returned
 * swap parity, the resulting row permutation, and the four rows of the
 * matrix as modified in place.
 */
void test_ludcompd(void)
{
/* static: the matrix is modified in place by ludcompd. */
static double m[4][4] = {
{ 1, 2, 3, 4 },
{ 4, 2, 1, 7 },
{ 5, 6, 10, 78 },
{ 3, 2, 1, 0 }
};
int p[4];
printf("%d\n", ludcompd(&m[0][0], 4, p, 4));
printf("%d %d %d %d\n", p[0], p[1], p[2], p[3]);
printf("%1.3f %1.3f %1.3f %1.3f\n", m[0][0], m[0][1], m[0][2], m[0][3]);
printf("%1.3f %1.3f %1.3f %1.3f\n", m[1][0], m[1][1], m[1][2], m[1][3]);
printf("%1.3f %1.3f %1.3f %1.3f\n", m[2][0], m[2][1], m[2][2], m[2][3]);
printf("%1.3f %1.3f %1.3f %1.3f\n", m[3][0], m[3][1], m[3][2], m[3][3]);
}
/*
 * Runs ludcompf on the same fixed 4x4 matrix (as floats) and prints, in
 * order: the returned swap parity, the resulting row permutation, and the
 * four rows of the matrix as modified in place.
 */
void test_ludcompf(void)
{
/* static: the matrix is modified in place by ludcompf. */
static float m[4][4] = {
{ 1, 2, 3, 4 },
{ 4, 2, 1, 7 },
{ 5, 6, 10, 78 },
{ 3, 2, 1, 0 }
};
int p[4];
printf("%d\n", ludcompf(&m[0][0], 4, p, 4));
printf("%d %d %d %d\n", p[0], p[1], p[2], p[3]);
printf("%1.3f %1.3f %1.3f %1.3f\n", m[0][0], m[0][1], m[0][2], m[0][3]);
printf("%1.3f %1.3f %1.3f %1.3f\n", m[1][0], m[1][1], m[1][2], m[1][3]);
printf("%1.3f %1.3f %1.3f %1.3f\n", m[2][0], m[2][1], m[2][2], m[2][3]);
printf("%1.3f %1.3f %1.3f %1.3f\n", m[3][0], m[3][1], m[3][2], m[3][3]);
}
/* Entry point: runs the double-precision test, then the single-precision one. */
int main()
{
test_ludcompd();
test_ludcompf();
return 0;
}
--
Summary: Internal compiler error
Product: gcc
Version: 4.0.2
Status: UNCONFIRMED
Severity: normal
Priority: P2
Component: c
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: chen at sys dot wakayama-u dot ac dot jp
CC: gcc-bugs at gcc dot gnu dot org
GCC build triplet: i686-pc-linux-gnu
GCC host triplet: i686-pc-linux-gnu
GCC target triplet: i686-pc-linux-gnu
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23570
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Bug target/23570] [4.0/4.1 Regression] internal compiler error: in import_export_decl, at cp/decl2.c:1726
2005-08-26 2:32 [Bug c/23570] New: Internal compiler error chen at sys dot wakayama-u dot ac dot jp
2005-08-26 2:50 ` [Bug target/23570] " pinskia at gcc dot gnu dot org
@ 2005-08-26 2:50 ` pinskia at gcc dot gnu dot org
2005-08-26 2:58 ` pinskia at gcc dot gnu dot org
` (7 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: pinskia at gcc dot gnu dot org @ 2005-08-26 2:50 UTC (permalink / raw)
To: gcc-bugs
------- Additional Comments From pinskia at gcc dot gnu dot org 2005-08-26 02:50 -------
Reducing.
--
What |Removed |Added
----------------------------------------------------------------------------
CC| |pinskia at gcc dot gnu dot
| |org
Known to fail| |4.0.0 4.1.0
Known to work| |3.4.0
Summary|Internal compiler error |[4.0/4.1 Regression]
| |internal compiler error: in
| |import_export_decl, at
| |cp/decl2.c:1726
Target Milestone|--- |4.0.2
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23570
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Bug target/23570] Internal compiler error
2005-08-26 2:32 [Bug c/23570] New: Internal compiler error chen at sys dot wakayama-u dot ac dot jp
@ 2005-08-26 2:50 ` pinskia at gcc dot gnu dot org
2005-08-26 2:50 ` [Bug target/23570] [4.0/4.1 Regression] internal compiler error: in import_export_decl, at cp/decl2.c:1726 pinskia at gcc dot gnu dot org
` (8 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: pinskia at gcc dot gnu dot org @ 2005-08-26 2:50 UTC (permalink / raw)
To: gcc-bugs
--
What |Removed |Added
----------------------------------------------------------------------------
Component|c |target
Keywords| |ice-on-valid-code
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23570
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Bug target/23570] [4.0/4.1 Regression] internal compiler error: in import_export_decl, at cp/decl2.c:1726
2005-08-26 2:32 [Bug c/23570] New: Internal compiler error chen at sys dot wakayama-u dot ac dot jp
2005-08-26 2:50 ` [Bug target/23570] " pinskia at gcc dot gnu dot org
2005-08-26 2:50 ` [Bug target/23570] [4.0/4.1 Regression] internal compiler error: in import_export_decl, at cp/decl2.c:1726 pinskia at gcc dot gnu dot org
@ 2005-08-26 2:58 ` pinskia at gcc dot gnu dot org
2005-08-26 3:36 ` [Bug target/23570] [4.0/4.1 Regression] internal compiler error: in merge_assigned_reloads, at reload1.c:6091 pinskia at gcc dot gnu dot org
` (6 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: pinskia at gcc dot gnu dot org @ 2005-08-26 2:58 UTC (permalink / raw)
To: gcc-bugs
--
What |Removed |Added
----------------------------------------------------------------------------
GCC build triplet|i686-pc-linux-gnu |
GCC host triplet|i686-pc-linux-gnu |
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23570
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Bug target/23570] [4.0/4.1 Regression] internal compiler error: in merge_assigned_reloads, at reload1.c:6091
2005-08-26 2:32 [Bug c/23570] New: Internal compiler error chen at sys dot wakayama-u dot ac dot jp
` (2 preceding siblings ...)
2005-08-26 2:58 ` pinskia at gcc dot gnu dot org
@ 2005-08-26 3:36 ` pinskia at gcc dot gnu dot org
2005-08-26 3:48 ` pinskia at gcc dot gnu dot org
` (5 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: pinskia at gcc dot gnu dot org @ 2005-08-26 3:36 UTC (permalink / raw)
To: gcc-bugs
--
What |Removed |Added
----------------------------------------------------------------------------
Summary|[4.0/4.1 Regression] |[4.0/4.1 Regression]
|internal compiler error: in |internal compiler error: in
|import_export_decl, at |merge_assigned_reloads, at
|cp/decl2.c:1726 |reload1.c:6091
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23570
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Bug target/23570] [4.0/4.1 Regression] internal compiler error: in merge_assigned_reloads, at reload1.c:6091
2005-08-26 2:32 [Bug c/23570] New: Internal compiler error chen at sys dot wakayama-u dot ac dot jp
` (3 preceding siblings ...)
2005-08-26 3:36 ` [Bug target/23570] [4.0/4.1 Regression] internal compiler error: in merge_assigned_reloads, at reload1.c:6091 pinskia at gcc dot gnu dot org
@ 2005-08-26 3:48 ` pinskia at gcc dot gnu dot org
2005-08-26 7:52 ` uros at kss-loka dot si
` (4 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: pinskia at gcc dot gnu dot org @ 2005-08-26 3:48 UTC (permalink / raw)
To: gcc-bugs
------- Additional Comments From pinskia at gcc dot gnu dot org 2005-08-26 03:36 -------
Reduced as far as I can get this:
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
typedef float __m128 __attribute__ ((__vector_size__ (16)));
/* Reduced testcase: forwards to __builtin_ia32_cmpeqps (packed-float equality compare). */
static __inline __m128 _mm_cmpeq_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B);
}
/*
 * Reduced testcase: builds a 4-float vector in lane order (__Z in lane 0).
 * Not called anywhere in this reduced snippet.
 */
static __inline __m128 _mm_setr_ps (float __Z, float __Y, float __X, float __W)
{
return __extension__ (__m128)(__v4sf){__Z, __Y, __X, __W };
}
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
/*
 * Reduced testcase: bitwise AND via __builtin_ia32_pand128. The reduction
 * retyped the parameters and result as __m128 (float vector), with casts to
 * __v2di at the builtin call.
 */
static __inline __m128 _mm_and_si128 (__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
}
/*
 * Reduced testcase: bitwise OR via __builtin_ia32_por128. The reduction
 * retyped the parameters and result as __m128 (float vector), with casts to
 * __v2di at the builtin call.
 */
static __inline __m128 _mm_or_si128 (__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
}
/* Reduced testcase: 16-byte-aligned union overlaying one float vector with four ints. */
typedef union { __m128 xmmi; int si[4]; } __attribute__ ((aligned(16))) um128;
um128 u;
/*
 * Reduced testcase: what is left of sse_max_abs_indexf after reduction.
 * This is a compile-only reproducer and is not meant to be executed: the
 * locals step2, step3, n4, m1, mi, mim and u (which shadows the file-scope
 * u) are all read without ever being assigned, and the for loop has an empty
 * body. Leave it as it is -- the exact shape is what triggers the ICE.
 */
static inline int sse_max_abs_indexf(float *v, int step, int n)
{
__m128 m1, mm;
__m128 mim, mi, msk;
um128 u, ui;
int n4, step2, step3;
/* Vector built from a constant plus three loads from v. */
mm = __builtin_ia32_andps((__m128)(__v4sf){0.0, v[step], v[step2], v[step3]},
u.xmmi);
if (n4) {
int i;
for (i = 0; i < n4; ++i) ;
msk = (__m128)_mm_cmpeq_ps(m1, mm);
mim = _mm_or_si128(_mm_and_si128(msk, mi), mim);
}
ui.xmmi = (__m128)mim;
return ui.si[n];
}
/*
 * Reduced testcase: despite the name this no longer swaps anything. It
 * stores the single value *r2 into the first (n / 4) * 4 elements of r1;
 * r2 is never advanced.
 */
static void sse_swap_rowf(float *r1, float *r2, int n) {
int n4 = (n / 4) * 4;
float *r14end = r1 + n4;
while (r1 < r14end) {
*r1 = *r2;
r1++;
}
}
/*
 * Reduced testcase: driver loop that keeps the call structure of the
 * original ludcompf. swap_index and sse_add_rowf are called without any
 * declaration visible in this reduced snippet, and `s` is set but never
 * used. Compile-only reproducer; not meant to be run.
 */
void ludcompf(float *m, int nw, int *prow, int n) {
int i, s = 0;
float *pm;
for (i = 0, pm = m; i < n - 1; ++i, pm += nw)
{
int vi = sse_max_abs_indexf(pm + i, nw, n - i);
float *pt;
int j;
if (vi != 0)
{
sse_swap_rowf(pm, pm + vi * nw, nw);
swap_index(prow, i, i + vi);
}
for (j = i + 1, pt = pm + nw; j < n; ++j, pt += nw)
sse_add_rowf(pt + i + 1, pm + i + 1, -1.0, n - i - 1);
}
}
--
What |Removed |Added
----------------------------------------------------------------------------
Status|UNCONFIRMED |NEW
Ever Confirmed| |1
Last reconfirmed|0000-00-00 00:00:00 |2005-08-26 03:36:35
date| |
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23570
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Bug target/23570] [4.0/4.1 Regression] internal compiler error: in merge_assigned_reloads, at reload1.c:6091
2005-08-26 2:32 [Bug c/23570] New: Internal compiler error chen at sys dot wakayama-u dot ac dot jp
` (4 preceding siblings ...)
2005-08-26 3:48 ` pinskia at gcc dot gnu dot org
@ 2005-08-26 7:52 ` uros at kss-loka dot si
2005-08-26 10:28 ` uros at kss-loka dot si
` (3 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: uros at kss-loka dot si @ 2005-08-26 7:52 UTC (permalink / raw)
To: gcc-bugs
------- Additional Comments From uros at kss-loka dot si 2005-08-26 07:50 -------
The problem here is in the sse_concatv2sf pattern:
;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
(define_insn "*sse_concatv2sf"
[(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
(vec_concat:V2SF
(match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
(match_operand:SF 2 "vector_move_operand" " x,C,*y, C")))]
and the "vector_move_operand" operand predicate, defined as:
;; Return 1 when OP is operand acceptable for standard SSE move.
(define_predicate "vector_move_operand"
(ior (match_operand 0 "nonimmediate_operand")
(match_operand 0 "const0_operand")))
Please note that the "vector_move_operand" predicate allows memory operands, but the
constraints on operand 2 (x, C, *y) do not. So the following pattern confuses reload:
(insn:HI 63 62 64 3 (set (reg:V2SF 21 xmm0 [117])
(vec_concat:V2SF (mem:SF (plus:SI (plus:SI (reg/f:SI 68 [ ivtmp.71 ])
(reg:SI 88 [ D.1795 ]))
(const_int -4 [0xfffffffc])) [2 S4 A32])
(mem:SF (plus:SI (plus:SI (reg/f:SI 68 [ ivtmp.71 ])
(reg:SI 89 [ D.1800 ]))
(const_int -4 [0xfffffffc])) [2 S4 A32]))) 612
{*sse_concatv2sf} (nil)
(BTW: "sse2_loadld" pattern could have the same problem, no "m" register
constraint.)
The immediate fix would be to define another operand predicate, similar
to "vector_move_operand":
;; Same as above, but excluding memory operands.
(define_predicate "vector_move_nomem_operand"
(ior (match_operand 0 "register_operand")
(match_operand 0 "const0_operand")))
When operand 2 of the sse_concatv2sf pattern uses this new predicate,
gcc is able to compile both testcases, and the following result is
produced (for both -O1 and -O2):
ludcompd(): SSE2 code is used.
1.000000 4.000000 5.000000 3.000000
-2.800000 0.800000 -1.600000
-1.000000 -1.000000
0
2 1 3 0
5.000 6.000 10.000 78.000
0.800 -2.800 -7.000 -55.400
0.600 0.571 -1.000 -15.143
0.200 -0.286 1.000 -12.286
ludcompf(): SSE2 code is used.
1
2 1 0 3
5.000 6.000 10.000 78.000
0.800 -2.800 -7.000 -55.400
0.200 -0.286 -1.000 -27.429
0.600 0.571 1.000 12.286
Unfortunately, the ludcompf() result (the second one) is wrong when -O1 or -O2 is
used. It is correct without optimizations.
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23570
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Bug target/23570] [4.0/4.1 Regression] internal compiler error: in merge_assigned_reloads, at reload1.c:6091
2005-08-26 2:32 [Bug c/23570] New: Internal compiler error chen at sys dot wakayama-u dot ac dot jp
` (5 preceding siblings ...)
2005-08-26 7:52 ` uros at kss-loka dot si
@ 2005-08-26 10:28 ` uros at kss-loka dot si
2005-08-31 8:51 ` uros at kss-loka dot si
` (2 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: uros at kss-loka dot si @ 2005-08-26 10:28 UTC (permalink / raw)
To: gcc-bugs
------- Additional Comments From uros at kss-loka dot si 2005-08-26 09:35 -------
(In reply to comment #3)
> Unfortunatelly, ludcompf() result (the second one) is wrong when -O1 or -O2
> is used. It is correct without optimizations.
This is a problem with the infamous i387 precision handling. The error can be found
in this part of the code:
...
if (u.sf[1] > t) { t = u.sf[1]; n = 1; }
if (u.sf[2] > t) { t = u.sf[2]; n = 2; }
if (u.sf[3] > t) { t = u.sf[3]; n = 3; }
...
Without optimizations, the values of u.sf[1] and t that are at some moment
loaded into x87 registers are:
u.sf[1] = 1.000000119...
t = 0.999999880...
and branch is taken. However, with optimizations, the values are different:
u.sf[1] = 0.999999642...
t = 0.999999821...
This is a problem of the i387 design and not the problem of gcc. In your case,
you should use -ffloat-store or -mfpmath=sse.
BTW: At the moment, I have very limited time, so I won't be able to create a
patch to fix the ICE for some time...
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23570
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Bug target/23570] [4.0/4.1 Regression] internal compiler error: in merge_assigned_reloads, at reload1.c:6091
2005-08-26 2:32 [Bug c/23570] New: Internal compiler error chen at sys dot wakayama-u dot ac dot jp
` (6 preceding siblings ...)
2005-08-26 10:28 ` uros at kss-loka dot si
@ 2005-08-31 8:51 ` uros at kss-loka dot si
2005-08-31 17:34 ` cvs-commit at gcc dot gnu dot org
2005-09-27 15:57 ` [Bug target/23570] [4.0 " mmitchel at gcc dot gnu dot org
9 siblings, 0 replies; 11+ messages in thread
From: uros at kss-loka dot si @ 2005-08-31 8:51 UTC (permalink / raw)
To: gcc-bugs
------- Additional Comments From uros at kss-loka dot si 2005-08-31 08:48 -------
Patch.
--
What |Removed |Added
----------------------------------------------------------------------------
AssignedTo|unassigned at gcc dot gnu |uros at kss-loka dot si
|dot org |
URL| |http://gcc.gnu.org/ml/gcc-
| |patches/2005-
| |08/msg01819.html
Status|NEW |ASSIGNED
Keywords| |patch
Last reconfirmed|2005-08-26 03:36:35 |2005-08-31 08:48:43
date| |
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23570
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Bug target/23570] [4.0/4.1 Regression] internal compiler error: in merge_assigned_reloads, at reload1.c:6091
2005-08-26 2:32 [Bug c/23570] New: Internal compiler error chen at sys dot wakayama-u dot ac dot jp
` (7 preceding siblings ...)
2005-08-31 8:51 ` uros at kss-loka dot si
@ 2005-08-31 17:34 ` cvs-commit at gcc dot gnu dot org
2005-09-27 15:57 ` [Bug target/23570] [4.0 " mmitchel at gcc dot gnu dot org
9 siblings, 0 replies; 11+ messages in thread
From: cvs-commit at gcc dot gnu dot org @ 2005-08-31 17:34 UTC (permalink / raw)
To: gcc-bugs
------- Additional Comments From cvs-commit at gcc dot gnu dot org 2005-08-31 17:28 -------
Subject: Bug 23570
CVSROOT: /cvs/gcc
Module name: gcc
Changes by: rth@gcc.gnu.org 2005-08-31 17:27:54
Modified files:
gcc : ChangeLog
gcc/config/i386: sse.md
Added files:
gcc/testsuite/gcc.target/i386: pr23570.c
Log message:
PR target/23570
* config/i386/sse.md (*sse_concatv2sf): Change operand 2 constraint
to "reg_or_0_operand".
(sse2_loadld): Change operand 1 constraint to "reg_or_0_operand".
Patches:
http://gcc.gnu.org/cgi-bin/cvsweb.cgi/gcc/gcc/ChangeLog.diff?cvsroot=gcc&r1=2.9863&r2=2.9864
http://gcc.gnu.org/cgi-bin/cvsweb.cgi/gcc/gcc/config/i386/sse.md.diff?cvsroot=gcc&r1=1.23&r2=1.24
http://gcc.gnu.org/cgi-bin/cvsweb.cgi/gcc/gcc/testsuite/gcc.target/i386/pr23570.c.diff?cvsroot=gcc&r1=NONE&r2=1.1
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23570
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Bug target/23570] [4.0 Regression] internal compiler error: in merge_assigned_reloads, at reload1.c:6091
2005-08-26 2:32 [Bug c/23570] New: Internal compiler error chen at sys dot wakayama-u dot ac dot jp
` (8 preceding siblings ...)
2005-08-31 17:34 ` cvs-commit at gcc dot gnu dot org
@ 2005-09-27 15:57 ` mmitchel at gcc dot gnu dot org
9 siblings, 0 replies; 11+ messages in thread
From: mmitchel at gcc dot gnu dot org @ 2005-09-27 15:57 UTC (permalink / raw)
To: gcc-bugs
--
What |Removed |Added
----------------------------------------------------------------------------
Target Milestone|4.0.2 |4.0.3
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23570
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2005-09-27 15:57 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-08-26 2:32 [Bug c/23570] New: Internal compiler error chen at sys dot wakayama-u dot ac dot jp
2005-08-26 2:50 ` [Bug target/23570] " pinskia at gcc dot gnu dot org
2005-08-26 2:50 ` [Bug target/23570] [4.0/4.1 Regression] internal compiler error: in import_export_decl, at cp/decl2.c:1726 pinskia at gcc dot gnu dot org
2005-08-26 2:58 ` pinskia at gcc dot gnu dot org
2005-08-26 3:36 ` [Bug target/23570] [4.0/4.1 Regression] internal compiler error: in merge_assigned_reloads, at reload1.c:6091 pinskia at gcc dot gnu dot org
2005-08-26 3:48 ` pinskia at gcc dot gnu dot org
2005-08-26 7:52 ` uros at kss-loka dot si
2005-08-26 10:28 ` uros at kss-loka dot si
2005-08-31 8:51 ` uros at kss-loka dot si
2005-08-31 17:34 ` cvs-commit at gcc dot gnu dot org
2005-09-27 15:57 ` [Bug target/23570] [4.0 " mmitchel at gcc dot gnu dot org
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).