* [patch, fortran] Reduce stack use in blocked matmul
@ 2017-05-05 20:35 Thomas Koenig
2017-05-08 17:02 ` Jerry DeLisle
` (2 more replies)
0 siblings, 3 replies; 12+ messages in thread
From: Thomas Koenig @ 2017-05-05 20:35 UTC (permalink / raw)
To: fortran, gcc-patches
[-- Attachment #1: Type: text/plain, Size: 1262 bytes --]
Hello world,
the attached patch reduces the stack usage by the blocked
version of matmul for cases where we don't need the full buffer.
This should improve stack usage.
Regression-tested. I also added a stress test (around 3 secs of
CPU time on my system); it will only run once due to the "dg-do run"
hack.
OK for trunk?
Thomas
2017-05-05 Thomas Koenig <tkoenig@gcc.gnu.org>
PR fortran/80602
* m4/matmul_internal.m4: 'matmul_name`: Change
t1 to a VLA of the required size.
* generated/matmul_c10.c: Regenerated.
* generated/matmul_c16.c: Regenerated.
* generated/matmul_c4.c: Regenerated.
* generated/matmul_c8.c: Regenerated.
* generated/matmul_i1.c: Regenerated.
* generated/matmul_i16.c: Regenerated.
* generated/matmul_i2.c: Regenerated.
* generated/matmul_i4.c: Regenerated.
* generated/matmul_i8.c: Regenerated.
* generated/matmul_r10.c: Regenerated.
* generated/matmul_r16.c: Regenerated.
* generated/matmul_r4.c: Regenerated.
* generated/matmul_r8.c: Regenerated.
2017-05-05 Thomas Koenig <tkoenig@gcc.gnu.org>
PR fortran/80602
* gfortran.dg/matmul_15.f90: New test case.
[-- Attachment #2: p1.diff --]
[-- Type: text/x-patch, Size: 65359 bytes --]
Index: generated/matmul_c10.c
===================================================================
--- generated/matmul_c10.c (Revision 247566)
+++ generated/matmul_c10.c (Arbeitskopie)
@@ -286,8 +286,7 @@ matmul_c10_avx (gfc_array_c10 * const restrict ret
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_COMPLEX_10 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_COMPLEX_10 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -311,6 +310,17 @@ matmul_c10_avx (gfc_array_c10 * const restrict ret
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -829,8 +839,7 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict re
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_COMPLEX_10 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_COMPLEX_10 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -854,6 +863,17 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict re
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1372,8 +1392,7 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_COMPLEX_10 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_COMPLEX_10 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1397,6 +1416,17 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1911,8 +1941,7 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_COMPLEX_10 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_COMPLEX_10 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1936,6 +1965,17 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -2508,8 +2548,7 @@ matmul_c10 (gfc_array_c10 * const restrict retarra
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_COMPLEX_10 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_COMPLEX_10 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -2533,6 +2572,17 @@ matmul_c10 (gfc_array_c10 * const restrict retarra
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
Index: generated/matmul_c16.c
===================================================================
--- generated/matmul_c16.c (Revision 247566)
+++ generated/matmul_c16.c (Arbeitskopie)
@@ -286,8 +286,7 @@ matmul_c16_avx (gfc_array_c16 * const restrict ret
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_COMPLEX_16 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_COMPLEX_16 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -311,6 +310,17 @@ matmul_c16_avx (gfc_array_c16 * const restrict ret
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_COMPLEX_16 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -829,8 +839,7 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict re
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_COMPLEX_16 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_COMPLEX_16 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -854,6 +863,17 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict re
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_COMPLEX_16 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1372,8 +1392,7 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_COMPLEX_16 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_COMPLEX_16 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1397,6 +1416,17 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_COMPLEX_16 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1911,8 +1941,7 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_COMPLEX_16 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_COMPLEX_16 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1936,6 +1965,17 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_COMPLEX_16 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -2508,8 +2548,7 @@ matmul_c16 (gfc_array_c16 * const restrict retarra
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_COMPLEX_16 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_COMPLEX_16 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -2533,6 +2572,17 @@ matmul_c16 (gfc_array_c16 * const restrict retarra
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_COMPLEX_16 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
Index: generated/matmul_c4.c
===================================================================
--- generated/matmul_c4.c (Revision 247566)
+++ generated/matmul_c4.c (Arbeitskopie)
@@ -286,8 +286,7 @@ matmul_c4_avx (gfc_array_c4 * const restrict retar
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_COMPLEX_4 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_COMPLEX_4 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -311,6 +310,17 @@ matmul_c4_avx (gfc_array_c4 * const restrict retar
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_COMPLEX_4 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -829,8 +839,7 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict reta
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_COMPLEX_4 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_COMPLEX_4 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -854,6 +863,17 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict reta
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_COMPLEX_4 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1372,8 +1392,7 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict r
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_COMPLEX_4 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_COMPLEX_4 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1397,6 +1416,17 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict r
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_COMPLEX_4 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1911,8 +1941,7 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict r
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_COMPLEX_4 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_COMPLEX_4 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1936,6 +1965,17 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict r
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_COMPLEX_4 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -2508,8 +2548,7 @@ matmul_c4 (gfc_array_c4 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_COMPLEX_4 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_COMPLEX_4 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -2533,6 +2572,17 @@ matmul_c4 (gfc_array_c4 * const restrict retarray,
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_COMPLEX_4 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
Index: generated/matmul_c8.c
===================================================================
--- generated/matmul_c8.c (Revision 247566)
+++ generated/matmul_c8.c (Arbeitskopie)
@@ -286,8 +286,7 @@ matmul_c8_avx (gfc_array_c8 * const restrict retar
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_COMPLEX_8 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_COMPLEX_8 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -311,6 +310,17 @@ matmul_c8_avx (gfc_array_c8 * const restrict retar
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -829,8 +839,7 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict reta
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_COMPLEX_8 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_COMPLEX_8 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -854,6 +863,17 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict reta
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1372,8 +1392,7 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict r
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_COMPLEX_8 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_COMPLEX_8 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1397,6 +1416,17 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict r
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1911,8 +1941,7 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict r
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_COMPLEX_8 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_COMPLEX_8 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1936,6 +1965,17 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict r
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -2508,8 +2548,7 @@ matmul_c8 (gfc_array_c8 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_COMPLEX_8 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_COMPLEX_8 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -2533,6 +2572,17 @@ matmul_c8 (gfc_array_c8 * const restrict retarray,
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
Index: generated/matmul_i1.c
===================================================================
--- generated/matmul_i1.c (Revision 247566)
+++ generated/matmul_i1.c (Arbeitskopie)
@@ -286,8 +286,7 @@ matmul_i1_avx (gfc_array_i1 * const restrict retar
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_1 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -311,6 +310,17 @@ matmul_i1_avx (gfc_array_i1 * const restrict retar
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_1 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -829,8 +839,7 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict reta
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_1 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -854,6 +863,17 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict reta
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_1 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1372,8 +1392,7 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict r
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_1 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1397,6 +1416,17 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict r
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_1 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1911,8 +1941,7 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict r
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_1 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1936,6 +1965,17 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict r
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_1 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -2508,8 +2548,7 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_1 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -2533,6 +2572,17 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_1 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
Index: generated/matmul_i16.c
===================================================================
--- generated/matmul_i16.c (Revision 247566)
+++ generated/matmul_i16.c (Arbeitskopie)
@@ -286,8 +286,7 @@ matmul_i16_avx (gfc_array_i16 * const restrict ret
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_16 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -311,6 +310,17 @@ matmul_i16_avx (gfc_array_i16 * const restrict ret
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_16 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -829,8 +839,7 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict re
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_16 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -854,6 +863,17 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict re
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_16 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1372,8 +1392,7 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_16 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1397,6 +1416,17 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_16 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1911,8 +1941,7 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_16 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1936,6 +1965,17 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_16 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -2508,8 +2548,7 @@ matmul_i16 (gfc_array_i16 * const restrict retarra
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_16 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -2533,6 +2572,17 @@ matmul_i16 (gfc_array_i16 * const restrict retarra
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_16 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
Index: generated/matmul_i2.c
===================================================================
--- generated/matmul_i2.c (Revision 247566)
+++ generated/matmul_i2.c (Arbeitskopie)
@@ -286,8 +286,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retar
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_2 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -311,6 +310,17 @@ matmul_i2_avx (gfc_array_i2 * const restrict retar
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_2 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -829,8 +839,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict reta
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_2 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -854,6 +863,17 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict reta
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_2 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1372,8 +1392,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict r
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_2 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1397,6 +1416,17 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict r
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_2 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1911,8 +1941,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict r
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_2 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1936,6 +1965,17 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict r
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_2 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -2508,8 +2548,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_2 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -2533,6 +2572,17 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_2 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
Index: generated/matmul_i4.c
===================================================================
--- generated/matmul_i4.c (Revision 247566)
+++ generated/matmul_i4.c (Arbeitskopie)
@@ -286,8 +286,7 @@ matmul_i4_avx (gfc_array_i4 * const restrict retar
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_4 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -311,6 +310,17 @@ matmul_i4_avx (gfc_array_i4 * const restrict retar
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_4 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -829,8 +839,7 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict reta
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_4 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -854,6 +863,17 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict reta
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_4 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1372,8 +1392,7 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict r
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_4 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1397,6 +1416,17 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict r
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_4 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1911,8 +1941,7 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict r
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_4 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1936,6 +1965,17 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict r
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_4 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -2508,8 +2548,7 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_4 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -2533,6 +2572,17 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_4 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
Index: generated/matmul_i8.c
===================================================================
--- generated/matmul_i8.c (Revision 247566)
+++ generated/matmul_i8.c (Arbeitskopie)
@@ -286,8 +286,7 @@ matmul_i8_avx (gfc_array_i8 * const restrict retar
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_8 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -311,6 +310,17 @@ matmul_i8_avx (gfc_array_i8 * const restrict retar
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_8 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -829,8 +839,7 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict reta
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_8 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -854,6 +863,17 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict reta
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_8 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1372,8 +1392,7 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict r
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_8 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1397,6 +1416,17 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict r
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_8 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1911,8 +1941,7 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict r
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_8 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1936,6 +1965,17 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict r
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_8 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -2508,8 +2548,7 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_8 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -2533,6 +2572,17 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_INTEGER_8 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
Index: generated/matmul_r10.c
===================================================================
--- generated/matmul_r10.c (Revision 247566)
+++ generated/matmul_r10.c (Arbeitskopie)
@@ -286,8 +286,7 @@ matmul_r10_avx (gfc_array_r10 * const restrict ret
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_REAL_10 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_REAL_10 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -311,6 +310,17 @@ matmul_r10_avx (gfc_array_r10 * const restrict ret
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_REAL_10 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -829,8 +839,7 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict re
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_REAL_10 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_REAL_10 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -854,6 +863,17 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict re
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_REAL_10 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1372,8 +1392,7 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_REAL_10 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_REAL_10 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1397,6 +1416,17 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_REAL_10 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1911,8 +1941,7 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_REAL_10 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_REAL_10 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1936,6 +1965,17 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_REAL_10 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -2508,8 +2548,7 @@ matmul_r10 (gfc_array_r10 * const restrict retarra
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_REAL_10 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_REAL_10 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -2533,6 +2572,17 @@ matmul_r10 (gfc_array_r10 * const restrict retarra
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_REAL_10 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
Index: generated/matmul_r16.c
===================================================================
--- generated/matmul_r16.c (Revision 247566)
+++ generated/matmul_r16.c (Arbeitskopie)
@@ -286,8 +286,7 @@ matmul_r16_avx (gfc_array_r16 * const restrict ret
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_REAL_16 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_REAL_16 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -311,6 +310,17 @@ matmul_r16_avx (gfc_array_r16 * const restrict ret
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_REAL_16 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -829,8 +839,7 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict re
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_REAL_16 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_REAL_16 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -854,6 +863,17 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict re
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_REAL_16 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1372,8 +1392,7 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_REAL_16 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_REAL_16 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1397,6 +1416,17 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_REAL_16 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1911,8 +1941,7 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_REAL_16 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_REAL_16 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1936,6 +1965,17 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_REAL_16 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -2508,8 +2548,7 @@ matmul_r16 (gfc_array_r16 * const restrict retarra
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_REAL_16 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_REAL_16 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -2533,6 +2572,17 @@ matmul_r16 (gfc_array_r16 * const restrict retarra
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_REAL_16 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
Index: generated/matmul_r4.c
===================================================================
--- generated/matmul_r4.c (Revision 247566)
+++ generated/matmul_r4.c (Arbeitskopie)
@@ -286,8 +286,7 @@ matmul_r4_avx (gfc_array_r4 * const restrict retar
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_REAL_4 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_REAL_4 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -311,6 +310,17 @@ matmul_r4_avx (gfc_array_r4 * const restrict retar
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_REAL_4 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -829,8 +839,7 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict reta
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_REAL_4 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_REAL_4 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -854,6 +863,17 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict reta
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_REAL_4 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1372,8 +1392,7 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict r
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_REAL_4 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_REAL_4 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1397,6 +1416,17 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict r
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_REAL_4 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1911,8 +1941,7 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict r
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_REAL_4 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_REAL_4 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1936,6 +1965,17 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict r
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_REAL_4 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -2508,8 +2548,7 @@ matmul_r4 (gfc_array_r4 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_REAL_4 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_REAL_4 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -2533,6 +2572,17 @@ matmul_r4 (gfc_array_r4 * const restrict retarray,
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_REAL_4 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
Index: generated/matmul_r8.c
===================================================================
--- generated/matmul_r8.c (Revision 247566)
+++ generated/matmul_r8.c (Arbeitskopie)
@@ -286,8 +286,7 @@ matmul_r8_avx (gfc_array_r8 * const restrict retar
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_REAL_8 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_REAL_8 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -311,6 +310,17 @@ matmul_r8_avx (gfc_array_r8 * const restrict retar
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_REAL_8 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -829,8 +839,7 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict reta
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_REAL_8 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_REAL_8 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -854,6 +863,17 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict reta
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_REAL_8 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1372,8 +1392,7 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict r
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_REAL_8 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_REAL_8 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1397,6 +1416,17 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict r
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_REAL_8 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -1911,8 +1941,7 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict r
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_REAL_8 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_REAL_8 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1936,6 +1965,17 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict r
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_REAL_8 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
@@ -2508,8 +2548,7 @@ matmul_r8 (gfc_array_r8 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_REAL_8 t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_REAL_8 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -2533,6 +2572,17 @@ matmul_r8 (gfc_array_r8 * const restrict retarray,
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ GFC_REAL_8 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
Index: m4/matmul_internal.m4
===================================================================
--- m4/matmul_internal.m4 (Revision 247566)
+++ m4/matmul_internal.m4 (Arbeitskopie)
@@ -202,8 +202,7 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
i1, i2, i3, i4, i5, i6;
/* Local variables */
- 'rtype_name` t1[65536], /* was [256][256] */
- f11, f12, f21, f22, f31, f32, f41, f42,
+ 'rtype_name` f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -227,6 +226,17 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
if (m == 0 || n == 0 || k == 0)
return;
+ /* Adjust size of t1 to what is needed. */
+ index_type t1_dim;
+ t1_dim = (a_dim1-1) * 256 + b_dim1;
+ if (t1_dim > 65536)
+ t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+ 'rtype_name` t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
[-- Attachment #3: matmul_15.f90 --]
[-- Type: text/x-fortran, Size: 846 bytes --]
! { dg-do run }
! { dg-options "-finline-matmul-limit=0" }
! Stress-test the matmul blocking code with sizes close to or
! equal to powers of two.
program main
  ! Stress test for matmul: multiply real matrices for every combination
  ! of the extents listed in nn.  Nothing is printed; the products are
  ! only folded into sm.
  implicit none
  ! Extents next to and at powers of two, 2 through 257.
  integer, dimension(*), parameter :: nn = &
       & [2,3,4,5, 7,8,9, 15,16,17, 31,32,33, 63,64,65, &
       &  127,128,129, 255,256,257]
  integer, parameter :: s = size(nn)
  real, dimension(:,:), allocatable :: a, b, c
  integer :: i1, i2, i3
  integer :: nx, ny, nz   ! a is nx x ny, b is ny x nz, c is nx x nz
  real :: sm

  sm = 0.0
  do i1 = 1, s
     nx = nn(i1)
     do i2 = 1, s
        ny = nn(i2)
        do i3 = 1, s
           nz = nn(i3)
           allocate (a(nx,ny), b(ny,nz), c(nx,nz))
           call random_number(a)
           call random_number(b)
           c = matmul(a,b)
           ! Accumulate the result so that c is actually read.
           sm = sm + sum(c)
           deallocate(a,b,c)
        end do
     end do
  end do
end program main
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [patch, fortran] Reduce stack use in blocked matmul
2017-05-05 20:35 [patch, fortran] Reduce stack use in blocked matmul Thomas Koenig
@ 2017-05-08 17:02 ` Jerry DeLisle
2017-05-08 19:55 ` Thomas Koenig
2017-05-09 12:46 ` Christophe Lyon
2017-05-09 10:49 ` Andreas Schwab
2017-05-10 8:11 ` Andreas Schwab
2 siblings, 2 replies; 12+ messages in thread
From: Jerry DeLisle @ 2017-05-08 17:02 UTC (permalink / raw)
To: Thomas Koenig, fortran, gcc-patches
On 05/05/2017 01:31 PM, Thomas Koenig wrote:
> Hello world,
>
> the attached patch reduces the stack usage by the blocked
> version of matmul for cases where we don't need the full buffer.
> This should improve stack usage.
>
> Regression-tested. I also added a stress test (around 3 secs of
> CPU time on my system; it will only run once due to the "dg-do run"
> hack).
>
> OK for trunk?
>
OK, thanks.
Jerry
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [patch, fortran] Reduce stack use in blocked matmul
2017-05-08 17:02 ` Jerry DeLisle
@ 2017-05-08 19:55 ` Thomas Koenig
2017-05-08 21:16 ` Jerry DeLisle
2017-05-09 12:46 ` Christophe Lyon
1 sibling, 1 reply; 12+ messages in thread
From: Thomas Koenig @ 2017-05-08 19:55 UTC (permalink / raw)
To: Jerry DeLisle, fortran, gcc-patches
Am 08.05.2017 um 18:58 schrieb Jerry DeLisle:
>> the attached patch reduces the stack usage by the blocked
>> version of matmul for cases where we don't need the full buffer.
>> This should improve stack usage.
>>
>> OK for trunk?
>>
>
> OK, thanks.
Is this something we should consider for backporting to gcc-7?
I think the large block size caused
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79876 .
That one was fixed by adjusting the ridiculously low
stack size for multi-threaded applications on OSX, but
the underlying problem could still bite somewhere or
somebody else.
Opinions?
Regards
Thomas
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [patch, fortran] Reduce stack use in blocked matmul
2017-05-08 19:55 ` Thomas Koenig
@ 2017-05-08 21:16 ` Jerry DeLisle
0 siblings, 0 replies; 12+ messages in thread
From: Jerry DeLisle @ 2017-05-08 21:16 UTC (permalink / raw)
To: Thomas Koenig, fortran, gcc-patches
On 05/08/2017 12:29 PM, Thomas Koenig wrote:
> Am 08.05.2017 um 18:58 schrieb Jerry DeLisle:
>
>>> the attached patch reduces the stack usage by the blocked
>>> version of matmul for cases where we don't need the full buffer.
>>> This should improve stack usage.
>>>
>>> OK for trunk?
>>>
>>
>> OK, thanks.
>
> Is this something we should consider for backporting to gcc-7?
> I think the large block size caused
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79876 .
>
> That one was fixed by adjusting the ridiculously low
> stack size for multi-threaded applications on OSX, but
> the underlying problem could still bite somewhere or
> somebody else.
>
> Opinions?
>
I think it should be backported since we really changed the matmul and this was
fallout from that; using too much stack might end up being a regression.
OK to back port.
Jerry
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [patch, fortran] Reduce stack use in blocked matmul
2017-05-08 17:02 ` Jerry DeLisle
2017-05-08 19:55 ` Thomas Koenig
@ 2017-05-09 12:46 ` Christophe Lyon
1 sibling, 0 replies; 12+ messages in thread
From: Christophe Lyon @ 2017-05-09 12:46 UTC (permalink / raw)
To: Jerry DeLisle; +Cc: Thomas Koenig, fortran, gcc-patches
Hi,
On 8 May 2017 at 18:58, Jerry DeLisle <jvdelisle@charter.net> wrote:
> On 05/05/2017 01:31 PM, Thomas Koenig wrote:
>> Hello world,
>>
>> the attached patch reduces the stack usage by the blocked
>> version of matmul for cases where we don't need the full buffer.
>> This should improve stack usage.
>>
>> Regression-tested. I also added a stress test (around 3 secs of
>> CPU time on my system), it will only run once due to the "dg-do run"
>> hack).
>>
>> OK for trunk?
>>
>
> OK, thanks.
>
Since this was committed (r247753), I've noticed the following failures
on arm* targets:
- PASS now FAIL [PASS => FAIL]:
Executed from: gfortran.dg/dg.exp
gfortran.dg/allocatable_function_8.f90 -O0 execution test
gfortran.dg/allocatable_function_8.f90 -O1 execution test
gfortran.dg/allocatable_function_8.f90 -O2 execution test
gfortran.dg/allocatable_function_8.f90 -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions execution
test
gfortran.dg/allocatable_function_8.f90 -O3 -g execution test
gfortran.dg/allocatable_function_8.f90 -Os execution test
gfortran.dg/generic_20.f90 -O0 execution test
gfortran.dg/generic_20.f90 -O1 execution test
gfortran.dg/generic_20.f90 -O2 execution test
gfortran.dg/generic_20.f90 -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions execution
test
gfortran.dg/generic_20.f90 -O3 -g execution test
gfortran.dg/generic_20.f90 -Os execution test
gfortran.dg/matmul_6.f90 -O0 execution test
gfortran.dg/matmul_bounds_6.f90 -O0 execution test
gfortran.dg/matmul_bounds_6.f90 -O1 execution test
gfortran.dg/matmul_bounds_6.f90 -O2 execution test
gfortran.dg/matmul_bounds_6.f90 -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions execution
test
gfortran.dg/matmul_bounds_6.f90 -O3 -g execution test
gfortran.dg/matmul_bounds_6.f90 -Os execution test
gfortran.dg/operator_1.f90 -O0 execution test
Executed from: gfortran.fortran-torture/execute/execute.exp
gfortran.fortran-torture/execute/intrinsic_matmul.f90 execution, -O0
and the new tests fail too:
- FAIL appears [ => FAIL]:
Executed from: gfortran.dg/dg.exp
gfortran.dg/matmul_15.f90 -O execution test
gfortran.dg/matmul_bounds_5.f90 -O0 output pattern test, is
qemu: uncaught target signal 11 (Segmentation fault) - core dumped
Christophe
> Jerry
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [patch, fortran] Reduce stack use in blocked matmul
2017-05-05 20:35 [patch, fortran] Reduce stack use in blocked matmul Thomas Koenig
2017-05-08 17:02 ` Jerry DeLisle
@ 2017-05-09 10:49 ` Andreas Schwab
2017-05-09 17:23 ` Thomas Koenig
2017-05-10 8:11 ` Andreas Schwab
2 siblings, 1 reply; 12+ messages in thread
From: Andreas Schwab @ 2017-05-09 10:49 UTC (permalink / raw)
To: Thomas Koenig; +Cc: fortran, gcc-patches
On Mai 05 2017, Thomas Koenig <tkoenig@netcologne.de> wrote:
> @@ -227,6 +226,17 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
> if (m == 0 || n == 0 || k == 0)
> return;
>
> + /* Adjust size of t1 to what is needed. */
> + index_type t1_dim;
> + t1_dim = (a_dim1-1) * 256 + b_dim1;
> + if (t1_dim > 65536)
> + t1_dim = 65536;
What happens if (a_dim1-1) * 256 + b_dim1 > 65536?
Andreas.
--
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [patch, fortran] Reduce stack use in blocked matmul
2017-05-09 10:49 ` Andreas Schwab
@ 2017-05-09 17:23 ` Thomas Koenig
0 siblings, 0 replies; 12+ messages in thread
From: Thomas Koenig @ 2017-05-09 17:23 UTC (permalink / raw)
To: Andreas Schwab; +Cc: fortran, gcc-patches
Am 09.05.2017 um 12:43 schrieb Andreas Schwab:
> On Mai 05 2017, Thomas Koenig <tkoenig@netcologne.de> wrote:
>
>> @@ -227,6 +226,17 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
>> if (m == 0 || n == 0 || k == 0)
>> return;
>>
>> + /* Adjust size of t1 to what is needed. */
>> + index_type t1_dim;
>> + t1_dim = (a_dim1-1) * 256 + b_dim1;
>> + if (t1_dim > 65536)
>> + t1_dim = 65536;
>
> What happens if (a_dim1-1) * 256 + b_dim1 > 65536?
t1 is an auxiliary variable for blocking. If that
condition is true, blocking starts to happen.
Regards
Thomas
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [patch, fortran] Reduce stack use in blocked matmul
2017-05-05 20:35 [patch, fortran] Reduce stack use in blocked matmul Thomas Koenig
2017-05-08 17:02 ` Jerry DeLisle
2017-05-09 10:49 ` Andreas Schwab
@ 2017-05-10 8:11 ` Andreas Schwab
2017-05-10 15:48 ` Thomas Koenig
2 siblings, 1 reply; 12+ messages in thread
From: Andreas Schwab @ 2017-05-10 8:11 UTC (permalink / raw)
To: Thomas Koenig; +Cc: fortran, gcc-patches
On Mai 05 2017, Thomas Koenig <tkoenig@netcologne.de> wrote:
> @@ -227,6 +226,17 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
> if (m == 0 || n == 0 || k == 0)
> return;
>
> + /* Adjust size of t1 to what is needed. */
> + index_type t1_dim;
> + t1_dim = (a_dim1-1) * 256 + b_dim1;
> + if (t1_dim > 65536)
> + t1_dim = 65536;
> +
> +#pragma GCC diagnostic push
> +#pragma GCC diagnostic ignored "-Wvla"
> + 'rtype_name` t1[t1_dim]; /* was [256][256] */
That does the wrong thing if b_dim1 == 0xDEADBEEF.
(gdb) p (a_dim1-1) * 256 + b_dim1
$2 = -764456190
Andreas.
--
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [patch, fortran] Reduce stack use in blocked matmul
2017-05-10 8:11 ` Andreas Schwab
@ 2017-05-10 15:48 ` Thomas Koenig
2017-05-10 15:56 ` Andreas Schwab
2017-05-10 15:56 ` Thomas Koenig
0 siblings, 2 replies; 12+ messages in thread
From: Thomas Koenig @ 2017-05-10 15:48 UTC (permalink / raw)
To: Andreas Schwab; +Cc: fortran, gcc-patches
Hi Andreas,
>> + index_type t1_dim;
>> + t1_dim = (a_dim1-1) * 256 + b_dim1;
>> + if (t1_dim > 65536)
>> + t1_dim = 65536;
>> +
>> +#pragma GCC diagnostic push
>> +#pragma GCC diagnostic ignored "-Wvla"
>> + 'rtype_name` t1[t1_dim]; /* was [256][256] */
> That does the wrong thing if b_dim1 == 0xDEADBEEF.
>
> (gdb) p (a_dim1-1) * 256 + b_dim1
> $2 = -764456190
A look into the source code shows that b_dim1 is index_type,
which is 32 bits on 32-bit systems and 64 bits on 64-bit systems.
Now, consider if it is possible to declare an array on a 32-bit
system where the number of elements along one direction exceeds 2**31-1
(so sign extension would come into play), or if it would be
possible to declare an array on a 64-bit system where the number of
elements along one direction exceeds 2**63-1.
If you manage to come up with a legal Fortran test case which
sets b_dim1 to 0xdeadbeef, I owe you a beer :-)
Regards
Thomas
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [patch, fortran] Reduce stack use in blocked matmul
2017-05-10 15:48 ` Thomas Koenig
@ 2017-05-10 15:56 ` Andreas Schwab
2017-05-10 15:56 ` Thomas Koenig
1 sibling, 0 replies; 12+ messages in thread
From: Andreas Schwab @ 2017-05-10 15:56 UTC (permalink / raw)
To: Thomas Koenig; +Cc: fortran, gcc-patches
On Mai 10 2017, Thomas Koenig <tkoenig@netcologne.de> wrote:
> If you manage to come up with a legal Fortran test case which
> sets b_dim1 to 0xdeadbeef, I owe you a beer :-)
grep is your friend.
Andreas.
--
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [patch, fortran] Reduce stack use in blocked matmul
2017-05-10 15:48 ` Thomas Koenig
2017-05-10 15:56 ` Andreas Schwab
@ 2017-05-10 15:56 ` Thomas Koenig
2017-05-10 16:28 ` Andreas Schwab
1 sibling, 1 reply; 12+ messages in thread
From: Thomas Koenig @ 2017-05-10 15:56 UTC (permalink / raw)
To: Andreas Schwab; +Cc: fortran, gcc-patches
Am 10.05.2017 um 17:42 schrieb Thomas Koenig:
>
> If you manage to come up with a legal Fortran test case which
> sets b_dim1 to 0xdeadbeef, I owe you a beer :-)
... on a 32-bit system, of course.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [patch, fortran] Reduce stack use in blocked matmul
2017-05-10 15:56 ` Thomas Koenig
@ 2017-05-10 16:28 ` Andreas Schwab
0 siblings, 0 replies; 12+ messages in thread
From: Andreas Schwab @ 2017-05-10 16:28 UTC (permalink / raw)
To: Thomas Koenig; +Cc: fortran, gcc-patches
On Mai 10 2017, Thomas Koenig <tkoenig@netcologne.de> wrote:
> ... on a 32-bit system, of course.
http://gcc.gnu.org/ml/gcc-testresults/2017-05/msg01063.html
FAIL: gfortran.dg/generic_20.f90 -O0 execution test
FAIL: gfortran.dg/generic_20.f90 -O1 execution test
FAIL: gfortran.dg/generic_20.f90 -O2 execution test
FAIL: gfortran.dg/generic_20.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test
FAIL: gfortran.dg/generic_20.f90 -O3 -g execution test
FAIL: gfortran.dg/generic_20.f90 -Os execution test
FAIL: gfortran.dg/matmul_6.f90 -O0 execution test
FAIL: gfortran.dg/matmul_bounds_5.f90 -O0 output pattern test
FAIL: gfortran.dg/matmul_bounds_6.f90 -O0 execution test
FAIL: gfortran.dg/matmul_bounds_6.f90 -O1 execution test
FAIL: gfortran.dg/matmul_bounds_6.f90 -O2 execution test
FAIL: gfortran.dg/matmul_bounds_6.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test
FAIL: gfortran.dg/matmul_bounds_6.f90 -O3 -g execution test
FAIL: gfortran.dg/matmul_bounds_6.f90 -Os execution test
Andreas.
--
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2017-05-10 16:06 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-05 20:35 [patch, fortran] Reduce stack use in blocked matmul Thomas Koenig
2017-05-08 17:02 ` Jerry DeLisle
2017-05-08 19:55 ` Thomas Koenig
2017-05-08 21:16 ` Jerry DeLisle
2017-05-09 12:46 ` Christophe Lyon
2017-05-09 10:49 ` Andreas Schwab
2017-05-09 17:23 ` Thomas Koenig
2017-05-10 8:11 ` Andreas Schwab
2017-05-10 15:48 ` Thomas Koenig
2017-05-10 15:56 ` Andreas Schwab
2017-05-10 15:56 ` Thomas Koenig
2017-05-10 16:28 ` Andreas Schwab
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).