Index: m4/matmul_internal.m4 =================================================================== --- m4/matmul_internal.m4 (Revision 259152) +++ m4/matmul_internal.m4 (Arbeitskopie) @@ -234,7 +234,7 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_c10.c =================================================================== --- generated/matmul_c10.c (Revision 259152) +++ generated/matmul_c10.c (Arbeitskopie) @@ -318,7 +318,7 @@ matmul_c10_avx (gfc_array_c10 * const restrict ret /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -870,7 +870,7 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict re /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1422,7 +1422,7 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1988,7 +1988,7 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2614,7 +2614,7 @@ matmul_c10 (gfc_array_c10 * const restrict retarra /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_c16.c =================================================================== --- generated/matmul_c16.c (Revision 259152) +++ generated/matmul_c16.c (Arbeitskopie) @@ -318,7 +318,7 @@ matmul_c16_avx (gfc_array_c16 * const restrict ret /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -870,7 +870,7 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict re /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1422,7 +1422,7 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1988,7 +1988,7 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2614,7 +2614,7 @@ matmul_c16 (gfc_array_c16 * const restrict retarra /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_c4.c =================================================================== --- generated/matmul_c4.c (Revision 259152) +++ generated/matmul_c4.c (Arbeitskopie) @@ -318,7 +318,7 @@ matmul_c4_avx (gfc_array_c4 * const restrict retar /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -870,7 +870,7 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict reta /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1422,7 +1422,7 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict r /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1988,7 +1988,7 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict r /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2614,7 +2614,7 @@ matmul_c4 (gfc_array_c4 * const restrict retarray, /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_c8.c =================================================================== --- generated/matmul_c8.c (Revision 259152) +++ generated/matmul_c8.c (Arbeitskopie) @@ -318,7 +318,7 @@ matmul_c8_avx (gfc_array_c8 * const restrict retar /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -870,7 +870,7 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict reta /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1422,7 +1422,7 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict r /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1988,7 +1988,7 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict r /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2614,7 +2614,7 @@ matmul_c8 (gfc_array_c8 * const restrict retarray, /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_i1.c =================================================================== --- generated/matmul_i1.c (Revision 259152) +++ generated/matmul_i1.c (Arbeitskopie) @@ -318,7 +318,7 @@ matmul_i1_avx (gfc_array_i1 * const restrict retar /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -870,7 +870,7 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict reta /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1422,7 +1422,7 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict r /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1988,7 +1988,7 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict r /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2614,7 +2614,7 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_i16.c =================================================================== --- generated/matmul_i16.c (Revision 259152) +++ generated/matmul_i16.c (Arbeitskopie) @@ -318,7 +318,7 @@ matmul_i16_avx (gfc_array_i16 * const restrict ret /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -870,7 +870,7 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict re /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1422,7 +1422,7 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1988,7 +1988,7 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2614,7 +2614,7 @@ matmul_i16 (gfc_array_i16 * const restrict retarra /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_i2.c =================================================================== --- generated/matmul_i2.c (Revision 259152) +++ generated/matmul_i2.c (Arbeitskopie) @@ -318,7 +318,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retar /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -870,7 +870,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict reta /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1422,7 +1422,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict r /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1988,7 +1988,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict r /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2614,7 +2614,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_i4.c =================================================================== --- generated/matmul_i4.c (Revision 259152) +++ generated/matmul_i4.c (Arbeitskopie) @@ -318,7 +318,7 @@ matmul_i4_avx (gfc_array_i4 * const restrict retar /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -870,7 +870,7 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict reta /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1422,7 +1422,7 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict r /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1988,7 +1988,7 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict r /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2614,7 +2614,7 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_i8.c =================================================================== --- generated/matmul_i8.c (Revision 259152) +++ generated/matmul_i8.c (Arbeitskopie) @@ -318,7 +318,7 @@ matmul_i8_avx (gfc_array_i8 * const restrict retar /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -870,7 +870,7 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict reta /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1422,7 +1422,7 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict r /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1988,7 +1988,7 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict r /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2614,7 +2614,7 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_r10.c =================================================================== --- generated/matmul_r10.c (Revision 259152) +++ generated/matmul_r10.c (Arbeitskopie) @@ -318,7 +318,7 @@ matmul_r10_avx (gfc_array_r10 * const restrict ret /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -870,7 +870,7 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict re /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1422,7 +1422,7 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1988,7 +1988,7 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2614,7 +2614,7 @@ matmul_r10 (gfc_array_r10 * const restrict retarra /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_r16.c =================================================================== --- generated/matmul_r16.c (Revision 259152) +++ generated/matmul_r16.c (Arbeitskopie) @@ -318,7 +318,7 @@ matmul_r16_avx (gfc_array_r16 * const restrict ret /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -870,7 +870,7 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict re /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1422,7 +1422,7 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1988,7 +1988,7 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2614,7 +2614,7 @@ matmul_r16 (gfc_array_r16 * const restrict retarra /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_r4.c =================================================================== --- generated/matmul_r4.c (Revision 259152) +++ generated/matmul_r4.c (Arbeitskopie) @@ -318,7 +318,7 @@ matmul_r4_avx (gfc_array_r4 * const restrict retar /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -870,7 +870,7 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict reta /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1422,7 +1422,7 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict r /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1988,7 +1988,7 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict r /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2614,7 +2614,7 @@ matmul_r4 (gfc_array_r4 * const restrict retarray, /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_r8.c =================================================================== --- generated/matmul_r8.c (Revision 259152) +++ generated/matmul_r8.c (Arbeitskopie) @@ -318,7 +318,7 @@ matmul_r8_avx (gfc_array_r8 * const restrict retar /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -870,7 +870,7 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict reta /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1422,7 +1422,7 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict r /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1988,7 +1988,7 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict r /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2614,7 +2614,7 @@ matmul_r8 (gfc_array_r8 * const restrict retarray, /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_c10.c =================================================================== --- generated/matmulavx128_c10.c (Revision 259152) +++ generated/matmulavx128_c10.c (Arbeitskopie) @@ -283,7 +283,7 @@ matmul_c10_avx128_fma3 (gfc_array_c10 * const rest /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -836,7 +836,7 @@ matmul_c10_avx128_fma4 (gfc_array_c10 * const rest /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_c16.c =================================================================== --- generated/matmulavx128_c16.c (Revision 259152) +++ generated/matmulavx128_c16.c (Arbeitskopie) @@ -283,7 +283,7 @@ matmul_c16_avx128_fma3 (gfc_array_c16 * const rest /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -836,7 +836,7 @@ matmul_c16_avx128_fma4 (gfc_array_c16 * const rest /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_c4.c =================================================================== --- generated/matmulavx128_c4.c (Revision 259152) +++ generated/matmulavx128_c4.c (Arbeitskopie) @@ -283,7 +283,7 @@ matmul_c4_avx128_fma3 (gfc_array_c4 * const restri /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -836,7 +836,7 @@ matmul_c4_avx128_fma4 (gfc_array_c4 * const restri /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_c8.c =================================================================== --- generated/matmulavx128_c8.c (Revision 259152) +++ generated/matmulavx128_c8.c (Arbeitskopie) @@ -283,7 +283,7 @@ matmul_c8_avx128_fma3 (gfc_array_c8 * const restri /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -836,7 +836,7 @@ matmul_c8_avx128_fma4 (gfc_array_c8 * const restri /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_i1.c =================================================================== --- generated/matmulavx128_i1.c (Revision 259152) +++ generated/matmulavx128_i1.c (Arbeitskopie) @@ -283,7 +283,7 @@ matmul_i1_avx128_fma3 (gfc_array_i1 * const restri /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -836,7 +836,7 @@ matmul_i1_avx128_fma4 (gfc_array_i1 * const restri /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_i16.c =================================================================== --- generated/matmulavx128_i16.c (Revision 259152) +++ generated/matmulavx128_i16.c (Arbeitskopie) @@ -283,7 +283,7 @@ matmul_i16_avx128_fma3 (gfc_array_i16 * const rest /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -836,7 +836,7 @@ matmul_i16_avx128_fma4 (gfc_array_i16 * const rest /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_i2.c =================================================================== --- generated/matmulavx128_i2.c (Revision 259152) +++ generated/matmulavx128_i2.c (Arbeitskopie) @@ -283,7 +283,7 @@ matmul_i2_avx128_fma3 (gfc_array_i2 * const restri /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -836,7 +836,7 @@ matmul_i2_avx128_fma4 (gfc_array_i2 * const restri /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_i4.c =================================================================== --- generated/matmulavx128_i4.c (Revision 259152) +++ generated/matmulavx128_i4.c (Arbeitskopie) @@ -283,7 +283,7 @@ matmul_i4_avx128_fma3 (gfc_array_i4 * const restri /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -836,7 +836,7 @@ matmul_i4_avx128_fma4 (gfc_array_i4 * const restri /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_i8.c =================================================================== --- generated/matmulavx128_i8.c (Revision 259152) +++ generated/matmulavx128_i8.c (Arbeitskopie) @@ -283,7 +283,7 @@ matmul_i8_avx128_fma3 (gfc_array_i8 * const restri /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -836,7 +836,7 @@ matmul_i8_avx128_fma4 (gfc_array_i8 * const restri /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_r10.c =================================================================== --- generated/matmulavx128_r10.c (Revision 259152) +++ generated/matmulavx128_r10.c (Arbeitskopie) @@ -283,7 +283,7 @@ matmul_r10_avx128_fma3 (gfc_array_r10 * const rest /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -836,7 +836,7 @@ matmul_r10_avx128_fma4 (gfc_array_r10 * const rest /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_r16.c =================================================================== --- generated/matmulavx128_r16.c (Revision 259152) +++ generated/matmulavx128_r16.c (Arbeitskopie) @@ -283,7 +283,7 @@ matmul_r16_avx128_fma3 (gfc_array_r16 * const rest /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -836,7 +836,7 @@ matmul_r16_avx128_fma4 (gfc_array_r16 * const rest /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_r4.c =================================================================== --- generated/matmulavx128_r4.c (Revision 259152) +++ generated/matmulavx128_r4.c (Arbeitskopie) @@ -283,7 +283,7 @@ matmul_r4_avx128_fma3 (gfc_array_r4 * const restri /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -836,7 +836,7 @@ matmul_r4_avx128_fma4 (gfc_array_r4 * const restri /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_r8.c =================================================================== --- generated/matmulavx128_r8.c (Revision 259152) +++ generated/matmulavx128_r8.c (Arbeitskopie) @@ -283,7 +283,7 @@ matmul_r8_avx128_fma3 (gfc_array_r8 * const restri /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -836,7 +836,7 @@ matmul_r8_avx128_fma4 (gfc_array_r8 * const restri /* Adjust size of t1 to what is needed. */ index_type t1_dim; - t1_dim = (a_dim1-1) * 256 + b_dim1; + t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536;