public inbox for fortran@gcc.gnu.org
 help / color / mirror / Atom feed
* [patch, libfortran] correct buffer size in library matmul
@ 2018-08-25 19:38 Thomas Koenig
  2018-08-25 21:23 ` Jerry DeLisle
  0 siblings, 1 reply; 2+ messages in thread
From: Thomas Koenig @ 2018-08-25 19:38 UTC (permalink / raw)
  To: fortran, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1555 bytes --]

Hello world,

the attached patch fixes a regression where the calculation of the size
of the buffer for matmul was too small for ine special case.

Regression-tested. OK for trunk?

Regards

	Thomas

2018-08-25  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR libfortran/86704
	* m4/matmul_internal.m4: Correct calculation of needed buffer size
	for arrays of shape (1,n).
	* generated/matmul_c10.c: Regenerated
	* generated/matmul_c16.c: Regenerated
	* generated/matmul_c4.c: Regenerated
	* generated/matmul_c8.c: Regenerated
	* generated/matmul_i1.c: Regenerated
	* generated/matmul_i16.c: Regenerated
	* generated/matmul_i2.c: Regenerated
	* generated/matmul_i4.c: Regenerated
	* generated/matmul_i8.c: Regenerated
	* generated/matmul_r10.c: Regenerated
	* generated/matmul_r16.c: Regenerated
	* generated/matmul_r4.c: Regenerated
	* generated/matmul_r8.c: Regenerated
	* generated/matmulavx128_c10.c: Regenerated
	* generated/matmulavx128_c16.c: Regenerated
	* generated/matmulavx128_c4.c: Regenerated
	* generated/matmulavx128_c8.c: Regenerated
	* generated/matmulavx128_i1.c: Regenerated
	* generated/matmulavx128_i16.c: Regenerated
	* generated/matmulavx128_i2.c: Regenerated
	* generated/matmulavx128_i4.c: Regenerated
	* generated/matmulavx128_i8.c: Regenerated
	* generated/matmulavx128_r10.c: Regenerated
	* generated/matmulavx128_r16.c: Regenerated
	* generated/matmulavx128_r4.c: Regenerated
	* generated/matmulavx128_r8.c: Regenerated

2018-08-25  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR libfortran/86704
	* gfortran.dg/matmul_19.f90: New test.

[-- Attachment #2: p2.diff --]
[-- Type: text/x-patch, Size: 44163 bytes --]

Index: m4/matmul_internal.m4
===================================================================
--- m4/matmul_internal.m4	(Revision 263752)
+++ m4/matmul_internal.m4	(Arbeitskopie)
@@ -233,8 +233,13 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmul_c10.c
===================================================================
--- generated/matmul_c10.c	(Revision 263752)
+++ generated/matmul_c10.c	(Arbeitskopie)
@@ -317,8 +317,13 @@ matmul_c10_avx (gfc_array_c10 * const restrict ret
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -869,8 +874,13 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict re
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1421,8 +1431,13 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1987,8 +2002,13 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -2613,8 +2633,13 @@ matmul_c10 (gfc_array_c10 * const restrict retarra
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmul_c16.c
===================================================================
--- generated/matmul_c16.c	(Revision 263752)
+++ generated/matmul_c16.c	(Arbeitskopie)
@@ -317,8 +317,13 @@ matmul_c16_avx (gfc_array_c16 * const restrict ret
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -869,8 +874,13 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict re
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1421,8 +1431,13 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1987,8 +2002,13 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -2613,8 +2633,13 @@ matmul_c16 (gfc_array_c16 * const restrict retarra
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmul_c4.c
===================================================================
--- generated/matmul_c4.c	(Revision 263752)
+++ generated/matmul_c4.c	(Arbeitskopie)
@@ -317,8 +317,13 @@ matmul_c4_avx (gfc_array_c4 * const restrict retar
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -869,8 +874,13 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict reta
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1421,8 +1431,13 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict r
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1987,8 +2002,13 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict r
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -2613,8 +2633,13 @@ matmul_c4 (gfc_array_c4 * const restrict retarray,
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmul_c8.c
===================================================================
--- generated/matmul_c8.c	(Revision 263752)
+++ generated/matmul_c8.c	(Arbeitskopie)
@@ -317,8 +317,13 @@ matmul_c8_avx (gfc_array_c8 * const restrict retar
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -869,8 +874,13 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict reta
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1421,8 +1431,13 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict r
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1987,8 +2002,13 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict r
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -2613,8 +2633,13 @@ matmul_c8 (gfc_array_c8 * const restrict retarray,
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmul_i1.c
===================================================================
--- generated/matmul_i1.c	(Revision 263752)
+++ generated/matmul_i1.c	(Arbeitskopie)
@@ -317,8 +317,13 @@ matmul_i1_avx (gfc_array_i1 * const restrict retar
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -869,8 +874,13 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict reta
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1421,8 +1431,13 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict r
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1987,8 +2002,13 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict r
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -2613,8 +2633,13 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmul_i16.c
===================================================================
--- generated/matmul_i16.c	(Revision 263752)
+++ generated/matmul_i16.c	(Arbeitskopie)
@@ -317,8 +317,13 @@ matmul_i16_avx (gfc_array_i16 * const restrict ret
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -869,8 +874,13 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict re
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1421,8 +1431,13 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1987,8 +2002,13 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -2613,8 +2633,13 @@ matmul_i16 (gfc_array_i16 * const restrict retarra
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmul_i2.c
===================================================================
--- generated/matmul_i2.c	(Revision 263752)
+++ generated/matmul_i2.c	(Arbeitskopie)
@@ -317,8 +317,13 @@ matmul_i2_avx (gfc_array_i2 * const restrict retar
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -869,8 +874,13 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict reta
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1421,8 +1431,13 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict r
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1987,8 +2002,13 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict r
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -2613,8 +2633,13 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmul_i4.c
===================================================================
--- generated/matmul_i4.c	(Revision 263752)
+++ generated/matmul_i4.c	(Arbeitskopie)
@@ -317,8 +317,13 @@ matmul_i4_avx (gfc_array_i4 * const restrict retar
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -869,8 +874,13 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict reta
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1421,8 +1431,13 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict r
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1987,8 +2002,13 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict r
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -2613,8 +2633,13 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmul_i8.c
===================================================================
--- generated/matmul_i8.c	(Revision 263752)
+++ generated/matmul_i8.c	(Arbeitskopie)
@@ -317,8 +317,13 @@ matmul_i8_avx (gfc_array_i8 * const restrict retar
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -869,8 +874,13 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict reta
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1421,8 +1431,13 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict r
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1987,8 +2002,13 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict r
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -2613,8 +2633,13 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmul_r10.c
===================================================================
--- generated/matmul_r10.c	(Revision 263752)
+++ generated/matmul_r10.c	(Arbeitskopie)
@@ -317,8 +317,13 @@ matmul_r10_avx (gfc_array_r10 * const restrict ret
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -869,8 +874,13 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict re
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1421,8 +1431,13 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1987,8 +2002,13 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -2613,8 +2633,13 @@ matmul_r10 (gfc_array_r10 * const restrict retarra
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmul_r16.c
===================================================================
--- generated/matmul_r16.c	(Revision 263752)
+++ generated/matmul_r16.c	(Arbeitskopie)
@@ -317,8 +317,13 @@ matmul_r16_avx (gfc_array_r16 * const restrict ret
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -869,8 +874,13 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict re
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1421,8 +1431,13 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1987,8 +2002,13 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -2613,8 +2633,13 @@ matmul_r16 (gfc_array_r16 * const restrict retarra
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmul_r4.c
===================================================================
--- generated/matmul_r4.c	(Revision 263752)
+++ generated/matmul_r4.c	(Arbeitskopie)
@@ -317,8 +317,13 @@ matmul_r4_avx (gfc_array_r4 * const restrict retar
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -869,8 +874,13 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict reta
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1421,8 +1431,13 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict r
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1987,8 +2002,13 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict r
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -2613,8 +2633,13 @@ matmul_r4 (gfc_array_r4 * const restrict retarray,
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmul_r8.c
===================================================================
--- generated/matmul_r8.c	(Revision 263752)
+++ generated/matmul_r8.c	(Arbeitskopie)
@@ -317,8 +317,13 @@ matmul_r8_avx (gfc_array_r8 * const restrict retar
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -869,8 +874,13 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict reta
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1421,8 +1431,13 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict r
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1987,8 +2002,13 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict r
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -2613,8 +2633,13 @@ matmul_r8 (gfc_array_r8 * const restrict retarray,
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmulavx128_c10.c
===================================================================
--- generated/matmulavx128_c10.c	(Revision 263752)
+++ generated/matmulavx128_c10.c	(Arbeitskopie)
@@ -282,8 +282,13 @@ matmul_c10_avx128_fma3 (gfc_array_c10 * const rest
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -835,8 +840,13 @@ matmul_c10_avx128_fma4 (gfc_array_c10 * const rest
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmulavx128_c16.c
===================================================================
--- generated/matmulavx128_c16.c	(Revision 263752)
+++ generated/matmulavx128_c16.c	(Arbeitskopie)
@@ -282,8 +282,13 @@ matmul_c16_avx128_fma3 (gfc_array_c16 * const rest
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -835,8 +840,13 @@ matmul_c16_avx128_fma4 (gfc_array_c16 * const rest
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmulavx128_c4.c
===================================================================
--- generated/matmulavx128_c4.c	(Revision 263752)
+++ generated/matmulavx128_c4.c	(Arbeitskopie)
@@ -282,8 +282,13 @@ matmul_c4_avx128_fma3 (gfc_array_c4 * const restri
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -835,8 +840,13 @@ matmul_c4_avx128_fma4 (gfc_array_c4 * const restri
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmulavx128_c8.c
===================================================================
--- generated/matmulavx128_c8.c	(Revision 263752)
+++ generated/matmulavx128_c8.c	(Arbeitskopie)
@@ -282,8 +282,13 @@ matmul_c8_avx128_fma3 (gfc_array_c8 * const restri
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -835,8 +840,13 @@ matmul_c8_avx128_fma4 (gfc_array_c8 * const restri
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmulavx128_i1.c
===================================================================
--- generated/matmulavx128_i1.c	(Revision 263752)
+++ generated/matmulavx128_i1.c	(Arbeitskopie)
@@ -282,8 +282,13 @@ matmul_i1_avx128_fma3 (gfc_array_i1 * const restri
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -835,8 +840,13 @@ matmul_i1_avx128_fma4 (gfc_array_i1 * const restri
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmulavx128_i16.c
===================================================================
--- generated/matmulavx128_i16.c	(Revision 263752)
+++ generated/matmulavx128_i16.c	(Arbeitskopie)
@@ -282,8 +282,13 @@ matmul_i16_avx128_fma3 (gfc_array_i16 * const rest
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -835,8 +840,13 @@ matmul_i16_avx128_fma4 (gfc_array_i16 * const rest
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmulavx128_i2.c
===================================================================
--- generated/matmulavx128_i2.c	(Revision 263752)
+++ generated/matmulavx128_i2.c	(Arbeitskopie)
@@ -282,8 +282,13 @@ matmul_i2_avx128_fma3 (gfc_array_i2 * const restri
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -835,8 +840,13 @@ matmul_i2_avx128_fma4 (gfc_array_i2 * const restri
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmulavx128_i4.c
===================================================================
--- generated/matmulavx128_i4.c	(Revision 263752)
+++ generated/matmulavx128_i4.c	(Arbeitskopie)
@@ -282,8 +282,13 @@ matmul_i4_avx128_fma3 (gfc_array_i4 * const restri
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -835,8 +840,13 @@ matmul_i4_avx128_fma4 (gfc_array_i4 * const restri
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmulavx128_i8.c
===================================================================
--- generated/matmulavx128_i8.c	(Revision 263752)
+++ generated/matmulavx128_i8.c	(Arbeitskopie)
@@ -282,8 +282,13 @@ matmul_i8_avx128_fma3 (gfc_array_i8 * const restri
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -835,8 +840,13 @@ matmul_i8_avx128_fma4 (gfc_array_i8 * const restri
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmulavx128_r10.c
===================================================================
--- generated/matmulavx128_r10.c	(Revision 263752)
+++ generated/matmulavx128_r10.c	(Arbeitskopie)
@@ -282,8 +282,13 @@ matmul_r10_avx128_fma3 (gfc_array_r10 * const rest
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -835,8 +840,13 @@ matmul_r10_avx128_fma4 (gfc_array_r10 * const rest
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmulavx128_r16.c
===================================================================
--- generated/matmulavx128_r16.c	(Revision 263752)
+++ generated/matmulavx128_r16.c	(Arbeitskopie)
@@ -282,8 +282,13 @@ matmul_r16_avx128_fma3 (gfc_array_r16 * const rest
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -835,8 +840,13 @@ matmul_r16_avx128_fma4 (gfc_array_r16 * const rest
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmulavx128_r4.c
===================================================================
--- generated/matmulavx128_r4.c	(Revision 263752)
+++ generated/matmulavx128_r4.c	(Arbeitskopie)
@@ -282,8 +282,13 @@ matmul_r4_avx128_fma3 (gfc_array_r4 * const restri
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -835,8 +840,13 @@ matmul_r4_avx128_fma4 (gfc_array_r4 * const restri
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmulavx128_r8.c
===================================================================
--- generated/matmulavx128_r8.c	(Revision 263752)
+++ generated/matmulavx128_r8.c	(Arbeitskopie)
@@ -282,8 +282,13 @@ matmul_r8_avx128_fma3 (gfc_array_r8 * const restri
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -835,8 +840,13 @@ matmul_r8_avx128_fma4 (gfc_array_r8 * const restri
 	return;
 
       /* Adjust size of t1 to what is needed.  */
-      index_type t1_dim;
-      t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+      index_type t1_dim, a_sz;
+      if (aystride == 1)
+        a_sz = rystride;
+      else
+        a_sz = a_dim1;
+
+      t1_dim = a_sz * 256 + b_dim1;
       if (t1_dim > 65536)
 	t1_dim = 65536;
 

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3: matmul_19.f90 --]
[-- Type: text/x-fortran; name="matmul_19.f90", Size: 566 bytes --]

! { dg-do  run }
! { dg-options "-finline-matmul-limit=0" }
! PR 86704 - this used to segfault. Test case by Stanislav Paláček.

program testmaticovenasobeni
implicit none
  
  character(len=10) :: line
  write (unit=line,fmt=*) testmatmul(120,1,3)

  contains

   function testmatmul(m,n,o)
     integer, intent(in) :: m,n,o
     real    :: A(n,m),B(n,o),C(m,o)
     logical :: testmatmul
    
     call random_number(A)
     call random_number(B)
     
     C=matmul(transpose(A),B) 
     testmatmul=.true.
   end function 

end program testmaticovenasobeni

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [patch, libfortran] correct buffer size in library matmul
  2018-08-25 19:38 [patch, libfortran] correct buffer size in library matmul Thomas Koenig
@ 2018-08-25 21:23 ` Jerry DeLisle
  0 siblings, 0 replies; 2+ messages in thread
From: Jerry DeLisle @ 2018-08-25 21:23 UTC (permalink / raw)
  To: Thomas Koenig, fortran, gcc-patches

On 08/25/2018 12:38 PM, Thomas Koenig wrote:
> Hello world,
> 
> the attached patch fixes a regression where the calculation of the size
> of the buffer for matmul was too small for ine special case.
> 
> Regression-tested. OK for trunk?
> 
> Regards
> 
>      Thomas

OK Thomas and thanks for the fix.

Jerry

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2018-08-25 21:23 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-08-25 19:38 [patch, libfortran] correct buffer size in library matmul Thomas Koenig
2018-08-25 21:23 ` Jerry DeLisle

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).