From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 84096 invoked by alias); 14 Jun 2017 19:42:14 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 83157 invoked by uid 89); 14 Jun 2017 19:42:13 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-10.9 required=5.0 tests=AWL,BAYES_00,GIT_PATCH_2,GIT_PATCH_3,KAM_ASCII_DIVIDERS,RCVD_IN_DNSWL_LOW,SPF_PASS,T_RP_MATCHES_RCVD autolearn=ham version=3.3.2 spammy=Speed, Wrong, n11, relationship X-Spam-User: qpsmtpd, 2 recipients X-HELO: cc-smtpout2.netcologne.de Received: from cc-smtpout2.netcologne.de (HELO cc-smtpout2.netcologne.de) (89.1.8.212) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 14 Jun 2017 19:42:06 +0000 Received: from cc-smtpin3.netcologne.de (cc-smtpin3.netcologne.de [89.1.8.203]) by cc-smtpout2.netcologne.de (Postfix) with ESMTP id E5FB812733; Wed, 14 Jun 2017 21:42:06 +0200 (CEST) Received: from localhost (localhost [127.0.0.1]) by cc-smtpin3.netcologne.de (Postfix) with ESMTP id D833611D6F; Wed, 14 Jun 2017 21:42:06 +0200 (CEST) Received: from [78.35.152.73] (helo=cc-smtpin3.netcologne.de) by localhost with ESMTP (eXpurgate 4.1.9) (envelope-from ) id 5941918e-0242-7f0000012729-7f000001a12a-1 for ; Wed, 14 Jun 2017 21:42:06 +0200 Received: from [192.168.178.20] (xdsl-78-35-152-73.netcologne.de [78.35.152.73]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by cc-smtpin3.netcologne.de (Postfix) with ESMTPSA; Wed, 14 Jun 2017 21:42:02 +0200 (CEST) To: "fortran@gcc.gnu.org" , gcc-patches From: Thomas Koenig Subject: [patch, libfortran] Speed up cshift for dim > 1 Message-ID: <2c24688d-22e5-efde-4429-1eab756360a5@netcologne.de> Date: Wed, 14 Jun 2017 19:42:00 -0000 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.1.1 MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------AD19AA950E95714EAEB28E45" X-SW-Source: 2017-06/txt/msg01083.txt.bz2 This is a multi-part message in MIME format. --------------AD19AA950E95714EAEB28E45 Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 7bit Content-length: 2552 Hello world, the attached patch implements a blocked algorithm for improving the speed of cshift for dim > 1. It uses the fact that integer, dimension (n1,n2,n3) :: a, b b = cshift(a,shift,3) is identical, as far as the memory locations is concerned. integer, dimension (n1*n2*n3) :: c, d d = cshift(c, shift*n1*n2, 1) The speedup is quite large; from being really slow for dim > 1, this patch makes it go even faster. Below there are some comparisons for the attached benchmark, do-1.f90. gfortran-7 uses the old library version. Interestingly, the library version is also much faster than an implementation of straight DO loops. Regression-tested. OK for trunk? Regards Thomas $ gfortran-7 -static-libgfortran -O3 do-1.f90 && ./a.out Testing explicit DO loops Dim = 1 Elapsed CPU time = 5.71363878 Dim = 2 Elapsed CPU time = 5.40494061 Dim = 3 Elapsed CPU time = 5.40769291 Testing built-in cshift Dim = 1 Elapsed CPU time = 3.43479729 Dim = 2 Elapsed CPU time = 11.7110386 Dim = 3 Elapsed CPU time = 31.0966301 $ gfortran -static-libgfortran -O3 do-1.f90 && ./a.out Testing explicit DO loops Dim = 1 Elapsed CPU time = 5.73881340 Dim = 2 Elapsed CPU time = 5.38435745 Dim = 3 Elapsed CPU time = 5.38971329 Testing built-in cshift Dim = 1 Elapsed CPU time = 3.42018127 Dim = 2 Elapsed CPU time = 2.24075317 Dim = 3 Elapsed CPU time = 2.23136330 2017-06-14 Thomas Koenig PR fortran/52473 * m4/cshift0.m4: For arrays that are contiguous up to shift, implement blocked algorighm for cshift. * generated/cshift0_c10.c: Regenerated. * generated/cshift0_c16.c: Regenerated. * generated/cshift0_c4.c: Regenerated. * generated/cshift0_c8.c: Regenerated. * generated/cshift0_i1.c: Regenerated. * generated/cshift0_i16.c: Regenerated. * generated/cshift0_i2.c: Regenerated. * generated/cshift0_i4.c: Regenerated. * generated/cshift0_i8.c: Regenerated. * generated/cshift0_r10.c: Regenerated. * generated/cshift0_r16.c: Regenerated. * generated/cshift0_r4.c: Regenerated. * generated/cshift0_r8.c: Regenerated. 2017-06-14 Thomas Koenig PR fortran/52473 * gfortran.dg/cshift_1.f90: New test. --------------AD19AA950E95714EAEB28E45 Content-Type: text/x-patch; name="p6.diff" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="p6.diff" Content-length: 48036 Index: m4/cshift0.m4 =================================================================== --- m4/cshift0.m4 (Revision 249104) +++ m4/cshift0.m4 (Arbeitskopie) @@ -52,6 +52,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -64,33 +67,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_c10.c =================================================================== --- generated/cshift0_c10.c (Revision 249104) +++ generated/cshift0_c10.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_c16.c =================================================================== --- generated/cshift0_c16.c (Revision 249104) +++ generated/cshift0_c16.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_c4.c =================================================================== --- generated/cshift0_c4.c (Revision 249104) +++ generated/cshift0_c4.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_c8.c =================================================================== --- generated/cshift0_c8.c (Revision 249104) +++ generated/cshift0_c8.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_i1.c =================================================================== --- generated/cshift0_i1.c (Revision 249104) +++ generated/cshift0_i1.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_i16.c =================================================================== --- generated/cshift0_i16.c (Revision 249104) +++ generated/cshift0_i16.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_i2.c =================================================================== --- generated/cshift0_i2.c (Revision 249104) +++ generated/cshift0_i2.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_i4.c =================================================================== --- generated/cshift0_i4.c (Revision 249104) +++ generated/cshift0_i4.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_i8.c =================================================================== --- generated/cshift0_i8.c (Revision 249104) +++ generated/cshift0_i8.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_r10.c =================================================================== --- generated/cshift0_r10.c (Revision 249104) +++ generated/cshift0_r10.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_r16.c =================================================================== --- generated/cshift0_r16.c (Revision 249104) +++ generated/cshift0_r16.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_r4.c =================================================================== --- generated/cshift0_r4.c (Revision 249104) +++ generated/cshift0_r4.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; Index: generated/cshift0_r8.c =================================================================== --- generated/cshift0_r8.c (Revision 249104) +++ generated/cshift0_r8.c (Arbeitskopie) @@ -51,6 +51,9 @@ index_type len; index_type n; + bool do_blocked; + index_type r_ex, a_ex; + which = which - 1; sstride[0] = 0; rstride[0] = 0; @@ -63,33 +66,99 @@ soffset = 1; len = 0; - for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + r_ex = 1; + a_ex = 1; + + if (which > 0) { - if (dim == which) - { - roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); - if (roffset == 0) - roffset = 1; - soffset = GFC_DESCRIPTOR_STRIDE(array,dim); - if (soffset == 0) - soffset = 1; - len = GFC_DESCRIPTOR_EXTENT(array,dim); - } - else - { - count[n] = 0; - extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); - rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); - sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); - n++; - } + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } } - if (sstride[0] == 0) - sstride[0] = 1; - if (rstride[0] == 0) - rstride[0] = 1; + else + do_blocked = false; - dim = GFC_DESCRIPTOR_RANK (array); + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + rstride0 = rstride[0]; sstride0 = sstride[0]; rptr = ret->base_addr; --------------AD19AA950E95714EAEB28E45 Content-Type: text/x-fortran; name="cshift_1.f90" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="cshift_1.f90" Content-length: 3148 ! { dg-do run } ! Take cshift through its paces to make sure no boundary ! cases are wrong with the optimized cshift. module kinds integer, parameter :: sp = selected_real_kind(6) ! Single precision end module kinds module replacements use kinds contains subroutine cshift_sp_3_v1 (array, shift, dim, res) integer, parameter :: wp = sp real(kind=wp), dimension(:,:,:), intent(in) :: array integer, intent(in) :: shift, dim real(kind=wp), dimension(:,:,:), intent(out) :: res integer :: i,j,k integer :: sh, rsh integer :: n integer :: n2, n3 res = 0 n3 = size(array,3) n2 = size(array,2) n1 = size(array,1) if (dim == 1) then n = n1 sh = modulo(shift, n) rsh = n - sh do k=1, n3 do j=1, n2 do i=1, rsh res(i,j,k) = array(i+sh,j,k) end do do i=rsh+1,n res(i,j,k) = array(i-rsh,j,k) end do end do end do else if (dim == 2) then n = n2 sh = modulo(shift,n) rsh = n - sh do k=1, n3 do j=1, rsh do i=1, n1 res(i,j,k) = array(i,j+sh, k) end do end do do j=rsh+1, n do i=1, n1 res(i,j,k) = array(i,j-rsh, k) end do end do end do else if (dim == 3) then n = n3 sh = modulo(shift, n) rsh = n - sh do k=1, rsh do j=1, n2 do i=1, n1 res(i,j,k) = array(i, j, k+sh) end do end do end do do k=rsh+1, n do j=1, n2 do i=1, n1 res(i,j, k) = array(i, j, k-rsh) end do end do end do else stop "Wrong argument to dim" end if end subroutine cshift_sp_3_v1 end module replacements program testme use kinds use replacements implicit none integer, parameter :: wp = sp ! Working precision INTEGER, PARAMETER :: n = 7 real(kind=wp), dimension(:,:,:), allocatable :: a,b,c integer i, j, k real:: t1, t2 integer, parameter :: nrep = 20 allocate (a(n,n,n), b(n,n,n),c(n,n,n)) call random_number(a) do k = 1,3 do i=-3,3,2 call cshift_sp_3_v1 (a, i, k, b) c = cshift(a,i,k) if (any (c /= b)) call abort end do end do deallocate (b,c) allocate (b(n-1,n-1,n-1),c(n-1,n-1,n-1)) do k=1,3 do i=-3,3,2 call cshift_sp_3_v1 (a(1:n-1,1:n-1,1:n-1), i, k, b) c = cshift(a(1:n-1,1:n-1,1:n-1), i, k) if (any (c /= b)) call abort end do end do deallocate (b,c) allocate (b(n,n-1,n-1), c(n,n-1,n-1)) do k=1,3 do i=-3,3,2 call cshift_sp_3_v1 (a(:,2:n,2:n), i, k, b) c = cshift (a(:,2:n,2:n), i, k) if (any (c /= b)) call abort end do end do deallocate (b,c) allocate (b(n,n,n-1), c(n,n,n-1)) do k=1,3 do i=-3,3,2 call cshift_sp_3_v1 (a(:,:,2:n), i, k, b) c = cshift (a(:,:,2:n), i, k) if (any (c /= b)) call abort end do end do end program testme --------------AD19AA950E95714EAEB28E45 Content-Type: text/x-fortran; name="do-1.f90" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="do-1.f90" Content-length: 2681 module kinds integer, parameter :: sp = selected_real_kind(6) ! Single precision integer, parameter :: dp = selected_real_kind(15) ! Double precision end module kinds module replacements use kinds contains subroutine cshift_sp_3_v1 (array, shift, dim, res) integer, parameter :: wp = sp real(kind=wp), dimension(:,:,:), intent(in), contiguous :: array integer, intent(in) :: shift, dim real(kind=wp), dimension(:,:,:), intent(out), contiguous :: res integer :: i,j,k integer :: sh, rsh integer :: n res = 0 if (dim == 1) then n = size(array,1) sh = modulo(shift, n) rsh = n - sh do k=1, size(array,3) do j=1, size(array,2) do i=1, rsh res(i,j,k) = array(i+sh,j,k) end do do i=rsh+1,n res(i,j,k) = array(i-rsh,j,k) end do end do end do else if (dim == 2) then n = size(array,2) sh = modulo(shift,n) rsh = n - sh do k=1, size(array,3) do j=1, rsh do i=1, size(array,1) res(i,j,k) = array(i,j+sh, k) end do end do do j=rsh+1, n do i=1, size(array,1) res(i,j,k) = array(i,j-rsh, k) end do end do end do else if (dim == 3) then n = size(array,3) sh = modulo(shift, n) rsh = n - sh do k=1, rsh do j=1, size(array,2) do i=1, size(array,1) res(i,j,k) = array(i, j, k+sh) end do end do end do do k=rsh+1, n do j=1, size(array,2) do i=1, size(array,1) res(i,j, k) = array(i, j, k-rsh) end do end do end do else stop "Wrong argument to dim" end if end subroutine cshift_sp_3_v1 end module replacements program testme use kinds use replacements implicit none integer, parameter :: wp = sp ! Working precision INTEGER, PARAMETER :: n = 500 real(kind=wp) :: a(n,n,n), b(n,n,n) integer i, j, k real t1, t2 print *,"Testing explicit DO loops" call random_number(a) do k = 1,3 call cpu_time ( t1 ) do j = 1, 10 call cshift_sp_3_v1 (a, 1, k, b) end do call cpu_time ( t2 ) write ( *, * ) 'Dim = ', k, ' Elapsed CPU time = ', t2-t1 end do print *,"Testing built-in cshift" do k = 1,3 call cpu_time ( t1 ) do j = 1, 10 b = cshift(a,1,k) end do call cpu_time ( t2 ) write ( *, * ) 'Dim = ', k, ' Elapsed CPU time = ', t2-t1 end do end program testme --------------AD19AA950E95714EAEB28E45--