* [patch, fortran] Implement blocked eoshift for eoshift0
@ 2017-07-01 13:48 Thomas Koenig
2017-07-02 11:53 ` Paul Richard Thomas
0 siblings, 1 reply; 2+ messages in thread
From: Thomas Koenig @ 2017-07-01 13:48 UTC (permalink / raw)
To: fortran, gcc-patches
[-- Attachment #1: Type: text/plain, Size: 940 bytes --]
Hello world,
the attached patch implements the blocked algorithm for
constant shift for dim > 1 for eoshift0 (which handles
the case of constant shift and constant fill value).
Speedup, as for cshift, is large. Moving a 500*500*500
array by -3 with eo_bench.f90 (also attached):
$ gfortran -O3 eo_bench.f90 && ./a.out
dim = 1 t = 0.451796889
dim = 2 t = 0.183514118
dim = 3 t = 0.184015989
$ gfortran-7 -static-libgfortran -O3 eo_bench.f90 && ./a.out
dim = 1 t = 0.955736041
dim = 2 t = 1.42228103
dim = 3 t = 3.00043702
Regression-tested. OK for trunk?
Regards
Thomas
2017-07-01 Thomas Koenig <tkoenig@gcc.gnu.org>
* intrinsics/eoshift0.c: For contiguous arrays, use
block algorithm. Use memcpy where possible.
2017-07-01 Thomas Koenig <tkoenig@gcc.gnu.org>
* gfortran/eoshift_3.f90: New test.
[-- Attachment #2: eoshift_3.f90 --]
[-- Type: text/x-fortran, Size: 4311 bytes --]
! { dg-do run }
! Check that eoshift works for three-dimensional arrays.
module x
implicit none
contains
subroutine eoshift_0 (array, shift, boundary, dim, res)
real, dimension(:,:,:), intent(in) :: array
real, dimension(:,:,:), intent(out) :: res
integer, value :: shift
real, optional, intent(in) :: boundary
integer, optional, intent(in) :: dim
integer :: s1, s2, s3
integer :: n1, n2, n3
real :: b
integer :: d
if (present(boundary)) then
b = boundary
else
b = 0.0
end if
if (present(dim)) then
d = dim
else
d = 1
end if
n1 = size(array,1)
n2 = size(array,2)
n3 = size(array,3)
select case(dim)
case(1)
if (shift > 0) then
shift = min(shift, n1)
do s3=1,n3
do s2=1,n2
do s1= 1, n1 - shift
res(s1,s2,s3) = array(s1+shift,s2,s3)
end do
do s1 = n1 - shift + 1,n1
res(s1,s2,s3) = b
end do
end do
end do
else
shift = max(shift, -n1)
do s3=1,n3
do s2=1,n2
do s1=1,-shift
res(s1,s2,s3) = b
end do
do s1= 1-shift,n1
res(s1,s2,s3) = array(s1+shift,s2,s3)
end do
end do
end do
end if
case(2)
if (shift > 0) then
shift = min(shift, n2)
do s3=1,n3
do s2=1, n2 - shift
do s1=1,n1
res(s1,s2,s3) = array(s1,s2+shift,s3)
end do
end do
do s2=n2 - shift + 1, n2
do s1=1,n1
res(s1,s2,s3) = b
end do
end do
end do
else
shift = max(shift, -n2)
do s3=1,n3
do s2=1,-shift
do s1=1,n1
res(s1,s2,s3) = b
end do
end do
do s2=1-shift,n2
do s1=1,n1
res(s1,s2,s3) = array(s1,s2+shift,s3)
end do
end do
end do
end if
case(3)
if (shift > 0) then
shift = min(shift, n3)
do s3=1,n3 - shift
do s2=1, n2
do s1=1,n1
res(s1,s2,s3) = array(s1,s2,s3+shift)
end do
end do
end do
do s3=n3 - shift + 1, n3
do s2=1, n2
do s1=1,n1
res(s1,s2,s3) = b
end do
end do
end do
else
shift = max(shift, -n3)
do s3=1,-shift
do s2=1,n2
do s1=1,n1
res(s1,s2,s3) = b
end do
end do
end do
do s3=1-shift,n3
do s2=1,n2
do s1=1,n1
res(s1,s2,s3) = array(s1,s2,s3+shift)
end do
end do
end do
end if
case default
stop "Illegal dim"
end select
end subroutine eoshift_0
end module x
program main
use x
implicit none
integer, parameter :: n1=2,n2=4,n3=2
real, dimension(n1,n2,n3) :: a,b,c
integer :: dim, shift, shift_lim
call random_number(a)
do dim=1,3
if (dim == 1) then
shift_lim = n1 + 1
else if (dim == 2) then
shift_lim = n2 + 1
else
shift_lim = n3 + 1
end if
do shift=-shift_lim, shift_lim
b = eoshift(a,shift,dim=dim)
call eoshift_0 (a, shift=shift, dim=dim, res=c)
if (any (b /= c)) then
print *,"dim = ", dim, "shift = ", shift
call abort
end if
end do
end do
call random_number(b)
c = b
do dim=1,3
if (dim == 1) then
shift_lim = n1/2 + 1
else if (dim == 2) then
shift_lim = n2/2 + 1
else
shift_lim = n3/2 + 1
end if
do shift=-shift_lim, shift_lim
b(1:n1:2,:,:) = eoshift(a(1:n1/2,:,:),shift,dim=dim)
call eoshift_0 (a(1:n1/2,:,:), shift=shift, dim=dim, res=c(1:n1:2,:,:))
if (any (b /= c)) call abort
end do
end do
end program main
[-- Attachment #3: p2.diff --]
[-- Type: text/x-patch, Size: 4493 bytes --]
Index: intrinsics/eoshift0.c
===================================================================
--- intrinsics/eoshift0.c (Revision 249632)
+++ intrinsics/eoshift0.c (Arbeitskopie)
@@ -53,7 +53,8 @@
index_type len;
index_type n;
index_type arraysize;
-
+ bool do_blocked;
+
/* The compiler cannot figure out that these are set, initialize
them to avoid warnings. */
len = 0;
@@ -102,39 +103,94 @@
count[0] = 0;
sstride[0] = -1;
rstride[0] = -1;
+
+ if (which > 0)
+ {
+ /* Test if both ret and array are contiguous. */
+ size_t r_ex, a_ex;
+ r_ex = 1;
+ a_ex = 1;
+ do_blocked = true;
+ dim = GFC_DESCRIPTOR_RANK (array);
+ for (n = 0; n < dim; n ++)
+ {
+ index_type rs, as;
+ rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+ if (rs != r_ex)
+ {
+ do_blocked = false;
+ break;
+ }
+ as = GFC_DESCRIPTOR_STRIDE (array, n);
+ if (as != a_ex)
+ {
+ do_blocked = false;
+ break;
+ }
+ r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+ a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+ }
+ }
+ else
+ do_blocked = false;
+
n = 0;
- for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+
+ if (do_blocked)
{
- if (dim == which)
- {
- roffset = GFC_DESCRIPTOR_STRIDE_BYTES(ret,dim);
- if (roffset == 0)
- roffset = size;
- soffset = GFC_DESCRIPTOR_STRIDE_BYTES(array,dim);
- if (soffset == 0)
- soffset = size;
- len = GFC_DESCRIPTOR_EXTENT(array,dim);
- }
- else
- {
- count[n] = 0;
- extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
- rstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(ret,dim);
- sstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(array,dim);
- n++;
- }
+ /* For contiguous arrays, use the relationship that
+
+ dimension(n1,n2,n3) :: a, b
+ b = eoshift(a,sh,3)
+
+ can be dealt with as if
+
+ dimension(n1*n2*n3) :: an, bn
+ bn = eoshift(a,sh*n1*n2,1)
+
+ so a block move can be used for dim>1. */
+ len = GFC_DESCRIPTOR_STRIDE(array, which)
+ * GFC_DESCRIPTOR_EXTENT(array, which);
+ shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+ roffset = size;
+ soffset = size;
+ for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+ {
+ count[n] = 0;
+ extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+ rstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(ret,dim);
+ sstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(array,dim);
+ n++;
+ }
+ count[n] = 0;
+ dim = GFC_DESCRIPTOR_RANK (array) - which;
}
- if (sstride[0] == 0)
- sstride[0] = size;
- if (rstride[0] == 0)
- rstride[0] = size;
+ else
+ {
+ for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+ {
+ if (dim == which)
+ {
+ roffset = GFC_DESCRIPTOR_STRIDE_BYTES(ret,dim);
+ if (roffset == 0)
+ roffset = size;
+ soffset = GFC_DESCRIPTOR_STRIDE_BYTES(array,dim);
+ if (soffset == 0)
+ soffset = size;
+ len = GFC_DESCRIPTOR_EXTENT(array,dim);
+ }
+ else
+ {
+ count[n] = 0;
+ extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+ rstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(ret,dim);
+ sstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(array,dim);
+ n++;
+ }
+ }
+ dim = GFC_DESCRIPTOR_RANK (array);
+ }
- dim = GFC_DESCRIPTOR_RANK (array);
- rstride0 = rstride[0];
- sstride0 = sstride[0];
- rptr = ret->base_addr;
- sptr = array->base_addr;
-
if ((shift >= 0 ? shift : -shift) > len)
{
shift = len;
@@ -148,6 +204,11 @@
len = len + shift;
}
+ rstride0 = rstride[0];
+ sstride0 = sstride[0];
+ rptr = ret->base_addr;
+ sptr = array->base_addr;
+
while (rptr)
{
/* Do the shift for this dimension. */
@@ -161,12 +222,23 @@
src = sptr;
dest = &rptr[-shift * roffset];
}
- for (n = 0; n < len; n++)
- {
- memcpy (dest, src, size);
- dest += roffset;
- src += soffset;
- }
+ /* If the elements are contiguous, perform a single block move. */
+
+ if (soffset == size && roffset == size)
+ {
+ size_t chunk = size * len;
+ memcpy (dest, src, chunk);
+ dest += chunk;
+ }
+ else
+ {
+ for (n = 0; n < len; n++)
+ {
+ memcpy (dest, src, size);
+ dest += roffset;
+ src += soffset;
+ }
+ }
if (shift >= 0)
{
n = shift;
[-- Attachment #4: eo_bench.f90 --]
[-- Type: text/x-fortran, Size: 314 bytes --]
program main
implicit none
integer, parameter :: n=500
real, dimension(n,n,n) :: a, c
real :: t1, t2
integer :: dim
call random_number(a)
do dim=1,3
call cpu_time(t1)
c = eoshift(a, -3, dim=dim)
call cpu_time(t2)
print *,"dim = ", dim, " t = ", t2-t1
end do
end program main
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [patch, fortran] Implement blocked eoshift for eoshift0
2017-07-01 13:48 [patch, fortran] Implement blocked eoshift for eoshift0 Thomas Koenig
@ 2017-07-02 11:53 ` Paul Richard Thomas
0 siblings, 0 replies; 2+ messages in thread
From: Paul Richard Thomas @ 2017-07-02 11:53 UTC (permalink / raw)
To: Thomas Koenig; +Cc: fortran, gcc-patches
Hi Thomas,
The timings are impressive! OK for trunk.
Thanks
Paul
On 1 July 2017 at 14:48, Thomas Koenig <tkoenig@netcologne.de> wrote:
> Hello world,
>
> the attached patch implements the blocked algorithm for
> constant shift for dim > 1 for eoshift0 (which handles
> the case of constant shift and constant fill value).
>
> Speedup, as for cshift, is large. Moving a 500*500*500
> array by -3 with eo_bench.f90 (also attached):
>
> $ gfortran -O3 eo_bench.f90 && ./a.out
> dim = 1 t = 0.451796889
> dim = 2 t = 0.183514118
> dim = 3 t = 0.184015989
> $ gfortran-7 -static-libgfortran -O3 eo_bench.f90 && ./a.out
> dim = 1 t = 0.955736041
> dim = 2 t = 1.42228103
> dim = 3 t = 3.00043702
>
> Regression-tested. OK for trunk?
>
> Regards
>
> Thomas
>
> 2017-07-01 Thomas Koenig <tkoenig@gcc.gnu.org>
>
> * intrinsics/eoshift0.c: For contiguous arrays, use
> block algorithm. Use memcpy where possible.
>
> 2017-07-01 Thomas Koenig <tkoenig@gcc.gnu.org>
>
> * gfortran/eoshift_3.f90: New test.
--
"If you can't explain it simply, you don't understand it well enough"
- Albert Einstein
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2017-07-02 11:53 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-01 13:48 [patch, fortran] Implement blocked eoshift for eoshift0 Thomas Koenig
2017-07-02 11:53 ` Paul Richard Thomas
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).