From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 23196 invoked by alias); 11 Mar 2012 17:28:29 -0000 Received: (qmail 23180 invoked by uid 22791); 11 Mar 2012 17:28:27 -0000 X-SWARE-Spam-Status: No, hits=-2.8 required=5.0 tests=ALL_TRUSTED,AWL,BAYES_00 X-Spam-Check-By: sourceware.org Received: from localhost (HELO gcc.gnu.org) (127.0.0.1) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Sun, 11 Mar 2012 17:28:12 +0000 From: "talebi.hossein at gmail dot com" To: gcc-bugs@gcc.gnu.org Subject: [Bug libfortran/52537] slow trim function Date: Sun, 11 Mar 2012 17:28:00 -0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: changed X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: libfortran X-Bugzilla-Keywords: missed-optimization X-Bugzilla-Severity: enhancement X-Bugzilla-Who: talebi.hossein at gmail dot com X-Bugzilla-Status: WAITING X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Changed-Fields: Message-ID: In-Reply-To: References: X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated Content-Type: text/plain; charset="UTF-8" MIME-Version: 1.0 Mailing-List: contact gcc-bugs-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-bugs-owner@gcc.gnu.org X-SW-Source: 2012-03/txt/msg00809.txt.bz2 http://gcc.gnu.org/bugzilla/show_bug.cgi?id=52537 --- Comment #3 from Hossein Talebi 2012-03-11 17:27:42 UTC --- Hello, Hummm, a self contained example... That will be possible when I go back to the office. I can also check it myself with a simpler example. For now, maybe you can figure out something from this subroutine. I can probably write this subroutine in another way to make it faster. But, the question still is, why intel does the same thing much faster. !============================================================================== subroutine femmesh_input_elements(this,funit,nelem,max_connectivity,fname,reflag,element_format) !attention: here the Proc0 reads and distributes to other ! procs. also, the profiling is done here partially ! for the number of element and so on. The format of saving ! element connectivities is done according to METIS to make ! it easier to distribute the elements. use strings !------------------- INOUT variables------------------ class(ty_femmesh) :: this type(ty_element_format) :: element_format integer,intent(in) :: funit character(*) :: fname, reflag integer :: nelem,max_connectivity, linenum integer :: j,ferror, ios,i character (len=1200) :: st_input integer :: maxc,funit2, buf_siz,iprc, ierror integer, allocatable :: ELEMENTS_buf(:,:) integer :: I50(0:50) character(1200) :: pcommand integer mstatus(MPI_STATUS_SIZE), tag, G_elid, elid, ipr, eltab,nn , pEidf, pEconnf,pEmatidf funit2=funit linenum=0; ferror=0; eltab=0 this%max_connect=element_format%max_connectivity this%max_Eltab=MIN_TABS+this%max_connect pEidf=element_format%pEidf pEconnf=element_format%pEconnf pEmatidf=element_format%pEmatidf ! distribute the initial mesh to the processors so every processor knows ! what element it will get. The last element gets the remaining of the elements + its share this%numElements= int( nelem / this%femcomm%nprocs) if (this%femcomm%me == this%femcomm%nprocs-1) then if (this%femcomm%me == this%femcomm%nprocs-1) this%numElements=this%numElements+ mod(nelem,this%femcomm%nprocs) endif this%femcomm%elemdist(1)=1 !make elemdist and numEl_pp do i=1, this%femcomm%nprocs this%femcomm%elemdist(i+1) =this%femcomm%elemdist(i)+ int( nelem / this%femcomm%nprocs) this%femcomm%numEl_pp(i)=int( nelem / this%femcomm%nprocs) enddo this%femcomm%elemdist(this%femcomm%nprocs+1) = this%femcomm%elemdist(this%femcomm%nprocs+1)+ mod(nelem,this%femcomm%nprocs) this%femcomm%numEl_pp(this%femcomm%nprocs) = this%femcomm%numEl_pp(this%femcomm%nprocs)+ mod(nelem,this%femcomm%nprocs) !preparing the buffer to send and recieve elements buf_siz=this%femcomm%numEl_pp(this%femcomm%nprocs) +1 eltab=2+max_connectivity Call allocate_I2d(this%Elements_input,eltab,buf_siz) Call allocate_I2d(ELEMENTS_buf,eltab,buf_siz) Call allocate_I2d(this%Elements, this%max_Eltab ,this%numElements) Call MPI_BARRIER(this%femcomm%femworld,ierror) tag = 1; G_elid=0 ;I50=0 if (this%femcomm%me .eq. 0) then if (trim(element_format%fname) /= '') then if (element_format%reflag=='txt') then funit2=openfile_txt(element_format%fname) else Call this%error%universe_one('femmesh_input_elements: I am not able to read this type of element file') endif endif ELEMENTS_buf=0 do ipr=0, this%femcomm%nprocs -1 j=0; do read (funit2,"(A200)",iostat=ferror) st_input if (ferror < 0) then call this%error%universe_one("femmesh_input_elements: unexpected end of the file") endif call compact(st_input) if (trim(st_input)=='' .or. st_input(1:1)=='#') then cycle endif j=j+1 linenum=linenum+1 read (st_input,*,IOSTAT=ios) I50(1:50) I50(0)=1 !set the zero index as 1, which sets all the unset values 1 elid=I50(1) G_elid=G_elid+1 if (G_elid /=elid) then print *,st_input call this%error%universe_one("femmesh_input_elements: Bad element numbering or number "// & " of elements or element format") endif !ELEMENTS_buf(1:eltab,j)=I50(1:eltab) ELEMENTS_buf(1,j)=I50(pEidf) ELEMENTS_buf(2,j)=I50(pEmatidf) ELEMENTS_buf(3:3+max_connectivity-1 ,j)=I50(pEconnf:pEconnf+max_connectivity-1) I50=0 if (j >= this%femcomm%numEl_pp(ipr+1)) exit enddo if (ipr ==0) then this%Elements_input=ELEMENTS_buf else call MPI_SEND(ELEMENTS_buf, eltab*buf_siz, MPI_INTEGER, ipr, tag, this%femcomm%femworld, ierror) endif enddo else call MPI_RECV(ELEMENTS_buf, eltab*buf_siz, MPI_INTEGER, 0, tag, this%femcomm%femworld, mstatus, ierror) this%Elements_input=ELEMENTS_buf endif !mode all the data to ELements do i=1,this%numElements do nn=3, eltab if ( (this%Elements_input(nn,i)==0) .or. (this%Elements_input(nn,i)==this%Elements_input(nn-1,i) ) & .and. (nn .ne. 3) ) Exit this%Elements(pEconn+nn-3,i)=this%Elements_input(nn,i) enddo this%Elements(pEid,i)=this%Elements_input(1,i) this%Elements(pEmatid,i)=this%Elements_input(2,i) this%Elements(pEnnode,i)=nn-3 enddo deallocate(this%Elements_input) !nullify the Elements_input, but maybe later it can be of use call this%error%info("Reading Elements finished") end subroutine femmesh_input_elements