[Bug tree-optimization/23970] loop-invariant-motion is not doing it's work

public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed

* [Bug tree-optimization/23970] loop-invariant-motion is not doing it's work
  2005-09-19 18:52 [Bug tree-optimization/23970] New: loop-invariant-motion is not doing it's work rguenth at gcc dot gnu dot org
@ 2005-09-19 18:52 ` rguenth at gcc dot gnu dot org
  2005-09-20 13:34 ` rakdver at gcc dot gnu dot org
  1 sibling, 0 replies; 3+ messages in thread
From: rguenth at gcc dot gnu dot org @ 2005-09-19 18:52 UTC (permalink / raw)
  To: gcc-bugs



-- 
           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |rakdver at atrey dot karlin
                   |                            |dot mff dot cuni dot cz


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23970


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [Bug tree-optimization/23970] New: loop-invariant-motion is not doing it's work
@ 2005-09-19 18:52 rguenth at gcc dot gnu dot org
  2005-09-19 18:52 ` [Bug tree-optimization/23970] " rguenth at gcc dot gnu dot org
  2005-09-20 13:34 ` rakdver at gcc dot gnu dot org
  0 siblings, 2 replies; 3+ messages in thread
From: rguenth at gcc dot gnu dot org @ 2005-09-19 18:52 UTC (permalink / raw)
  To: gcc-bugs

void Ekin(double *e, int *stridee,
          double *vx, int *stridevx,
          double *vy, int *stridevy,
          double *vz, int *stridevz,
          int *sz)
{
  int i1 = sz[0];
  int j1 = sz[1];
  int k1 = sz[2];
  int i, j, k;
  for (k=0; k<k1; ++k)
   for (j=0; j<j1; ++j)
    for (i=0; i<i1; ++i)
     {
       e[i + j * stridee[1] + k * stridee[2]]
        = 0.128 * (
            ((vx[i + j * stridevx[1] + k * stridevx[2]]
              + vx[i+1 + j * stridevx[1] + k * stridevx[2]])
             * (vx[i + j * stridevx[1] + k * stridevx[2]]
                + vx[i+1 + j * stridevx[1] + k * stridevx[2]]))
          + ((vy[i + j * stridevy[1] + k * stridevy[2]]
              + vy[i + (j+1) * stridevy[1] + k * stridevy[2]])
             * (vy[i + j * stridevy[1] + k * stridevy[2]]
                + vy[i + (j+1) * stridevy[1] + k * stridevy[2]]))
          + ((vz[i + j * stridevz[1] + k * stridevz[2]]
              + vz[i + j * stridevz[1] + (k+1) * stridevz[2]])
             * (vz[i + j * stridevz[1] + k * stridevz[2]]
                + vz[i + j * stridevz[1] + (k+1) * stridevz[2]])));

     }
}

lim moves all the j*stridev?[1] and k*stridev?[2] to the j-loop but
does not move the k*stridev?[2] to the k-loop.  This results in the
following asm.

Ekin:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        subl    $44, %esp
        movl    40(%ebp), %eax
        movl    $0, -24(%ebp)
        movl    (%eax), %edx
        movl    4(%eax), %ecx
        movl    8(%eax), %eax
        movl    %edx, -40(%ebp)
        movl    %ecx, -36(%ebp)
        testl   %eax, %eax
        movl    %eax, -32(%ebp)
        jle     .L13
.L4:
        movl    -24(%ebp), %edx
        movl    -36(%ebp), %eax
        incl    %edx
        testl   %eax, %eax
        movl    %edx, -48(%ebp)
        jle     .L7
        movl    -40(%ebp), %ecx
        movl    $0, -16(%ebp)
        testl   %ecx, %ecx
        jle     .L17
.L10:
        movl    28(%ebp), %eax
        movl    -24(%ebp), %ebx
        movl    -16(%ebp), %edi
        movl    $0, -56(%ebp)
        fldl    .LC0
        movl    8(%eax), %edx
        movl    4(%eax), %ecx
        incl    %edi
        movl    %edi, -28(%ebp)
        movl    -16(%ebp), %edi
        imull   %edx, %ebx
        movl    36(%ebp), %edx
        movl    4(%edx), %eax
        movl    8(%edx), %esi
        movl    12(%ebp), %edx
        imull   %eax, %edi
        movl    -16(%ebp), %eax
        movl    %edi, -44(%ebp)
        movl    4(%edx), %edi
        movl    -24(%ebp), %edx
        imull   %edi, %eax
        movl    12(%ebp), %edi
        imull   8(%edi), %edx
        addl    %edx, %eax
        movl    -16(%ebp), %edx
        sall    $3, %eax
        movl    %eax, -20(%ebp)
        movl    20(%ebp), %eax
        movl    4(%eax), %edi
        movl    -24(%ebp), %eax
        imull   %edi, %edx
        movl    20(%ebp), %edi
        imull   8(%edi), %eax
        addl    %eax, %edx
        movl    -16(%ebp), %eax
        imull   %ecx, %eax
        addl    %ebx, %eax
        leal    0(,%eax,8), %edi
        movl    -28(%ebp), %eax
        imull   %eax, %ecx
        movl    -24(%ebp), %eax
        addl    %ecx, %ebx
        movl    -44(%ebp), %ecx
        imull   %esi, %eax
        sall    $3, %ebx
        addl    %ecx, %eax
        movl    -48(%ebp), %ecx
        sall    $3, %eax
        movl    %eax, -52(%ebp)
        imull   %ecx, %esi
        movl    -44(%ebp), %ecx
        addl    %esi, %ecx
        leal    0(,%ecx,8), %esi
        .p2align 4,,15
.L5:
        movl    16(%ebp), %eax
        movl    24(%ebp), %ecx
        incl    -56(%ebp)
        fldl    (%eax,%edx,8)
        faddl   8(%eax,%edx,8)
        incl    %edx
        movl    -52(%ebp), %eax
        addl    $8, -52(%ebp)
        fldl    (%edi,%ecx)
        addl    $8, %edi
        faddl   (%ecx,%ebx)
        addl    $8, %ebx
        movl    32(%ebp), %ecx
        fldl    (%eax,%ecx)
        faddl   (%ecx,%esi)
        fxch    %st(2)
        addl    $8, %esi
        movl    -20(%ebp), %eax
        movl    8(%ebp), %ecx
        fmul    %st(0), %st
        fxch    %st(1)
        fmul    %st(0), %st
        faddp   %st, %st(1)
        fxch    %st(1)
        fmul    %st(0), %st
        faddp   %st, %st(1)
        fmul    %st(1), %st
        fstpl   (%eax,%ecx)
        addl    $8, %eax
        movl    %eax, -20(%ebp)
        movl    -56(%ebp), %eax
        cmpl    %eax, -40(%ebp)
        jne     .L5
        fstp    %st(0)
        movl    -28(%ebp), %edx
        cmpl    %edx, -36(%ebp)
        jle     .L7
.L18:
        movl    -40(%ebp), %ecx
        movl    %edx, -16(%ebp)
        testl   %ecx, %ecx
        jg      .L10
.L17:
        movl    -16(%ebp), %ecx
        incl    %ecx
        movl    %ecx, -28(%ebp)
        movl    -28(%ebp), %edx
        cmpl    %edx, -36(%ebp)
        jg      .L18
.L7:
        movl    -48(%ebp), %eax
        cmpl    %eax, -32(%ebp)
        movl    %eax, -24(%ebp)
        jne     .L4
.L13:
        addl    $44, %esp
        popl    %ebx
        popl    %esi
        popl    %edi
        popl    %ebp
        ret

-- 
           Summary: loop-invariant-motion is not doing it's work
           Product: gcc
           Version: 4.1.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P2
         Component: tree-optimization
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: rguenth at gcc dot gnu dot org
                CC: gcc-bugs at gcc dot gnu dot org


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23970


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [Bug tree-optimization/23970] loop-invariant-motion is not doing it's work
  2005-09-19 18:52 [Bug tree-optimization/23970] New: loop-invariant-motion is not doing it's work rguenth at gcc dot gnu dot org
  2005-09-19 18:52 ` [Bug tree-optimization/23970] " rguenth at gcc dot gnu dot org
@ 2005-09-20 13:34 ` rakdver at gcc dot gnu dot org
  1 sibling, 0 replies; 3+ messages in thread
From: rakdver at gcc dot gnu dot org @ 2005-09-20 13:34 UTC (permalink / raw)
  To: gcc-bugs

------- Additional Comments From rakdver at gcc dot gnu dot org  2005-09-20 13:33 -------
k*stridev?[2] cannot be moved more at the moment.  The loops looks like

for (k = ...)
  for (j = ...)
    {
      if (i >= i1)
        continue;
      do
        {
          tmp = k * stridevx[2];
          ...
        }  while (i < i1)
    }

Note that if i >= i1, the load of stridevx[2] is never executed.  And it is
possible that it will trap.  Thus, we can only move it to point where we are
sure that we do not make it executed unconditionally.  I.e. it is legal to move
it out of the innermost loop, but not out of the outer ones.

There are two optimization improvements that could help (and as this case is
fairly common, I will leave this PR open as an enhancement request).

The first optimization is predicated invariant motion.  I.e. we cannot move
stridevx[2] out of the second loop, but we can move the expression (i < i1 ?
stridevx[2] : undefined) out of it, and it has the same value in all interesting
cases.

The second optimization is invariant exit motion. I.e. we can replace

do
  {
     code1;
     if (invariant)
       break;
     code2;
  } while (something);

with

if (invariant)
  {
    code1;
  }
else
  {
    do
      {
         code1;
         code2;
      } while (something);
  }

This could be achieved through loop unswitching (for non-innermost loops), or as
a separate optimization.  Note however that for this optimization to be useful
for invariant motion, it would either have to be iterated with it, or, at the
expense of significant complications, to be implemented within invariant motion.

-- 
           What    |Removed                     |Added
----------------------------------------------------------------------------
           Severity|normal                      |enhancement
             Status|UNCONFIRMED                 |NEW
     Ever Confirmed|                            |1
   Last reconfirmed|0000-00-00 00:00:00         |2005-09-20 13:33:54
               date|                            |

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23970

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2005-09-20 13:34 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-09-19 18:52 [Bug tree-optimization/23970] New: loop-invariant-motion is not doing it's work rguenth at gcc dot gnu dot org
2005-09-19 18:52 ` [Bug tree-optimization/23970] " rguenth at gcc dot gnu dot org
2005-09-20 13:34 ` rakdver at gcc dot gnu dot org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).