From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-prs-return-20368-listarch-gcc-prs=gcc.gnu.org@gcc.gnu.org>
Received: (qmail 30571 invoked by alias); 12 Jul 2002 02:46:01 -0000
Mailing-List: contact gcc-prs-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Archive: <http://gcc.gnu.org/ml/gcc-prs/>
List-Post: <mailto:gcc-prs@gcc.gnu.org>
List-Help: <mailto:gcc-prs-help@gcc.gnu.org>
Sender: gcc-prs-owner@gcc.gnu.org
Received: (qmail 30557 invoked by uid 71); 12 Jul 2002 02:46:01 -0000
Date: Thu, 11 Jul 2002 19:46:00 -0000
Message-ID: <20020712024601.30556.qmail@sources.redhat.com>
To: nobody@gcc.gnu.org
Cc: gcc-prs@gcc.gnu.org,
From: Alan Modra <amodra@bigpond.net.au>
Subject: Re: optimization/7130: miscompiled code for gcc-3.1 in powerpc linux with -funroll-all-loops
Reply-To: Alan Modra <amodra@bigpond.net.au>
X-SW-Source: 2002-07/txt/msg00370.txt.bz2
List-Id: <gcc-prs.sourceware.org>

The following reply was made to PR optimization/7130; it has been noted by GNATS.

From: Alan Modra <amodra@bigpond.net.au>
To: yozo@cs.berkeley.edu, gcc-gnats@gcc.gnu.org, gcc-patches@gcc.gnu.org
Cc:  
Subject: Re: optimization/7130: miscompiled code for gcc-3.1 in powerpc linux with -funroll-all-loops
Date: Fri, 12 Jul 2002 12:06:06 +0930

 The testsuite failures with the previous patch were due to
 doloop_modify_runtime not taking into account loop preconditioning.
 
 No new regressions with this patch.  Yay!
 
 gcc/ChangeLog
 	PR optimization/7130
 	* loop.h (struct loop_info): Add "preconditioned".
 	* unroll.c (unroll_loop): Set it.
 	* doloop.c (doloop_modify_runtime): Correct count for unrolled loops.
 
 Hmm, I suppose I should be adding the testcase in gcc.c-torture/execute/
 since these get compiled with -funroll-all-loops.
 
 Index: gcc/loop.h
 ===================================================================
 RCS file: /cvs/gcc/gcc/gcc/loop.h,v
 retrieving revision 1.61
 diff -u -p -r1.61 loop.h
 --- gcc/loop.h	30 May 2002 20:55:11 -0000	1.61
 +++ gcc/loop.h	12 Jul 2002 01:50:43 -0000
 @@ -316,6 +316,9 @@ struct loop_info
    int has_multiple_exit_targets;
    /* Nonzero if there is an indirect jump in the current function.  */
    int has_indirect_jump;
 +  /* Whether loop unrolling has emitted copies of the loop body so
 +     that the main loop needs no exit tests.  */
 +  int preconditioned;
    /* Register or constant initial loop value.  */
    rtx initial_value;
    /* Register or constant value used for comparison test.  */
 Index: gcc/unroll.c
 ===================================================================
 RCS file: /cvs/gcc/gcc/gcc/unroll.c,v
 retrieving revision 1.169
 diff -u -p -r1.169 unroll.c
 --- gcc/unroll.c	30 Jun 2002 05:06:01 -0000	1.169
 +++ gcc/unroll.c	12 Jul 2002 01:50:46 -0000
 @@ -1135,6 +1135,9 @@ unroll_loop (loop, insn_count, strength_
    /* Keep track of the unroll factor for the loop.  */
    loop_info->unroll_number = unroll_number;
  
 +  /* And whether the loop has been preconditioned.  */
 +  loop_info->preconditioned = loop_preconditioned;
 +
    /* For each biv and giv, determine whether it can be safely split into
       a different variable for each unrolled copy of the loop body.
       We precalculate and save this info here, since computing it is
 Index: gcc/doloop.c
 ===================================================================
 RCS file: /cvs/gcc/gcc/gcc/doloop.c,v
 retrieving revision 1.20
 diff -u -p -r1.20 doloop.c
 --- gcc/doloop.c	24 Jun 2002 02:16:42 -0000	1.20
 +++ gcc/doloop.c	12 Jul 2002 01:50:42 -0000
 @@ -552,6 +552,7 @@ doloop_modify_runtime (loop, iterations_
  {
    const struct loop_info *loop_info = LOOP_INFO (loop);
    HOST_WIDE_INT abs_inc;
 +  HOST_WIDE_INT abs_loop_inc;
    int neg_inc;
    rtx diff;
    rtx sequence;
 @@ -595,13 +596,18 @@ doloop_modify_runtime (loop, iterations_
       except in cases where the loop never terminates.  So we don't
       need to use this more costly calculation.
  
 -     If the loop has been unrolled, then the loop body has been
 -     preconditioned to iterate a multiple of unroll_number times.  If
 -     abs_inc is != 1, the full calculation is
 -
 -       t1 = abs_inc * unroll_number;
 -       n = abs (final - initial) / t1;
 -       n += (abs (final - initial) % t1) > t1 - abs_inc;
 +     If the loop has been unrolled, the full calculation is
 +
 +       t1 = abs_inc * unroll_number;		increment per loop
 +       n = abs (final - initial) / t1;		full loops
 +       n += (abs (final - initial) % t1) != 0;	partial loop
 +
 +     However, in certain cases the unrolled loop will be preconditioned
 +     by emitting copies of the loop body with conditional branches,
 +     so that the unrolled loop is always a full loop and thus needs
 +     no exit tests.  In this case we don't want to add the partial
 +     loop count.  As above, when t1 is a power of two we don't need to
 +     worry about overflow.
  
       The division and modulo operations can be avoided by requiring
       that the increment is a power of 2 (precondition_loop_p enforces
 @@ -667,20 +673,22 @@ doloop_modify_runtime (loop, iterations_
  	}
      }
  
 -  if (abs_inc * loop_info->unroll_number != 1)
 +  abs_loop_inc = abs_inc * loop_info->unroll_number;
 +  if (abs_loop_inc != 1)
      {
        int shift_count;
  
 -      shift_count = exact_log2 (abs_inc * loop_info->unroll_number);
 +      shift_count = exact_log2 (abs_loop_inc);
        if (shift_count < 0)
  	abort ();
  
 -      if (abs_inc != 1)
 +      if (!loop_info->preconditioned)
  	diff = expand_simple_binop (GET_MODE (diff), PLUS,
 -				    diff, GEN_INT (abs_inc - 1),
 +				    diff, GEN_INT (abs_loop_inc - 1),
  				    diff, 1, OPTAB_LIB_WIDEN);
  
 -      /* (abs (final - initial) + abs_inc - 1) / (abs_inc * unroll_number)  */
 +      /* (abs (final - initial) + abs_inc * unroll_number - 1)
 +	 / (abs_inc * unroll_number)  */
        diff = expand_simple_binop (GET_MODE (diff), LSHIFTRT,
  				  diff, GEN_INT (shift_count),
  				  diff, 1, OPTAB_LIB_WIDEN);
 
 -- 
 Alan Modra
 IBM OzLabs - Linux Technology Centre