From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-bugs-return-432469-listarch-gcc-bugs=gcc.gnu.org@gcc.gnu.org>
Received: (qmail 27556 invoked by alias); 22 Oct 2013 14:18:03 -0000
Mailing-List: contact gcc-bugs-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Id: <gcc-bugs.gcc.gnu.org>
List-Archive: <http://gcc.gnu.org/ml/gcc-bugs/>
List-Post: <mailto:gcc-bugs@gcc.gnu.org>
List-Help: <mailto:gcc-bugs-help@gcc.gnu.org>
Sender: gcc-bugs-owner@gcc.gnu.org
Received: (qmail 27022 invoked by uid 48); 22 Oct 2013 14:17:58 -0000
From: "ktietz at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug rtl-optimization/47477] [4.7/4.8/4.9 regression] Sub-optimal mov at end of method
Date: Tue, 22 Oct 2013 14:18:00 -0000
X-Bugzilla-Reason: CC
X-Bugzilla-Type: changed
X-Bugzilla-Watch-Reason: None
X-Bugzilla-Product: gcc
X-Bugzilla-Component: rtl-optimization
X-Bugzilla-Version: 4.6.0
X-Bugzilla-Keywords: missed-optimization, ra
X-Bugzilla-Severity: normal
X-Bugzilla-Who: ktietz at gcc dot gnu.org
X-Bugzilla-Status: NEW
X-Bugzilla-Priority: P2
X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org
X-Bugzilla-Target-Milestone: 4.8.3
X-Bugzilla-Flags:
X-Bugzilla-Changed-Fields:
Message-ID: <bug-47477-4-pbUtFjHIEp@http.gcc.gnu.org/bugzilla/>
In-Reply-To: <bug-47477-4@http.gcc.gnu.org/bugzilla/>
References: <bug-47477-4@http.gcc.gnu.org/bugzilla/>
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: 7bit
X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/
Auto-Submitted: auto-generated
MIME-Version: 1.0
X-SW-Source: 2013-10/txt/msg01613.txt.bz2

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47477
--- Comment #17 from Kai Tietz <ktietz at gcc dot gnu.org> ---
What optimization do you expect here?  With the new type-demotion pass I see
some changes in the optimized tree output:

foo ()
{
  int i;
  short int _4;
  char _5;
  unsigned short _6;
  unsigned short _8;
  short int _9;
  unsigned short _10;
  unsigned short _11;
  short int _12;
  sizetype _25;

  <bb 2>:
  goto <bb 4>;

  <bb 3>:

  <bb 4>:
  # i_17 = PHI <i_14(3), 0(2)>
  _25 = (sizetype) i_17;
  _4 = MEM[symbol: a, index: _25, step: 2, offset: 0B];
  _5 = (char) _4;
  _6 = (unsigned short) _5;
  _9 = MEM[symbol: b, index: _25, step: 2, offset: 0B];
  _8 = (unsigned short) _9;
  _10 = _8 + 17;
  _11 = _10 + _6;
  _12 = (short int) _11;
  MEM[symbol: a, index: _25, step: 2, offset: 0B] = _12;
  i_14 = i_17 + 1;
  if (i_14 != 1024)
    goto <bb 3>;
  else
    goto <bb 5>;

  <bb 5>:
  return;
}

which then gets simplified to the following assembler on IA32:
_foo:
        xorl    %eax, %eax
        .p2align 4,,10
L2:
        movsbw  _a(%eax,%eax), %dx
        movzwl  _b(%eax,%eax), %ecx
        leal    17(%ecx,%edx), %edx
        movw    %dx, _a(%eax,%eax)
        addl    $1, %eax
        cmpl    $1024, %eax
        jne     L2
        rep ret

The same assembler gets produced for me with all compilers back to 4.6.0; only
the tree-optimization output differs.