From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-bugs-return-406884-listarch-gcc-bugs=gcc.gnu.org@gcc.gnu.org>
Received: (qmail 12793 invoked by alias); 16 Nov 2012 18:28:55 -0000
Received: (qmail 12727 invoked by uid 48); 16 Nov 2012 18:28:32 -0000
From: "jakub at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug tree-optimization/55286] [4.7/4.8 Regression] Bytemark ASSIGNMENT 4% - 10% slower
Date: Fri, 16 Nov 2012 18:28:00 -0000
X-Bugzilla-Reason: CC
X-Bugzilla-Type: changed
X-Bugzilla-Watch-Reason: None
X-Bugzilla-Product: gcc
X-Bugzilla-Component: tree-optimization
X-Bugzilla-Keywords:
X-Bugzilla-Severity: normal
X-Bugzilla-Who: jakub at gcc dot gnu.org
X-Bugzilla-Status: UNCONFIRMED
X-Bugzilla-Priority: P3
X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org
X-Bugzilla-Target-Milestone: ---
X-Bugzilla-Changed-Fields:
Message-ID: <bug-55286-4-LLGrjiDO8R@http.gcc.gnu.org/bugzilla/>
In-Reply-To: <bug-55286-4@http.gcc.gnu.org/bugzilla/>
References: <bug-55286-4@http.gcc.gnu.org/bugzilla/>
X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/
Auto-Submitted: auto-generated
Content-Type: text/plain; charset="UTF-8"
MIME-Version: 1.0
Mailing-List: contact gcc-bugs-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Id: <gcc-bugs.gcc.gnu.org>
List-Archive: <http://gcc.gnu.org/ml/gcc-bugs/>
List-Post: <mailto:gcc-bugs@gcc.gnu.org>
List-Help: <mailto:gcc-bugs-help@gcc.gnu.org>
Sender: gcc-bugs-owner@gcc.gnu.org
X-SW-Source: 2012-11/txt/msg01546.txt.bz2


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55286
--- Comment #5 from Jakub Jelinek <jakub at gcc dot gnu.org> 2012-11-16 18:28:30 UTC ---
Created attachment 28712
  --> http://gcc.gnu.org/bugzilla/attachment.cgi?id=28712
assign.c

Assignment extracted into a self-contained testcase.  Does this also make a
similar difference for you?  On which CPU?  Yes, there is a code generation
difference with that commit; in *.optimized the difference seems to be
(- vanilla, + with Kai's patch reverted):
@@ -192,13 +192,12 @@ Assignment (long int[101] * arraybase)
   sizetype _302;
   unsigned long _303;
   sizetype _306;
   long unsigned int pretmp_307;
   long unsigned int pretmp_308;
   long int[101] * pretmp_318;
-  unsigned long _322;
   short unsigned int ivtmp_334;
   unsigned long _350;
   unsigned int _351;
   long unsigned int patt_353;
   short unsigned int _354;
   unsigned long _355;
@@ -286,27 +285,26 @@ Assignment (long int[101] * arraybase)
   <bb 5>:
   # currentmin_72 = PHI <currentmin_402(4)>
   _356 = ivtmp.312_453 & 15;
   _350 = _356 >> 3;
   _355 = -_350;
   _354 = (short unsigned int) _355;
-  _322 = _355 & 1;
-  prolog_loop_niters.10_359 = (short unsigned int) _322;
+  prolog_loop_niters.10_359 = _354 & 1;
   if (prolog_loop_niters.10_359 == 0)
     goto <bb 7>;
   else
     goto <bb 6>;

   <bb 6>:
   _272 = MEM[base: pretmp_395, offset: 0B];
   _256 = _272 - currentmin_72;
   MEM[base: pretmp_395, offset: 0B] = _256;

   <bb 7>:
   # j_269 = PHI <1(6), 0(5)>
-  prolog_loop_adjusted_niters.11_124 = _355 & 1;
+  prolog_loop_adjusted_niters.11_124 = (sizetype) prolog_loop_niters.10_359;
   niters.12_129 = 101 - prolog_loop_niters.10_359;
   base_off.19_523 = prolog_loop_adjusted_niters.11_124 * 8;
   vect_p.20_524 = pretmp_395 + base_off.19_523;
   vect_cst_.23_528 = {currentmin_72, currentmin_72};

   <bb 8>:

This change happens very late (forwprop4) and nothing afterwards cleans it up
(there is no DCE etc. that would remove the dead assignment to _354, and there
is no PRE/FRE to replace _355 & 1 in the second case with _322).  Still, just
zero-extending prolog_loop_niters.10_359 is perhaps cheaper register
pressure-wise.

That said, I can't find any measurable differences between the two.