From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 29665 invoked by alias); 3 Mar 2013 11:58:38 -0000 Received: (qmail 29633 invoked by uid 48); 3 Mar 2013 11:58:25 -0000 From: "vincenzo.innocente at cern dot ch" To: gcc-bugs@gcc.gnu.org Subject: [Bug middle-end/55266] vector expansion: 24 movs for 4 adds Date: Sun, 03 Mar 2013 11:58:00 -0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: changed X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: middle-end X-Bugzilla-Keywords: X-Bugzilla-Severity: normal X-Bugzilla-Who: vincenzo.innocente at cern dot ch X-Bugzilla-Status: NEW X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Changed-Fields: Message-ID: In-Reply-To: References: X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated Content-Type: text/plain; charset="UTF-8" MIME-Version: 1.0 Mailing-List: contact gcc-bugs-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-bugs-owner@gcc.gnu.org X-SW-Source: 2013-03/txt/msg00157.txt.bz2 http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55266 --- Comment #4 from vincenzo Innocente 2013-03-03 11:58:24 UTC --- I still see problems when calling inline functions. It seems that the code to satisfy the "calling ABI" is generated anyway. 
take the example below and compare the code generated for "dotd1" wrt "dotd2"
dotd2 has a "storm" of move before the reduction
c++ -std=c++11 -Ofast -march=corei7 -S conversions.cc -fabi-version=0
the avx version is better but for dotd4 (actually dotd1 is lelf see like)

// GCC vector-extension types; vector_size gives the total size in bytes.
// Both are indexed with lanes 0..3 throughout this test case.
typedef float __attribute__( ( vector_size( 16 ) ) ) float32x4_t;
typedef double __attribute__( ( vector_size( 32 ) ) ) float64x4_t;

// Builds a float64x4_t whose lanes are the four lanes of f, each widened
// from float to double via brace-initialization.
inline float64x4_t convert(float32x4_t f) {
  return float64x4_t{f[0],f[1],f[2],f[3]};
}

// Dot product of x and y over lanes 0..3, accumulated in float.
float dotf(float32x4_t x, float32x4_t y) {
  float ret=0;
  for (int i=0;i!=4;++i) ret+=x[i]*y[i];
  return ret;
}

// Dot product of x and y over lanes 0..3, accumulated in double.
// Declared inline; it is the helper called by dotd2 and dotd4.
inline double dotd(float64x4_t x, float64x4_t y) {
  double ret=0;
  for (int i=0;i!=4;++i) ret+=x[i]*y[i];
  return ret;
}

// Variant 1: everything written out in this function. The float lanes are
// widened lane by lane in a loop, the products are accumulated in double,
// and the double sum is implicitly converted to the float return type.
float dotd1(float32x4_t x, float32x4_t y) {
  float64x4_t dx,dy;
  for (int i=0;i!=4;++i) {
    dx[i]=x[i]; dy[i]=y[i];
  }
  double ret=0;
  for (int i=0;i!=4;++i) ret+=dx[i]*dy[i];
  return ret;
}

// Variant 2: same computation, but both the widening (convert) and the
// reduction (dotd) go through the inline helpers. This is the function the
// report says shows a "storm" of moves before the reduction.
float dotd2(float32x4_t x, float32x4_t y) {
  float64x4_t dx=convert(x);
  float64x4_t dy=convert(y);
  return dotd(dx,dy);
}

// Variant 3: widening done with brace-initialization directly in this
// function (no call to convert), reduction loop written out locally.
float dotd3(float32x4_t x, float32x4_t y) {
  float64x4_t dx{x[0],x[1],x[2],x[3]};
  float64x4_t dy{y[0],y[1],y[2],y[3]};
  double ret=0;
  for (int i=0;i!=4;++i) ret+=dx[i]*dy[i];
  return ret;
}

// Variant 4: widening done with the local lane-by-lane loop (as in dotd1),
// reduction delegated to the inline helper dotd (as in dotd2).
float dotd4(float32x4_t x, float32x4_t y) {
  float64x4_t dx,dy;
  for (int i=0;i!=4;++i) {
    dx[i]=x[i]; dy[i]=y[i];
  }
  return dotd(dx,dy);
}