From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-bugs-return-300440-listarch-gcc-bugs=gcc.gnu.org@gcc.gnu.org>
Received: (qmail 10794 invoked by alias); 27 Nov 2009 12:02:08 -0000
Received: (qmail 7562 invoked by uid 48); 27 Nov 2009 12:01:56 -0000
Date: Fri, 27 Nov 2009 12:02:00 -0000
Subject: [Bug c++/42194]  New: performance degradation with STL complex convolution operation
X-Bugzilla-Reason: CC
Message-ID: <bug-42194-18494@http.gcc.gnu.org/bugzilla/>
Reply-To: gcc-bugzilla@gcc.gnu.org
To: gcc-bugs@gcc.gnu.org
From: "jagjeet dot nain at gmail dot com" <gcc-bugzilla@gcc.gnu.org>
Mailing-List: contact gcc-bugs-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Id: <gcc-bugs.gcc.gnu.org>
List-Archive: <http://gcc.gnu.org/ml/gcc-bugs/>
List-Post: <mailto:gcc-bugs@gcc.gnu.org>
List-Help: <mailto:gcc-bugs-help@gcc.gnu.org>
Sender: gcc-bugs-owner@gcc.gnu.org
X-SW-Source: 2009-11/txt/msg02353.txt.bz2

I have a very simple program which basically performs a complex matrix
convolution operation.
I am seeing a 3x performance degradation when this program is compiled with
version 4.3.2 compared with version 4.0.2. I am compiling the program with the
-O3 option; no additional optimization flags are supplied. One more interesting
thing to note is that this behavior is seen only with the complex data type: if
I use the plain float data type, the timings are better with version 4.3.2.
Please help me.


#include <complex>
#include <iostream>
#include <stdio.h>
#include <time.h>

// Returns the value of clock() converted to seconds, i.e. the tick count
// divided by CLOCKS_PER_SEC, as a single-precision float.
float procTimeInSeconds()
{
    const clock_t ticks = clock();
    const float ticksPerSecond = static_cast<float>(CLOCKS_PER_SEC);
    return ticks / ticksPerSecond;
}


using namespace std;

// Benchmark driver for the complex multiply-accumulate loop nest below.
//
// Allocates three complex<float> buffers (weights, input, output), runs the
// four-deep loop nest once, and prints the number of inner-loop iterations
// together with the processor time spent, as reported by
// procTimeInSeconds(). The command-line arguments are not used.
//
// Returns 0.
int main(int argc , char** arg )
{

    const int Nc = 32;  // total matrix
    const int Nx = 512; // columns
    const int Nn = 16;  //typical value
    const int Ns = 10;
    const int Nw = Nc * Nn;

    // Raw arrays are kept on purpose: this program is a reproducer for a
    // code-generation regression, so the access pattern of the timed loop
    // must stay exactly as reported. complex<float>'s default constructor
    // sets both parts to zero, so every element read below is initialized.
    complex<float>* all_weights = new complex<float>[Nx*Nw*Nc];
    complex<float>* input = new complex<float>[Nx*Nw*Ns];
    complex<float>* output = new complex<float>[Nx*Nc*Ns];

    // Element (c, w, x) of the weights lives at c*stride_c + w*stride_w + x*stride_x;
    // input and output are indexed the same way with their own strides.
    int weights_stride_c = Nx * Nw;
    int weights_stride_w = Nx;
    int weights_stride_x = 1;
    int input_stride_s = Nx * Nw;
    int input_stride_w = Nx;
    int input_stride_x = 1;
    int output_stride_s = Nx * Nc;
    int output_stride_c = Nx;
    int output_stride_x = 1;

    // ================================================================
    //                      Round 1
    //    Do array reductions as we descend into the loop nesting,
    //    keeping temporary pointers for each result.
    //    Results: Faster for unoptimized compilation, but slower for
    //             compiler optimization on.
    // ================================================================
    int count = 0;  // number of inner-loop iterations executed
    float startTime = procTimeInSeconds();

    // Per-level base pointers, re-pointed at each loop level.
    complex<float>* input_s;
    complex<float>* output_s ;
    complex<float>* curr_weight_c;
    complex<float>* output_sc;

    complex<float>* curr_weight_cw;
    complex<float>* input_sw;
    for(int is = 0; is < Ns; ++is )
    {
        input_s = &input[is*input_stride_s];
        output_s = &output[is*output_stride_s];

        for (int ic=0; ic<Nc; ++ic)
        {
            curr_weight_c = &all_weights[ic * weights_stride_c];
            output_sc = &output_s[ic*output_stride_c];
            // for that matrix, loop through w
            for (int iw=0; iw<Nw; ++iw)
            {
                curr_weight_cw = &curr_weight_c[weights_stride_w * iw];
                input_sw = &input_s[iw*input_stride_w];
                for (int ix=0; ix<Nx; ++ix)
                {
                    output_sc[ix*output_stride_x] +=
                        curr_weight_cw[ix*weights_stride_x] * input_sw[ix*input_stride_x];
                    ++count;
                }
            }
        }
    }

    float netTime = procTimeInSeconds() - startTime;
    cout << count << " in " << netTime << " seconds, round 1" << std::endl;

    // Release the buffers only after the elapsed time has been taken, so
    // deallocation is not part of the measurement.
    delete [] output;
    delete [] input;
    delete [] all_weights;

    return 0;
}


-- 
           Summary: performance degradation with STL complex convolution
                    operation
           Product: gcc
           Version: 4.3.3
            Status: UNCONFIRMED
          Severity: major
          Priority: P3
         Component: c++
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: jagjeet dot nain at gmail dot com


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=42194