From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 5863 invoked by alias); 10 Feb 2008 07:57:01 -0000 Received: (qmail 5743 invoked by uid 48); 10 Feb 2008 07:56:15 -0000 Date: Sun, 10 Feb 2008 07:57:00 -0000 Message-ID: <20080210075615.5742.qmail@sourceware.org> X-Bugzilla-Reason: CC References: Subject: [Bug c++/35117] Vectorization on power PC In-Reply-To: Reply-To: gcc-bugzilla@gcc.gnu.org To: gcc-bugs@gcc.gnu.org From: "eyal at geomage dot com" Mailing-List: contact gcc-bugs-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-bugs-owner@gcc.gnu.org X-SW-Source: 2008-02/txt/msg01072.txt.bz2 ------- Comment #20 from eyal at geomage dot com 2008-02-10 07:56 ------- Hi, I've tried putting the loop to be vectorized in a different method and the compiler output looks better, but the performance is still the same as the non-vectorized code. #include #include #include typedef float ARRTYPE; void Calc( ARRTYPE *pSum, ARRTYPE *pSum1, ARRTYPE *pVec1, ARRTYPE *pVec2, int m_nSamples, int itBegin, int itEnd ); int main ( int argc, char *argv[] ) { int m_nSamples = atoi( argv[1] ); int itBegin = atoi( argv[2] ); int itEnd = atoi( argv[3] ); int iSizeMain = atoi( argv[ 4 ] ); ARRTYPE *pSum1 = new ARRTYPE[ 100000 ]; ARRTYPE *pSum = new ARRTYPE[ 100000 ]; for ( int it = 0; it < m_nSamples; it++ ) { pSum[ it ] = it / itBegin; pSum1[ it ] = itBegin / ( it + 1 ); } ARRTYPE *pVec1 = NULL, *pVec2 = NULL; Calc( pSum, pSum1, pVec1, pVec2, m_nSamples, itBegin, itEnd ); std::cout << "pVec1[10] = " << pVec1[ 10 ] << std::endl; std::cout << "pVec1[102] = " << pVec1[ 102 ] << std::endl; free( pVec1 ); free( pVec2 ); } void Calc( ARRTYPE *pSum, ARRTYPE *pSum1, ARRTYPE *pVec1, ARRTYPE *pVec2, int m_nSamples, int itBegin, int itEnd ) { pVec1 = (ARRTYPE*) malloc (sizeof(ARRTYPE) *m_nSamples); pVec2 = (ARRTYPE*) malloc (sizeof(ARRTYPE) *m_nSamples); for ( int i = 0; i < m_nSamples - 5; i++ ) { for( int it = itBegin; it < itEnd; it++ ) pVec1[ it ] += pSum[ it ] + pSum1[ it ]; } } Eyal.cpp:36: note: dependence distance = 0. Eyal.cpp:36: note: accesses have the same alignment. Eyal.cpp:36: note: dependence distance modulo vf == 0 between *D.22348_22 and *D.22348_22 Eyal.cpp:36: note: === vect_analyze_slp === Eyal.cpp:36: note: === vect_make_slp_decision === Eyal.cpp:36: note: === vect_detect_hybrid_slp ===(analyze_scalar_evolution (loop_nb = 2) (scalar = it_60) (get_scalar_evolution (scalar = it_60) (scalar_evolution = {itBegin_14(D), +, 1}_2)) (set_scalar_evolution (scalar = it_60) (scalar_evolution = {itBegin_14(D), +, 1}_2)) ) (instantiate_parameters (loop_nb = 2) (chrec = {itBegin_14(D), +, 1}_2) (res = {itBegin_14(D), +, 1}_2)) (get_loop_exit_condition if (itEnd_16(D) > it_36)) Eyal.cpp:36: note: Alignment of access forced using peeling. Eyal.cpp:36: note: Vectorizing an unaligned access. Eyal.cpp:36: note: Vectorizing an unaligned access. Eyal.cpp:36: note: === vect_update_slp_costs_according_to_vf ===(analyze_scalar_evolution (loop_nb = 2) (scalar = it_60) (get_scalar_evolution (scalar = it_60) (scalar_evolution = {itBegin_14(D), +, 1}_2)) (set_scalar_evolution (scalar = it_60) (scalar_evolution = {itBegin_14(D), +, 1}_2)) ) (instantiate_parameters (loop_nb = 2) (chrec = {itBegin_14(D), +, 1}_2) (res = {itBegin_14(D), +, 1}_2)) (get_loop_exit_condition if (itEnd_16(D) > it_36)) (get_loop_exit_condition if (itEnd_16(D) > it_36)) (get_loop_exit_condition if (itEnd_16(D) > it_84)) (get_loop_exit_condition if (ivtmp.267_92 < prolog_loop_niters.266_70)) loop at Eyal.cpp:37: if (ivtmp.267_92 < prolog_loop_niters.266_70)(get_loop_exit_condition if (itEnd_16(D) > it_36)) (analyze_scalar_evolution (loop_nb = 2) (scalar = it_60) (get_scalar_evolution (scalar = it_60) (scalar_evolution = )) (analyze_initial_condition (loop_phi_node = it_60 = PHI ) (init_cond = it_86)) (analyze_evolution_in_loop (loop_phi_node = it_60 = PHI ) (add_to_evolution (loop_nb = 2) (chrec_before = it_86) (to_add = 1) (res = {it_86, +, 1}_2)) (evolution_function = {it_86, +, 1}_2)) (set_scalar_evolution (scalar = it_60) (scalar_evolution = {it_86, +, 1}_2)) ) (get_loop_exit_condition if (itEnd_16(D) > it_36)) (get_loop_exit_condition if (ivtmp.329_211 < bnd.269_99)) loop at Eyal.cpp:37: if (ivtmp.329_211 < bnd.269_99) Registering new PHI nodes in block #0 Registering new PHI nodes in block #2 Updating SSA information for statement D.22335_6 = malloc (D.22334_5); Updating SSA information for statement malloc (D.22334_5); Registering new PHI nodes in block #3 Registering new PHI nodes in block #9 Registering new PHI nodes in block #7 Registering new PHI nodes in block #8 Registering new PHI nodes in block #10 Registering new PHI nodes in block #14 Registering new PHI nodes in block #12 Updating SSA information for statement D.22349_76 = *D.22348_75; Updating SSA information for statement *D.22348_75 = D.22355_82; Registering new PHI nodes in block #13 Registering new PHI nodes in block #16 Registering new PHI nodes in block #15 Registering new PHI nodes in block #21 Registering new PHI nodes in block #22 Registering new PHI nodes in block #19 Updating SSA information for statement D.22349_106 = *D.22348_105; Updating SSA information for statement *D.22348_105 = D.22355_112; Registering new PHI nodes in block #20 Registering new PHI nodes in block #25 Registering new PHI nodes in block #24 Registering new PHI nodes in block #18 Registering new PHI nodes in block #26 Updating SSA information for statement vect_var_.279_143 = A*vect_p.280_142; Updating SSA information for statement vect_var_.300_174 = A*vect_p.301_173; Registering new PHI nodes in block #5 Updating SSA information for statement vect_var_.278_134 = *ivtmp.277_132; Updating SSA information for statement D.22349_23 = *D.22348_22; Updating SSA information for statement vect_var_.298_164 = A*ivtmp.297_162; Updating SSA information for statement vect_var_.319_195 = A*ivtmp.318_193; Updating SSA information for statement *ivtmp.328_208 = vect_var_.322_198; Registering new PHI nodes in block #4 Registering new PHI nodes in block #23 Registering new PHI nodes in block #17 Registering new PHI nodes in block #6 Registering new PHI nodes in block #11 Symbols to be put in SSA form { HEAP.249 NMT.252 NMT.253 } Incremental SSA update started at block: 0 Number of blocks in CFG: 27 Number of blocks to update: 26 ( 96%) Affected blocks: 0 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 Eyal.cpp:36: note: LOOP VECTORIZED. Eyal.cpp:30: note: vectorized 1 loops in function. Merging blocks 10 and 14 Merging blocks 15 and 21 Merging blocks 17 and 6 Merging blocks 24 and 18 Created preheader block for loop 3 void Calc(ARRTYPE*, ARRTYPE*, ARRTYPE*, ARRTYPE*, int, int, int) (pSum, pSum1, pVec1, pVec2, m_nSamples, itBegin, itEnd) { unsigned int ivtmp.329; __vector float * ivtmp.328; float * D.25089; __vector float * vect_p.327; unsigned int D.25086; long unsigned int base_off.325; float * D.25082; long unsigned int D.25083; long unsigned int D.25084; float * batmp.324; __vector float * vect_p.323; __vector float vect_var_.322; __vector float vect_var_.321; __vector float vect_var_.320; __vector float vect_var_.319; __vector float * ivtmp.318; float * D.25072; __vector float * vect_p.317; long unsigned int offset.315; unsigned int D.25068; long unsigned int base_off.314; long unsigned int D.25065; long unsigned int D.25066; float * batmp.313; __vector float * vect_p.312; __vector float vect_var_.311; __vector signed char vect_var_.310; float * D.25060; __vector float * vect_p.309; unsigned int D.25057; long unsigned int base_off.307; long unsigned int D.25054; long unsigned int D.25055; float * batmp.306; float * D.25052; __vector float * vect_p.305; unsigned int D.25049; long unsigned int base_off.303; long unsigned int D.25046; long unsigned int D.25047; float * batmp.302; __vector float * vect_p.301; __vector float vect_var_.300; __vector float vect_var_.299; __vector float vect_var_.298; __vector float * ivtmp.297; float * D.25039; __vector float * vect_p.296; long unsigned int offset.294; unsigned int D.25035; long unsigned int base_off.293; long unsigned int D.25032; long unsigned int D.25033; float * batmp.292; __vector float * vect_p.291; __vector float vect_var_.290; __vector signed char vect_var_.289; float * D.25027; __vector float * vect_p.288; unsigned int D.25024; long unsigned int base_off.286; long unsigned int D.25021; long unsigned int D.25022; float * batmp.285; float * D.25019; __vector float * vect_p.284; unsigned int D.25016; long unsigned int base_off.282; long unsigned int D.25013; long unsigned int D.25014; float * batmp.281; __vector float * vect_p.280; __vector float vect_var_.279; __vector float vect_var_.278; __vector float * ivtmp.277; float * D.25007; __vector float * vect_p.276; unsigned int D.25004; long unsigned int base_off.274; float * D.25000; long unsigned int D.25001; long unsigned int D.25002; float * batmp.273; __vector float * vect_p.272; int D.24997; int tmp.271; unsigned int ratio_mult_vf.270; unsigned int bnd.269; int D.24989; unsigned int D.24990; unsigned int D.24991; unsigned int D.24992; unsigned int D.24993; unsigned int niters.268; unsigned int ivtmp.267; long unsigned int D.24981; long unsigned int D.24982; long unsigned int D.24983; long unsigned int D.24984; unsigned int D.24985; unsigned int D.24986; unsigned int prolog_loop_niters.266; __vector float * vect_p.265; float * D.24974; long unsigned int D.24975; long unsigned int D.24976; float * batmp.262; int D.24969; unsigned int D.24970; unsigned int D.24971; unsigned int D.24972; unsigned int niters.261; int it; int i; float D.22355; float D.22354; float D.22353; ARRTYPE * D.22352; float D.22351; ARRTYPE * D.22350; float D.22349; ARRTYPE * D.22348; long unsigned int D.22347; long unsigned int D.22346; int D.22340; void * D.22335; long unsigned int D.22334; long unsigned int D.22333; : D.22333_4 = (long unsigned int) m_nSamples_3(D); D.22334_5 = D.22333_4 * 4; D.22335_6 = malloc (D.22334_5); pVec1_7 = (ARRTYPE *) D.22335_6; malloc (D.22334_5); D.22340_9 = m_nSamples_3(D) + -5; if (D.22340_9 > 0) goto ; else goto ; : goto ; : : # ivtmp.329_210 = PHI # ivtmp.328_208 = PHI # ivtmp.318_193 = PHI # vect_var_.311_183 = PHI # ivtmp.297_162 = PHI # vect_var_.290_152 = PHI # ivtmp.277_132 = PHI # it_60 = PHI D.22346_20 = (long unsigned int) it_60; D.22347_21 = D.22346_20 * 4; D.22348_22 = pVec1_7 + D.22347_21; vect_var_.278_134 = *ivtmp.277_132; D.22349_23 = *D.22348_22; D.22350_27 = pSum_26(D) + D.22347_21; vect_var_.298_164 = A*ivtmp.297_162; vect_var_.299_165 = REALIGN_LOAD ; D.22351_28 = *D.22350_27; D.22352_32 = pSum1_31(D) + D.22347_21; vect_var_.319_195 = A*ivtmp.318_193; vect_var_.320_196 = REALIGN_LOAD ; D.22353_33 = *D.22352_32; vect_var_.321_197 = vect_var_.299_165 + vect_var_.278_134; D.22354_34 = D.22351_28 + D.22349_23; vect_var_.322_198 = vect_var_.321_197 + vect_var_.320_196; D.22355_35 = D.22354_34 + D.22353_33; *ivtmp.328_208 = vect_var_.322_198; it_36 = it_60 + 1; ivtmp.277_133 = ivtmp.277_132 + 16; ivtmp.297_163 = ivtmp.297_162 + 16; ivtmp.318_194 = ivtmp.318_193 + 16; ivtmp.328_209 = ivtmp.328_208 + 16; ivtmp.329_211 = ivtmp.329_210 + 1; if (ivtmp.329_211 < bnd.269_99) goto ; else goto ; : # it_117 = PHI D.24997_121 = (int) ratio_mult_vf.270_100; tmp.271_122 = it_86 + D.24997_121; if (niters.268_98 == ratio_mult_vf.270_100) goto ; else goto ; : # it_116 = PHI : # it_102 = PHI D.22346_103 = (long unsigned int) it_102; D.22347_104 = D.22346_103 * 4; D.22348_105 = pVec1_7 + D.22347_104; D.22349_106 = *D.22348_105; D.22350_107 = pSum_26(D) + D.22347_104; D.22351_108 = *D.22350_107; D.22352_109 = pSum1_31(D) + D.22347_104; D.22353_110 = *D.22352_109; D.22354_111 = D.22351_108 + D.22349_106; D.22355_112 = D.22354_111 + D.22353_110; *D.22348_105 = D.22355_112; it_114 = it_102 + 1; if (itEnd_16(D) > it_114) goto ; else goto ; : goto ; : : : : i_37 = i_24 + 1; if (D.22340_9 > i_37) goto ; else goto ; : : # i_24 = PHI if (itBegin_14(D) < itEnd_16(D)) goto ; else goto ; : D.24969_19 = ~itBegin_14(D); D.24970_1 = (unsigned int) D.24969_19; D.24971_38 = (unsigned int) itEnd_16(D); D.24972_8 = D.24970_1 + D.24971_38; niters.261_59 = D.24972_8 + 1; D.24974_2 = (float *) D.22335_6; D.24975_39 = (long unsigned int) itBegin_14(D); D.24976_25 = D.24975_39 * 4; batmp.262_30 = D.24974_2 + D.24976_25; vect_p.265_63 = (__vector float *) batmp.262_30; D.24981_64 = (long unsigned int) vect_p.265_63; D.24982_65 = D.24981_64 & 15; D.24983_66 = D.24982_65 >> 2; D.24984_67 = 4 - D.24983_66; D.24985_68 = (unsigned int) D.24984_67; D.24986_69 = D.24985_68 & 3; prolog_loop_niters.266_70 = MIN_EXPR ; if (prolog_loop_niters.266_70 == 0) goto ; else goto ; : # ivtmp.267_89 = PHI <0(16)> # it_215 = PHI : # ivtmp.267_91 = PHI # it_72 = PHI D.22346_73 = (long unsigned int) it_72; D.22347_74 = D.22346_73 * 4; D.22348_75 = pVec1_7 + D.22347_74; D.22349_76 = *D.22348_75; D.22350_77 = pSum_26(D) + D.22347_74; D.22351_78 = *D.22350_77; D.22352_79 = pSum1_31(D) + D.22347_74; D.22353_80 = *D.22352_79; D.22354_81 = D.22351_78 + D.22349_76; D.22355_82 = D.22354_81 + D.22353_80; *D.22348_75 = D.22355_82; it_84 = it_72 + 1; ivtmp.267_92 = ivtmp.267_91 + 1; if (ivtmp.267_92 < prolog_loop_niters.266_70) goto ; else goto ; : # it_87 = PHI if (niters.261_59 == prolog_loop_niters.266_70) goto ; else goto ; : # it_86 = PHI D.24989_93 = ~itBegin_14(D); D.24990_94 = (unsigned int) D.24989_93; D.24991_95 = (unsigned int) itEnd_16(D); D.24992_96 = D.24990_94 + D.24991_95; D.24993_97 = D.24992_96 - prolog_loop_niters.266_70; niters.268_98 = D.24993_97 + 1; bnd.269_99 = niters.268_98 >> 2; ratio_mult_vf.270_100 = bnd.269_99 << 2; if (ratio_mult_vf.270_100 <= 3) goto ; else goto ; : D.25000_123 = (float *) D.22335_6; D.25001_124 = (long unsigned int) itBegin_14(D); D.25002_125 = D.25001_124 * 4; batmp.273_126 = D.25000_123 + D.25002_125; D.25004_127 = prolog_loop_niters.266_70 * 4; base_off.274_128 = (long unsigned int) D.25004_127; D.25007_129 = batmp.273_126 + base_off.274_128; vect_p.276_130 = (__vector float *) D.25007_129; vect_p.272_131 = vect_p.276_130; D.25013_135 = (long unsigned int) itBegin_14(D); D.25014_136 = D.25013_135 * 4; batmp.281_137 = pSum_26(D) + D.25014_136; D.25016_138 = prolog_loop_niters.266_70 * 4; base_off.282_139 = (long unsigned int) D.25016_138; D.25019_140 = batmp.281_137 + base_off.282_139; vect_p.284_141 = (__vector float *) D.25019_140; vect_p.280_142 = vect_p.284_141; vect_var_.279_143 = A*vect_p.280_142; D.25021_144 = (long unsigned int) itBegin_14(D); D.25022_145 = D.25021_144 * 4; batmp.285_146 = pSum_26(D) + D.25022_145; D.25024_147 = prolog_loop_niters.266_70 * 4; base_off.286_148 = (long unsigned int) D.25024_147; D.25027_149 = batmp.285_146 + base_off.286_148; vect_p.288_150 = (__vector float *) D.25027_149; vect_var_.289_151 = __builtin_altivec_mask_for_load (vect_p.288_150); D.25032_153 = (long unsigned int) itBegin_14(D); D.25033_154 = D.25032_153 * 4; batmp.292_155 = pSum_26(D) + D.25033_154; D.25035_156 = prolog_loop_niters.266_70 * 4; base_off.293_157 = (long unsigned int) D.25035_156; offset.294_158 = base_off.293_157 + 12; D.25039_159 = batmp.292_155 + offset.294_158; vect_p.296_160 = (__vector float *) D.25039_159; vect_p.291_161 = vect_p.296_160; D.25046_166 = (long unsigned int) itBegin_14(D); D.25047_167 = D.25046_166 * 4; batmp.302_168 = pSum1_31(D) + D.25047_167; D.25049_169 = prolog_loop_niters.266_70 * 4; base_off.303_170 = (long unsigned int) D.25049_169; D.25052_171 = batmp.302_168 + base_off.303_170; vect_p.305_172 = (__vector float *) D.25052_171; vect_p.301_173 = vect_p.305_172; vect_var_.300_174 = A*vect_p.301_173; D.25054_175 = (long unsigned int) itBegin_14(D); D.25055_176 = D.25054_175 * 4; batmp.306_177 = pSum1_31(D) + D.25055_176; D.25057_178 = prolog_loop_niters.266_70 * 4; base_off.307_179 = (long unsigned int) D.25057_178; D.25060_180 = batmp.306_177 + base_off.307_179; vect_p.309_181 = (__vector float *) D.25060_180; vect_var_.310_182 = __builtin_altivec_mask_for_load (vect_p.309_181); D.25065_184 = (long unsigned int) itBegin_14(D); D.25066_185 = D.25065_184 * 4; batmp.313_186 = pSum1_31(D) + D.25066_185; D.25068_187 = prolog_loop_niters.266_70 * 4; base_off.314_188 = (long unsigned int) D.25068_187; offset.315_189 = base_off.314_188 + 12; D.25072_190 = batmp.313_186 + offset.315_189; vect_p.317_191 = (__vector float *) D.25072_190; vect_p.312_192 = vect_p.317_191; D.25082_199 = (float *) D.22335_6; D.25083_200 = (long unsigned int) itBegin_14(D); D.25084_201 = D.25083_200 * 4; batmp.324_202 = D.25082_199 + D.25084_201; D.25086_203 = prolog_loop_niters.266_70 * 4; base_off.325_204 = (long unsigned int) D.25086_203; D.25089_205 = batmp.324_202 + base_off.325_204; vect_p.327_206 = (__vector float *) D.25089_205; vect_p.323_207 = vect_p.327_206; goto ; : goto ; : return; } ;; Function int main(int, char**) (main) (get_loop_exit_condition if (ivtmp.553_471 != 0)) (number_of_iterations_in_loop (analyze_scalar_evolution (loop_nb = 3) (scalar = ivtmp.553_471) (get_scalar_evolution (scalar = ivtmp.553_471) (scalar_evolution = )) (analyze_scalar_evolution (loop_nb = 3) (scalar = ivtmp.553_1) (get_scalar_evolution (scalar = ivtmp.553_1) (scalar_evolution = )) (analyze_initial_condition (loop_phi_node = ivtmp.553_1 = PHI <256(21), ivtmp.553_471(23)>) (init_cond = 256)) (analyze_evolution_in_loop (loop_phi_node = ivtmp.553_1 = PHI <256(21), ivtmp.553_471(23)>) (add_to_evolution (loop_nb = 3) (chrec_before = 256) (to_add = 1) (res = {256, +, 0x0ffffffffffffffff}_3)) (evolution_function = {256, +, 0x0ffffffffffffffff}_3)) (set_scalar_evolution (scalar = ivtmp.553_1) (scalar_evolution = {256, +, 0x0ffffffffffffffff}_3)) ) (analyze_scalar_evolution (loop_nb = 3) (scalar = 1) (get_scalar_evolution (scalar = 1) (scalar_evolution = 1)) ) (set_scalar_evolution (scalar = ivtmp.553_471) (scalar_evolution = {255, +, 0x0ffffffffffffffff}_3)) ) (analyze_scalar_evolution (loop_nb = 3) (scalar = 0) (get_scalar_evolution (scalar = 0) (scalar_evolution = 0)) ) Analyzing # of iterations of loop 3 exit condition [255, + , 0x0ffffffffffffffff] != 0 bounds on difference of bases: -255 ... -255 result: # of iterations 255, bounded by 255 (set_nb_iterations_in_loop = 255)) (get_loop_exit_condition if (ivtmp.553_471 != 0)) Creating dr for __tmp[__i_473] analyze_innermost: (analyze_scalar_evolution (loop_nb = 3) (scalar = &__tmp) (get_scalar_evolution (scalar = &__tmp) (scalar_evolution = )) ) (analyze_scalar_evolution (loop_nb = 3) (scalar = (long unsigned int) __i_473) (get_scalar_evolution (scalar = (long unsigned int) __i_473) (scalar_evolution = )) (analyze_scalar_evolution (loop_nb = 3) (scalar = __i_473) (get_scalar_evolution (scalar = __i_473) (scalar_evolution = )) (analyze_initial_condition (loop_phi_node = __i_473 = PHI <0(21), __i_139(23)>) (init_cond = 0)) (analyze_evolution_in_loop (loop_phi_node = __i_473 = PHI <0(21), __i_139(23)>) (add_to_evolution (loop_nb = 3) (chrec_before = 0) (to_add = 1) (res = {0, +, 1}_3)) (evolution_function = {0, +, 1}_3)) (set_scalar_evolution (scalar = __i_473) (scalar_evolution = {0, +, 1}_3)) ) ) success. (analyze_scalar_evolution (loop_nb = 3) (scalar = __i_473) (get_scalar_evolution (scalar = __i_473) (scalar_evolution = {0, +, 1}_3)) (set_scalar_evolution (scalar = __i_473) (scalar_evolution = {0, +, 1}_3)) ) base_address: &__tmp offset from base address: 0 constant offset from base address: 0 step: 1 aligned to: 128 base_object: __tmp[0] symbol tag: __tmp (analyze_scalar_evolution (loop_nb = 3) (scalar = ivtmp.553_1) (get_scalar_evolution (scalar = ivtmp.553_1) (scalar_evolution = {256, +, 0x0ffffffffffffffff}_3)) (set_scalar_evolution (scalar = ivtmp.553_1) (scalar_evolution = {256, +, 0x0ffffffffffffffff}_3)) ) (analyze_scalar_evolution (loop_nb = 3) (scalar = __i_473) (get_scalar_evolution (scalar = __i_473) (scalar_evolution = {0, +, 1}_3)) (set_scalar_evolution (scalar = __i_473) (scalar_evolution = {0, +, 1}_3)) ) /usr/local/gcc43/lib/gcc/powerpc64-unknown-linux-gnu/4.3.0/../../../../include/c++/4.3.0/bits/locale_facets.h:1168: note: === vect_analyze_slp === /usr/local/gcc43/lib/gcc/powerpc64-unknown-linux-gnu/4.3.0/../../../../include/c++/4.3.0/bits/locale_facets.h:1168: note: === vect_make_slp_decision === /usr/local/gcc43/lib/gcc/powerpc64-unknown-linux-gnu/4.3.0/../../../../include/c++/4.3.0/bits/locale_facets.h:1168: note: === vect_detect_hybrid_slp ===(analyze_scalar_evolution (loop_nb = 3) (scalar = ivtmp.553_1) (get_scalar_evolution (scalar = ivtmp.553_1) (scalar_evolution = {256, +, 0x0ffffffffffffffff}_3)) (set_scalar_evolution (scalar = ivtmp.553_1) (scalar_evolution = {256, +, 0x0ffffffffffffffff}_3)) ) (instantiate_parameters (loop_nb = 3) (chrec = {256, +, 0x0ffffffffffffffff}_3) (res = {256, +, 0x0ffffffffffffffff}_3)) (analyze_scalar_evolution (loop_nb = 3) (scalar = __i_473) (get_scalar_evolution (scalar = __i_473) (scalar_evolution = {0, +, 1}_3)) (set_scalar_evolution (scalar = __i_473) (scalar_evolution = {0, +, 1}_3)) ) (instantiate_parameters (loop_nb = 3) (chrec = {0, +, 1}_3) (res = {0, +, 1}_3)) (get_loop_exit_condition if (ivtmp.553_471 != 0)) /usr/local/gcc43/lib/gcc/powerpc64-unknown-linux-gnu/4.3.0/../../../../include/c++/4.3.0/bits/locale_facets.h:1168: note: not vectorized: relevant stmt not supported: D.25590_137 = (char) __i_473(get_loop_exit_condition if (ivtmp.554_138 != 0)) (number_of_iterations_in_loop (analyze_scalar_evolution (loop_nb = 2) (scalar = ivtmp.554_138) (get_scalar_evolution (scalar = ivtmp.554_138) (scalar_evolution = )) (analyze_scalar_evolution (loop_nb = 2) (scalar = ivtmp.554_469) (get_scalar_evolution (scalar = ivtmp.554_469) (scalar_evolution = )) (analyze_initial_condition (loop_phi_node = ivtmp.554_469 = PHI <256(11), ivtmp.554_138(13)>) (init_cond = 256)) (analyze_evolution_in_loop (loop_phi_node = ivtmp.554_469 = PHI <256(11), ivtmp.554_138(13)>) (add_to_evolution (loop_nb = 2) (chrec_before = 256) (to_add = 1) (res = {256, +, 0x0ffffffffffffffff}_2)) (evolution_function = {256, +, 0x0ffffffffffffffff}_2)) (set_scalar_evolution (scalar = ivtmp.554_469) (scalar_evolution = {256, +, 0x0ffffffffffffffff}_2)) ) (analyze_scalar_evolution (loop_nb = 2) (scalar = 1) (get_scalar_evolution (scalar = 1) (scalar_evolution = 1)) ) (set_scalar_evolution (scalar = ivtmp.554_138) (scalar_evolution = {255, +, 0x0ffffffffffffffff}_2)) ) (analyze_scalar_evolution (loop_nb = 2) (scalar = 0) (get_scalar_evolution (scalar = 0) (scalar_evolution = 0)) ) Analyzing # of iterations of loop 2 exit condition [255, + , 0x0ffffffffffffffff] != 0 bounds on difference of bases: -255 ... -255 result: # of iterations 255, bounded by 255 (set_nb_iterations_in_loop = 255)) (get_loop_exit_condition if (ivtmp.554_138 != 0)) Creating dr for __tmp[__i_477] analyze_innermost: (analyze_scalar_evolution (loop_nb = 2) (scalar = &__tmp) (get_scalar_evolution (scalar = &__tmp) (scalar_evolution = )) ) (analyze_scalar_evolution (loop_nb = 2) (scalar = (long unsigned int) __i_477) (get_scalar_evolution (scalar = (long unsigned int) __i_477) (scalar_evolution = )) (analyze_scalar_evolution (loop_nb = 2) (scalar = __i_477) (get_scalar_evolution (scalar = __i_477) (scalar_evolution = )) (analyze_initial_condition (loop_phi_node = __i_477 = PHI <0(11), __i_96(13)>) (init_cond = 0)) (analyze_evolution_in_loop (loop_phi_node = __i_477 = PHI <0(11), __i_96(13)>) (add_to_evolution (loop_nb = 2) (chrec_before = 0) (to_add = 1) (res = {0, +, 1}_2)) (evolution_function = {0, +, 1}_2)) (set_scalar_evolution (scalar = __i_477) (scalar_evolution = {0, +, 1}_2)) ) ) success. (analyze_scalar_evolution (loop_nb = 2) (scalar = __i_477) (get_scalar_evolution (scalar = __i_477) (scalar_evolution = {0, +, 1}_2)) (set_scalar_evolution (scalar = __i_477) (scalar_evolution = {0, +, 1}_2)) ) base_address: &__tmp offset from base address: 0 constant offset from base address: 0 step: 1 aligned to: 128 base_object: __tmp[0] symbol tag: __tmp (analyze_scalar_evolution (loop_nb = 2) (scalar = ivtmp.554_469) (get_scalar_evolution (scalar = ivtmp.554_469) (scalar_evolution = {256, +, 0x0ffffffffffffffff}_2)) (set_scalar_evolution (scalar = ivtmp.554_469) (scalar_evolution = {256, +, 0x0ffffffffffffffff}_2)) ) (analyze_scalar_evolution (loop_nb = 2) (scalar = __i_477) (get_scalar_evolution (scalar = __i_477) (scalar_evolution = {0, +, 1}_2)) (set_scalar_evolution (scalar = __i_477) (scalar_evolution = {0, +, 1}_2)) ) /usr/local/gcc43/lib/gcc/powerpc64-unknown-linux-gnu/4.3.0/../../../../include/c++/4.3.0/bits/locale_facets.h:1168: note: === vect_analyze_slp === /usr/local/gcc43/lib/gcc/powerpc64-unknown-linux-gnu/4.3.0/../../../../include/c++/4.3.0/bits/locale_facets.h:1168: note: === vect_make_slp_decision === /usr/local/gcc43/lib/gcc/powerpc64-unknown-linux-gnu/4.3.0/../../../../include/c++/4.3.0/bits/locale_facets.h:1168: note: === vect_detect_hybrid_slp ===(analyze_scalar_evolution (loop_nb = 2) (scalar = ivtmp.554_469) (get_scalar_evolution (scalar = ivtmp.554_469) (scalar_evolution = {256, +, 0x0ffffffffffffffff}_2)) (set_scalar_evolution (scalar = ivtmp.554_469) (scalar_evolution = {256, +, 0x0ffffffffffffffff}_2)) ) (instantiate_parameters (loop_nb = 2) (chrec = {256, +, 0x0ffffffffffffffff}_2) (res = {256, +, 0x0ffffffffffffffff}_2)) (analyze_scalar_evolution (loop_nb = 2) (scalar = __i_477) (get_scalar_evolution (scalar = __i_477) (scalar_evolution = {0, +, 1}_2)) (set_scalar_evolution (scalar = __i_477) (scalar_evolution = {0, +, 1}_2)) ) (instantiate_parameters (loop_nb = 2) (chrec = {0, +, 1}_2) (res = {0, +, 1}_2)) (get_loop_exit_condition if (ivtmp.554_138 != 0)) /usr/local/gcc43/lib/gcc/powerpc64-unknown-linux-gnu/4.3.0/../../../../include/c++/4.3.0/bits/locale_facets.h:1168: note: not vectorized: relevant stmt not supported: D.25541_94 = (char) __i_477(get_loop_exit_condition if (it_28 < m_nSamples_45)) (number_of_iterations_in_loop (analyze_scalar_evolution (loop_nb = 1) (scalar = it_28) (get_scalar_evolution (scalar = it_28) (scalar_evolution = )) (analyze_scalar_evolution (loop_nb = 1) (scalar = it_476) (get_scalar_evolution (scalar = it_476) (scalar_evolution = )) (analyze_initial_condition (loop_phi_node = it_476 = PHI ) (init_cond = 0)) (analyze_evolution_in_loop (loop_phi_node = it_476 = PHI ) (add_to_evolution (loop_nb = 1) (chrec_before = 0) (to_add = 1) (res = {0, +, 1}_1)) (evolution_function = {0, +, 1}_1)) (set_scalar_evolution (scalar = it_476) (scalar_evolution = {0, +, 1}_1)) ) (analyze_scalar_evolution (loop_nb = 1) (scalar = 1) (get_scalar_evolution (scalar = 1) (scalar_evolution = 1)) ) (set_scalar_evolution (scalar = it_28) (scalar_evolution = {1, +, 1}_1)) ) (analyze_scalar_evolution (loop_nb = 1) (scalar = m_nSamples_45) (get_scalar_evolution (scalar = m_nSamples_45) (scalar_evolution = )) ) Analyzing # of iterations of loop 1 exit condition [1, + , 1](no_overflow) < (int) D.24890_44 bounds on difference of bases: 0 ... 2147483646 result: # of iterations (unsigned int) D.24890_44 + 4294967295, bounded by 2147483646 (instantiate_parameters (loop_nb = 1) (chrec = (unsigned int) D.24890_44 + 4294967295) (analyze_scalar_evolution (loop_nb = 0) (scalar = D.24890_44) (get_scalar_evolution (scalar = D.24890_44) (scalar_evolution = )) (set_scalar_evolution (scalar = D.24890_44) (scalar_evolution = D.24890_44)) ) (res = (unsigned int) D.24890_44 + 4294967295)) (set_nb_iterations_in_loop = (unsigned int) D.24890_44 + 4294967295)) (get_loop_exit_condition if (it_28 < m_nSamples_45)) Creating dr for *D.22306_22 analyze_innermost: (analyze_scalar_evolution (loop_nb = 1) (scalar = (float *) D.22306_22) (get_scalar_evolution (scalar = (float *) D.22306_22) (scalar_evolution = )) (analyze_scalar_evolution (loop_nb = 1) (scalar = D.22306_22) (get_scalar_evolution (scalar = D.22306_22) (scalar_evolution = )) (analyze_scalar_evolution (loop_nb = 1) (scalar = pSum_18) (get_scalar_evolution (scalar = pSum_18) (scalar_evolution = )) ) (analyze_scalar_evolution (loop_nb = 1) (scalar = D.22305_21) (get_scalar_evolution (scalar = D.22305_21) (scalar_evolution = )) (analyze_scalar_evolution (loop_nb = 1) (scalar = D.22304_20) (get_scalar_evolution (scalar = D.22304_20) (scalar_evolution = )) (analyze_scalar_evolution (loop_nb = 1) (scalar = it_476) (get_scalar_evolution (scalar = it_476) (scalar_evolution = {0, +, 1}_1)) (set_scalar_evolution (scalar = it_476) (scalar_evolution = {0, +, 1}_1)) ) (set_scalar_evolution (scalar = D.22304_20) (scalar_evolution = {0, +, 1}_1)) ) (analyze_scalar_evolution (loop_nb = 1) (scalar = 4) (get_scalar_evolution (scalar = 4) (scalar_evolution = 4)) ) (set_scalar_evolution (scalar = D.22305_21) (scalar_evolution = {0, +, 4}_1)) ) (set_scalar_evolution (scalar = D.22306_22) (scalar_evolution = {pSum_18, +, 4}_1)) ) ) success. (analyze_scalar_evolution (loop_nb = 1) (scalar = D.22306_22) (get_scalar_evolution (scalar = D.22306_22) (scalar_evolution = {pSum_18, +, 4}_1)) (set_scalar_evolution (scalar = D.22306_22) (scalar_evolution = {pSum_18, +, 4}_1)) ) base_address: D.22299_17 offset from base address: 0 constant offset from base address: 0 step: 4 aligned to: 128 base_object: *(ARRTYPE *) D.22299_17 symbol tag: SMT.506 Creating dr for *D.22309_27 analyze_innermost: (analyze_scalar_evolution (loop_nb = 1) (scalar = (float *) D.22309_27) (get_scalar_evolution (scalar = (float *) D.22309_27) (scalar_evolution = )) (analyze_scalar_evolution (loop_nb = 1) (scalar = D.22309_27) (get_scalar_evolution (scalar = D.22309_27) (scalar_evolution = )) (analyze_scalar_evolution (loop_nb = 1) (scalar = pSum1_16) (get_scalar_evolution (scalar = pSum1_16) (scalar_evolution = )) ) (analyze_scalar_evolution (loop_nb = 1) (scalar = D.22305_21) (get_scalar_evolution (scalar = D.22305_21) (scalar_evolution = {0, +, 4}_1)) (set_scalar_evolution (scalar = D.22305_21) (scalar_evolution = {0, +, 4}_1)) ) (set_scalar_evolution (scalar = D.22309_27) (scalar_evolution = {pSum1_16, +, 4}_1)) ) ) success. (analyze_scalar_evolution (loop_nb = 1) (scalar = D.22309_27) (get_scalar_evolution (scalar = D.22309_27) (scalar_evolution = {pSum1_16, +, 4}_1)) (set_scalar_evolution (scalar = D.22309_27) (scalar_evolution = {pSum1_16, +, 4}_1)) ) base_address: D.22298_15 offset from base address: 0 constant offset from base address: 0 step: 4 aligned to: 128 base_object: *(ARRTYPE *) D.22298_15 symbol tag: SMT.506 (compute_affine_dependence (stmt_a = *D.22306_22 = D.22308_24) (stmt_b = *D.22309_27 = D.22312_30) ) (analyze_scalar_evolution (loop_nb = 1) (scalar = it_476) (get_scalar_evolution (scalar = it_476) (scalar_evolution = {0, +, 1}_1)) (set_scalar_evolution (scalar = it_476) (scalar_evolution = {0, +, 1}_1)) ) Eyal.cpp:17: note: versioning for alias required: can't determine dependence between *D.22306_22 and *D.22309_27 Eyal.cpp:17: note: mark for run-time aliasing test between *D.22306_22 and *D.22309_27 Eyal.cpp:17: note: === vect_analyze_slp === Eyal.cpp:17: note: === vect_make_slp_decision === Eyal.cpp:17: note: === vect_detect_hybrid_slp === Eyal.cpp:17: note: Alignment of access forced using versioning. Eyal.cpp:17: note: Alignment of access forced using versioning. Eyal.cpp:17: note: not vectorized: relevant stmt not supported: D.22307_23 = it_476 / itBegin_47 Eyal.cpp:9: note: vectorized 0 loops in function. int main(int, char**) (argc, argv) { size_t ivtmp.554; size_t ivtmp.553; const struct ctype & D.25583; const struct ctype & D.25583; char D.25590; int (*__vtbl_ptr_type) (void) * D.25591; int (*__vtbl_ptr_type) (void) * D.25592; int (*__vtbl_ptr_type) (void) D.25593; char * D.25594; int D.25595; char __tmp[256]; size_t __i; int (*__vtbl_ptr_type) (void) D.25598; int (*__vtbl_ptr_type) (void) * D.25597; int (*__vtbl_ptr_type) (void) * D.25596; char D.25588; char D.25587; char D.25587; struct basic_ios * __os.63; int (*__vtbl_ptr_type) (void) * D.25573; int (*__vtbl_ptr_type) (void) * D.25574; long int * D.25575; long int D.25576; long unsigned int D.25577; struct basic_ios * D.25578; struct basic_ostream & D.25580; const struct ctype & D.25534; const struct ctype & D.25534; char D.25541; int (*__vtbl_ptr_type) (void) * D.25542; int (*__vtbl_ptr_type) (void) * D.25543; int (*__vtbl_ptr_type) (void) D.25544; char * D.25545; int D.25546; char __tmp[256]; size_t __i; int (*__vtbl_ptr_type) (void) D.25549; int (*__vtbl_ptr_type) (void) * D.25548; int (*__vtbl_ptr_type) (void) * D.25547; char D.25539; char D.25538; char D.25538; struct basic_ios * __os.63; int (*__vtbl_ptr_type) (void) * D.25524; int (*__vtbl_ptr_type) (void) * D.25525; long int * D.25526; long int D.25527; long unsigned int D.25528; struct basic_ios * D.25529; struct basic_ostream & D.25531; double D.24917; struct basic_ostream & D.24916; struct basic_ostream & D.24916; double D.24908; struct basic_ostream & D.24907; struct basic_ostream & D.24907; long int D.24898; long int D.24898; long int D.24894; long int D.24894; long int D.24890; long int D.24890; int it; ARRTYPE * pSum; ARRTYPE * pSum1; int itEnd; int itBegin; int m_nSamples; float D.22319; float D.22315; float D.22312; int D.22311; ARRTYPE * D.22309; float D.22308; int D.22307; ARRTYPE * D.22306; long unsigned int D.22305; long unsigned int D.22304; void * D.22299; void * D.22298; char * D.22297; char * * D.22296; char * D.22295; char * * D.22294; char * D.22293; char * * D.22292; char * D.22291; char * * D.22290; : D.22290_3 = argv_2(D) + 8; D.22291_4 = *D.22290_3; D.24890_44 = __strtol_internal (D.22291_4, 0B, 10, 0); m_nSamples_45 = (int) D.24890_44; D.22292_6 = argv_2(D) + 16; D.22293_7 = *D.22292_6; D.24894_46 = __strtol_internal (D.22293_7, 0B, 10, 0); itBegin_47 = (int) D.24894_46; D.22294_9 = argv_2(D) + 24; D.22295_10 = *D.22294_9; D.24898_48 = __strtol_internal (D.22295_10, 0B, 10, 0); D.22296_12 = argv_2(D) + 32; D.22297_13 = *D.22296_12; __strtol_internal (D.22297_13, 0B, 10, 0); D.22298_15 = operator new [] (400000); pSum1_16 = (ARRTYPE *) D.22298_15; D.22299_17 = operator new [] (400000); pSum_18 = (ARRTYPE *) D.22299_17; if (m_nSamples_45 > 0) goto ; else goto ; : : # it_476 = PHI D.22304_20 = (long unsigned int) it_476; D.22305_21 = D.22304_20 * 4; D.22306_22 = pSum_18 + D.22305_21; D.22307_23 = it_476 / itBegin_47; D.22308_24 = (float) D.22307_23; *D.22306_22 = D.22308_24; D.22309_27 = pSum1_16 + D.22305_21; it_28 = it_476 + 1; D.22311_29 = itBegin_47 / it_28; D.22312_30 = (float) D.22311_29; *D.22309_27 = D.22312_30; if (it_28 < m_nSamples_45) goto ; else goto ; : goto ; : : itEnd_49 = (int) D.24898_48; Calc (pSum_18, pSum1_16, 0B, 0B, m_nSamples_45, itBegin_47, itEnd_49); __ostream_insert (&cout, &"pVec1[10] = "[0], 13); D.22315_36 ={v} *40B; D.24908_52 = (double) D.22315_36; D.24907_53 = _M_insert (&cout, D.24908_52); __os.63_80 = (struct basic_ios *) D.24907_53; D.25524_81 = D.24907_53->_vptr.basic_ostream; D.25525_82 = D.25524_81 + -24; D.25526_83 = (long int *) D.25525_82; D.25527_84 = *D.25526_83; D.25528_85 = (long unsigned int) D.25527_84; D.25529_86 = __os.63_80 + D.25528_85; D.25534_90 = D.25529_86->_M_ctype; if (D.25534_90 == 0B) goto ; else goto ; : __throw_bad_cast (); : D.25539_91 = D.25534_90->_M_widen_ok; if (D.25539_91 != 0) goto ; else goto ; : D.25538_93 = D.25534_90->_M_widen[10]; goto ; : : # ivtmp.554_469 = PHI <256(11), ivtmp.554_138(13)> # __i_477 = PHI <0(11), __i_96(13)> D.25541_94 = (char) __i_477; __tmp[__i_477] = D.25541_94; __i_96 = __i_477 + 1; ivtmp.554_138 = ivtmp.554_469 - 1; if (ivtmp.554_138 != 0) goto ; else goto ; : goto ; : D.25542_97 = D.25534_90->D.15856._vptr.facet; D.25543_98 = D.25542_97 + 56; D.25544_99 = *D.25543_98; D.25545_100 = &D.25534_90->_M_widen[0]; OBJ_TYPE_REF(D.25544_99;D.25534_90->7) (D.25534_90, &__tmp[0], &__tmp[256], D.25545_100); D.25534_90->_M_widen_ok = 1; D.25546_102 = __builtin_memcmp (&__tmp[0], D.25545_100, 256); if (D.25546_102 != 0) goto ; else goto ; : D.25534_90->_M_widen_ok = 2; : D.25547_103 = D.25534_90->D.15856._vptr.facet; D.25548_104 = D.25547_103 + 48; D.25549_105 = *D.25548_104; D.25538_106 = OBJ_TYPE_REF(D.25549_105;D.25534_90->6) (D.25534_90, 10); : # D.25538_107 = PHI D.25531_88 = put (D.24907_53, D.25538_107); flush (D.25531_88); __ostream_insert (&cout, &"pVec1[102] = "[0], 14); D.22319_40 ={v} *408B; D.24917_55 = (double) D.22319_40; D.24916_56 = _M_insert (&cout, D.24917_55); __os.63_123 = (struct basic_ios *) D.24916_56; D.25573_124 = D.24916_56->_vptr.basic_ostream; D.25574_125 = D.25573_124 + -24; D.25575_126 = (long int *) D.25574_125; D.25576_127 = *D.25575_126; D.25577_128 = (long unsigned int) D.25576_127; D.25578_129 = __os.63_123 + D.25577_128; D.25583_133 = D.25578_129->_M_ctype; if (D.25583_133 == 0B) goto ; else goto ; : __throw_bad_cast (); : D.25588_134 = D.25583_133->_M_widen_ok; if (D.25588_134 != 0) goto ; else goto ; : D.25587_136 = D.25583_133->_M_widen[10]; goto ; : : # ivtmp.553_1 = PHI <256(21), ivtmp.553_471(23)> # __i_473 = PHI <0(21), __i_139(23)> D.25590_137 = (char) __i_473; __tmp[__i_473] = D.25590_137; __i_139 = __i_473 + 1; ivtmp.553_471 = ivtmp.553_1 - 1; if (ivtmp.553_471 != 0) goto ; else goto ; : goto ; : D.25591_140 = D.25583_133->D.15856._vptr.facet; D.25592_141 = D.25591_140 + 56; D.25593_142 = *D.25592_141; D.25594_143 = &D.25583_133->_M_widen[0]; OBJ_TYPE_REF(D.25593_142;D.25583_133->7) (D.25583_133, &__tmp[0], &__tmp[256], D.25594_143); D.25583_133->_M_widen_ok = 1; D.25595_145 = __builtin_memcmp (&__tmp[0], D.25594_143, 256); if (D.25595_145 != 0) goto ; else goto ; : D.25583_133->_M_widen_ok = 2; : D.25596_146 = D.25583_133->D.15856._vptr.facet; D.25597_147 = D.25596_146 + 48; D.25598_148 = *D.25597_147; D.25587_149 = OBJ_TYPE_REF(D.25598_148;D.25583_133->6) (D.25583_133, 10); : # D.25587_150 = PHI D.25580_131 = put (D.24916_56, D.25587_150); flush (D.25580_131); free (0B); free (0B); return 0; } -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35117