Index: testsuite/ChangeLog.autovect =================================================================== --- testsuite/ChangeLog.autovect (revision 127057) +++ testsuite/ChangeLog.autovect (working copy) @@ -1,3 +1,12 @@ +2007-07-31 Ira Rosen + + * gcc.dg/vect/slp-16.c: Now SLPable. + * gcc.dg/vect/slp-1.c: Add new test cases for different group sizes. + * gcc.dg/vect/slp-2.c, gcc.dg/vect/slp-3.c: Likewise. + * gcc.dg/vect/slp-19.c: Change the group size. + * gcc.dg/vect/slp-33.c: New. + * gcc.dg/vect/slp-34.c, gcc.dg/vect/slp-35.c: Likewise. + 2007-07-03 Dorit Nuzman * gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c: Loops now Index: testsuite/gcc.dg/vect/slp-33.c =================================================================== --- testsuite/gcc.dg/vect/slp-33.c (revision 0) +++ testsuite/gcc.dg/vect/slp-33.c (revision 0) @@ -0,0 +1,112 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include <stdio.h> +#include "tree-vect.h" + +#define N 8 + +int +main1 () +{ + int i; + unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7; + unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; + float out2[N*8]; + + /* SLP with unrolling by 4. 
*/ + for (i = 0; i < N; i++) + { + a0 = in[i*7] + 5; + a1 = in[i*7 + 1] + 6; + a2 = in[i*7 + 2] + 7; + a3 = in[i*7 + 3] + 8; + a4 = in[i*7 + 4] + 9; + a5 = in[i*7 + 5] + 10; + a6 = in[i*7 + 6] + 11; + + b0 = a0 * 3; + b1 = a1 * 2; + b2 = a2 * 12; + b3 = a3 * 5; + b4 = a4 * 8; + b5 = a5 * 4; + b6 = a6 * 3; + + out[i*7] = b0 - 2; + out[i*7 + 1] = b1 - 3; + out[i*7 + 2] = b2 - 2; + out[i*7 + 3] = b3 - 1; + out[i*7 + 4] = b4 - 8; + out[i*7 + 5] = b5 - 7; + out[i*7 + 6] = b6 - 3; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (out[i*7] != (in[i*7] + 5) * 3 - 2 + || out[i*7 + 1] != (in[i*7 + 1] + 6) * 2 - 3 + || out[i*7 + 2] != (in[i*7 + 2] + 7) * 12 - 2 + || out[i*7 + 3] != (in[i*7 + 3] + 8) * 5 - 1 + || out[i*7 + 4] != (in[i*7 + 4] + 9) * 8 - 8 + || out[i*7 + 5] != (in[i*7 + 5] + 10) * 4 - 7 + || out[i*7 + 6] != (in[i*7 + 6] + 11) * 3 - 3) + abort (); + } + + /* SLP with unrolling by 4. */ + for (i = 0; i < N*2; i++) + { + out[i*3] = (in[i*3] + 2) * 3; + out[i*3 + 1] = (in[i*3 + 1] + 2) * 7; + out[i*3 + 2] = (in[i*3 + 2] + 7) * 3; + } + + /* check results: */ + for (i = 0; i < N*2; i++) + { + if (out[i*3] != (in[i*3] + 2) * 3 + || out[i*3 + 1] != (in[i*3 + 1] + 2) * 7 + || out[i*3 + 2] != (in[i*3 + 2] + 7) * 3) + abort (); + } + + /* SLP with unrolling by 4. */ + for (i = 0; i < N*2; i++) + { + out2[i*3] = (float) (in[i*3] * 2 + 6) ; + out2[i*3 + 1] = (float) (in[i*3 + 1] * 3 + 7); + out2[i*3 + 2] = (float) (in[i*3 + 2] * 5 + 4); + } + + /* check results: */ + for (i = 0; i < N*2; i++) + { + if (out2[i*3] != (float) (in[i*3] * 2 + 6) + || out2[i*3 + 1] != (float) (in[i*3 + 1] * 3 + 7) + || out2[i*3 + 2] != (float) (in[i*3 + 2] * 5 + 4)) + abort (); + } + + + return 0; +} + +int main (void) +{ + check_vect (); + + main1 (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target {vect_intfloat_cvt && vect_int_mult} } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target {{! 
{ vect_intfloat_cvt}} && vect_int_mult} } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target {{! { vect_intfloat_cvt}} && {!{vect_int_mult}}} } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target {vect_intfloat_cvt && vect_int_mult} } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target {{! { vect_intfloat_cvt}} && vect_int_mult} } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target {{! { vect_intfloat_cvt}} && {!{vect_int_mult}}} } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + Index: testsuite/gcc.dg/vect/slp-16.c =================================================================== --- testsuite/gcc.dg/vect/slp-16.c (revision 127057) +++ testsuite/gcc.dg/vect/slp-16.c (working copy) @@ -15,7 +15,8 @@ main1 () unsigned int in2[N*16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; unsigned int out2[N*16]; - /* SLP groups of size that is not a multiple of vector size is not supported yet. */ + /* SLP group of size that is not a multiple of vector size. + Unrolling by 2. */ for (i = 0; i < N; i++) { a0 = in[i*2] + 5; @@ -63,8 +64,7 @@ int main (void) return 0; } -/* Needs vect_mult_int. 
*/ -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_int_mult } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_int_mult } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ Index: testsuite/gcc.dg/vect/slp-34.c =================================================================== --- testsuite/gcc.dg/vect/slp-34.c (revision 0) +++ testsuite/gcc.dg/vect/slp-34.c (revision 0) @@ -0,0 +1,61 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include <stdio.h> +#include "tree-vect.h" + +#define N 8 + +int +main1 () +{ + int i; + unsigned short out[N*8]; + unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; + unsigned short in2[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; + unsigned short out2[N*8]; + + /* SLP with unrolling by 8. 
*/ + for (i = 0; i < N; i++) + { + out[i*3] = in[i*3] + 5; + out[i*3 + 1] = in[i*3 + 1] + 6; + out[i*3 + 2] = in[i*3 + 2] + 16; + + out2[i*5] = in2[i*5] + 2; + out2[i*5 + 1] = in2[i*5 + 1] + 2; + out2[i*5 + 2] = in2[i*5 + 2] + 1; + out2[i*5 + 3] = in2[i*5 + 3] + 3; + out2[i*5 + 4] = in2[i*5 + 4] + 13; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (out[i*3] != in[i*3] + 5 + || out[i*3 + 1] != in[i*3 + 1] + 6 + || out[i*3 + 2] != in[i*3 + 2] + 16 + || out2[i*5] != in2[i*5] + 2 + || out2[i*5 + 1] != in2[i*5 + 1] + 2 + || out2[i*5 + 2] != in2[i*5 + 2] + 1 + || out2[i*5 + 3] != in2[i*5 + 3] + 3 + || out2[i*5 + 4] != in2[i*5 + 4] + 13) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + main1 (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + Index: testsuite/gcc.dg/vect/slp-35.c =================================================================== --- testsuite/gcc.dg/vect/slp-35.c (revision 0) +++ testsuite/gcc.dg/vect/slp-35.c (revision 0) @@ -0,0 +1,73 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 128 + +typedef struct { + int a; + int b; + int c; + int d; + int e; +} s; + +int +main1 (s *arr) +{ + int i; + s *ptr = arr; + s res[N]; + + /* SLP with unrolling by 4. 
*/ + for (i = 0; i < N; i++) + { + res[i].c = ptr->c + ptr->c; + res[i].a = ptr->a + ptr->a; + res[i].d = ptr->d + ptr->d; + res[i].b = ptr->b + ptr->b; + res[i].e = ptr->e + ptr->e; + ptr++; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (res[i].c != arr[i].c + arr[i].c + || res[i].a != arr[i].a + arr[i].a + || res[i].d != arr[i].d + arr[i].d + || res[i].b != arr[i].b + arr[i].b + || res[i].e != arr[i].e + arr[i].e) + abort(); + } + return 0; +} + +int main (void) +{ + int i; + s arr[N]; + + check_vect (); + + for (i = 0; i < N; i++) + { + arr[i].a = i; + arr[i].b = i * 2; + arr[i].c = 17; + arr[i].d = i+34; + arr[i].e = i * 3 + 5; + if (arr[i].a == 178) + abort(); + } + + main1 (arr); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + Index: testsuite/gcc.dg/vect/slp-1.c =================================================================== --- testsuite/gcc.dg/vect/slp-1.c (revision 127057) +++ testsuite/gcc.dg/vect/slp-1.c (working copy) @@ -56,7 +56,55 @@ main1 () abort (); } + /* SLP with unrolling by 8. */ + for (i = 0; i < N; i++) + { + out[i*5] = 8; + out[i*5 + 1] = 7; + out[i*5 + 2] = 81; + out[i*5 + 3] = 28; + out[i*5 + 4] = 18; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (out[i*5] != 8 + || out[i*5 + 1] != 7 + || out[i*5 + 2] != 81 + || out[i*5 + 3] != 28 + || out[i*5 + 4] != 18) + abort (); + } + /* SLP with unrolling by 8. 
*/ + for (i = 0; i < N/2; i++) + { + out[i*9] = 8; + out[i*9 + 1] = 7; + out[i*9 + 2] = 81; + out[i*9 + 3] = 28; + out[i*9 + 4] = 18; + out[i*9 + 5] = 85; + out[i*9 + 6] = 5; + out[i*9 + 7] = 4; + out[i*9 + 8] = 14; + } + + /* check results: */ + for (i = 0; i < N/2; i++) + { + if (out[i*9] != 8 + || out[i*9 + 1] != 7 + || out[i*9 + 2] != 81 + || out[i*9 + 3] != 28 + || out[i*9 + 4] != 18 + || out[i*9 + 5] != 85 + || out[i*9 + 6] != 5 + || out[i*9 + 7] != 4 + || out[i*9 + 8] != 14) + abort (); + } return 0; } @@ -70,7 +118,7 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ Index: testsuite/gcc.dg/vect/slp-19.c =================================================================== --- testsuite/gcc.dg/vect/slp-19.c (revision 127057) +++ testsuite/gcc.dg/vect/slp-19.c (working copy) @@ -108,21 +108,15 @@ main1 () abort (); } - /* Hybrid SLP. */ - for (i = 0; i < N/2; i++) + /* Hybrid SLP with unrolling by 2. 
*/ + for (i = 0; i < N; i++) { - out[i*12] = in[i*12]; - out[i*12 + 1] = in[i*12 + 1]; - out[i*12 + 2] = in[i*12 + 2]; - out[i*12 + 3] = in[i*12 + 3]; - out[i*12 + 4] = in[i*12 + 4]; - out[i*12 + 5] = in[i*12 + 5]; - out[i*12 + 6] = in[i*12 + 6]; - out[i*12 + 7] = in[i*12 + 7]; - out[i*12 + 8] = in[i*12 + 8]; - out[i*12 + 9] = in[i*12 + 9]; - out[i*12 + 10] = in[i*12 + 10]; - out[i*12 + 11] = in[i*12 + 11]; + out[i*6] = in[i*6]; + out[i*6 + 1] = in[i*6 + 1]; + out[i*6 + 2] = in[i*6 + 2]; + out[i*6 + 3] = in[i*6 + 3]; + out[i*6 + 4] = in[i*6 + 4]; + out[i*6 + 5] = in[i*6 + 5]; ia[i] = i; } @@ -130,18 +124,12 @@ main1 () /* check results: */ - for (i = 0; i < N/2; i++) + for (i = 0; i < N; i++) { - if (out[i*12] != in[i*12] - || out[i*12 + 1] != in[i*12 + 1] - || out[i*12 + 2] != in[i*12 + 2] - || out[i*12 + 3] != in[i*12 + 3] - || out[i*12 + 4] != in[i*12 + 4] - || out[i*12 + 5] != in[i*12 + 5] - || out[i*12 + 6] != in[i*12 + 6] - || out[i*12 + 7] != in[i*12 + 7] - || out[i*12 + 8] != in[i*12 + 8] - || out[i*12 + 9] != in[i*12 + 9] - || out[i*12 + 10] != in[i*12 + 10] - || out[i*12 + 11] != in[i*12 + 11] + if (out[i*6] != in[i*6] + || out[i*6 + 1] != in[i*6 + 1] + || out[i*6 + 2] != in[i*6 + 2] + || out[i*6 + 3] != in[i*6 + 3] + || out[i*6 + 4] != in[i*6 + 4] + || out[i*6 + 5] != in[i*6 + 5] || ia[i] != i) abort (); } Index: testsuite/gcc.dg/vect/slp-2.c =================================================================== --- testsuite/gcc.dg/vect/slp-2.c (revision 127057) +++ testsuite/gcc.dg/vect/slp-2.c (working copy) @@ -77,6 +77,55 @@ main1 (unsigned short a0, unsigned short abort (); } + /* SLP with unrolling by 8. */ + for (i = 0; i < N; i++) + { + out[i*3] = a8; + out[i*3 + 1] = a1; + out[i*3 + 2] = a2; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (out[i*3] != a8 + || out[i*3 + 1] != a1 + || out[i*3 + 2] != a2) + abort (); + } + + /* SLP with unrolling by 8. 
*/ + for (i = 0; i < N; i++) + { + out[i*11] = a8; + out[i*11 + 1] = a7; + out[i*11 + 2] = a1; + out[i*11 + 3] = a2; + out[i*11 + 4] = a8; + out[i*11 + 5] = a5; + out[i*11 + 6] = a5; + out[i*11 + 7] = a4; + out[i*11 + 8] = a12; + out[i*11 + 9] = a13; + out[i*11 + 10] = a14; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (out[i*11] != a8 + || out[i*11 + 1] != a7 + || out[i*11 + 2] != a1 + || out[i*11 + 3] != a2 + || out[i*11 + 4] != a8 + || out[i*11 + 5] != a5 + || out[i*11 + 6] != a5 + || out[i*11 + 7] != a4 + || out[i*11 + 8] != a12 + || out[i*11 + 9] != a13 + || out[i*11 + 10] != a14) + abort (); + } return 0; @@ -91,7 +140,7 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ Index: testsuite/gcc.dg/vect/slp-3.c =================================================================== --- testsuite/gcc.dg/vect/slp-3.c (revision 127057) +++ testsuite/gcc.dg/vect/slp-3.c (working copy) @@ -99,6 +99,35 @@ main1 () abort (); } + /* SLP with unrolling by 8. 
*/ + for (i = 0; i < N/2; i++) + { + out[i*9] = in[i*9]; + out[i*9 + 1] = in[i*9 + 1]; + out[i*9 + 2] = in[i*9 + 2]; + out[i*9 + 3] = in[i*9 + 3]; + out[i*9 + 4] = in[i*9 + 4]; + out[i*9 + 5] = in[i*9 + 5]; + out[i*9 + 6] = in[i*9 + 6]; + out[i*9 + 7] = in[i*9 + 7]; + out[i*9 + 8] = in[i*9 + 8]; + } + + /* check results: */ + for (i = 0; i < N/2; i++) + { + if (out[i*9] != in[i*9] + || out[i*9 + 1] != in[i*9 + 1] + || out[i*9 + 2] != in[i*9 + 2] + || out[i*9 + 3] != in[i*9 + 3] + || out[i*9 + 4] != in[i*9 + 4] + || out[i*9 + 5] != in[i*9 + 5] + || out[i*9 + 6] != in[i*9 + 6] + || out[i*9 + 7] != in[i*9 + 7] + || out[i*9 + 8] != in[i*9 + 8]) + abort (); + } + return 0; } Index: ChangeLog.autovect =================================================================== --- ChangeLog.autovect (revision 127057) +++ ChangeLog.autovect (working copy) @@ -1,3 +1,12 @@ +2007-07-31 Ira Rosen + + * tree-vect-analyze.c (vect_analyze_operations): Set vectorization + factor according to SLP unrolling factors. + (vect_analyze_data_ref_access): Remove SLP group size constraints. + (vect_analyze_slp_instance): Calculate SLP instance unrolling factor. + * tree-vect-transform.c (vect_get_constant_vectors): Fix number of + copies calculation to handle general group sizes. 
+ 2007-07-23 Dorit Nuzman merge revision 124373 from trunk: Index: tree-vect-analyze.c =================================================================== --- tree-vect-analyze.c (revision 127057) +++ tree-vect-analyze.c (working copy) @@ -505,6 +505,14 @@ vect_analyze_operations (loop_vec_info l if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "PURE SLP: VF = %d", vectorization_factor); } + else + { + vectorization_factor = least_common_multiple (vectorization_factor, + LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)); + LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "VF = %d", vectorization_factor); + } /* After VF is set, SLP costs should be updated since the number of created vector stmts depends on VF. */ @@ -1913,7 +1921,6 @@ vect_analyze_data_ref_access (struct dat struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step); HOST_WIDE_INT stride; - int nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type (scalar_type)); bool slp_impossible = false; /* Don't allow invariant accesses. */ @@ -2119,24 +2126,6 @@ vect_analyze_data_ref_access (struct dat if (slp_impossible) return false; - - if (stride < nunits && (nunits % stride) != 0) - { - /* SLP is possible only with conceptual unrolling by - nunits/stride, hence nunits/stride must be an integer. */ - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "Possible SLP group of illegal size"); - return false; - } - - if (stride > nunits && (stride % nunits) != 0) - { - /* For SLP, the group should be distributed into stride/nunits - vectors, hence stride/nunits should be an integer. 
*/ - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "Possible SLP group of illegal size"); - return false; - } } DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride; if (vect_print_dump_info (REPORT_DETAILS)) @@ -2147,6 +2136,7 @@ vect_analyze_data_ref_access (struct dat if (!DR_IS_READ (dr) && !slp_impossible) VEC_safe_push (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt); } + return true; } @@ -2644,11 +2634,7 @@ vect_analyze_slp_instance (loop_vec_info SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0; /* Calculate the unrolling factor. */ - if (nunits > group_size) - { - unrolling_factor = nunits / group_size; - gcc_assert (!(nunits % group_size)); - } + unrolling_factor = least_common_multiple (nunits, group_size) / group_size; /* Calculate the number of vector stmts to create based on the unrolling factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is Index: tree-vect-transform.c =================================================================== --- tree-vect-transform.c (revision 127057) +++ tree-vect-transform.c (working copy) @@ -1736,8 +1736,8 @@ vect_get_constant_vectors (slp_tree slp_ if (STMT_VINFO_DATA_REF (stmt_vinfo)) is_store = true; - /* If group size is less than the number of units in vector, we - will put NUNITS / GROUP_SIZE copies of each operand. + /* NUMBER_OF_COPIES is the number of times we need to use the same values in + created vectors. It is greater than 1 if unrolling is performed. For example, we have two scalar operands, s1 and s2 (e.g., group of strided accesses of size two), while NUINTS is four (i.e., four scalars @@ -1745,17 +1745,17 @@ vect_get_constant_vectors (slp_tree slp_ two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES will be 2). - Otherwise, we create GROUP_SIZE / NUNITS vectors containing the operands. + If GROUP_SIZE > NUNITS, the scalars will be split into several vectors + containing the operands. 
For example, NUINTS is four as before, and the group size is 8 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and {s5, s6, s7, s8}. */ - - if (group_size < nunits) - number_of_copies = nunits / group_size; + + number_of_copies = least_common_multiple (nunits, group_size) / group_size; number_of_places_left_in_vector = nunits; - for (j = 0; j < number_of_copies; j ++) + for (j = 0; j < number_of_copies; j++) { for (i = group_size - 1; VEC_iterate (tree, stmts, i, stmt); i--) {