* [autovect, patch] Relax loop-aware SLP group size constraints
@ 2007-07-31 11:08 Ira Rosen
0 siblings, 0 replies; only message in thread
From: Ira Rosen @ 2007-07-31 11:08 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 1211 bytes --]
This patch removes the constraints on SLP group size (until now, only groups
whose size was a multiple of the vector size, or vice versa, were supported).
A group of any size is now adjusted to the vector size by conceptually
unrolling the loop by lcm (group size, vector size) / group size.
For example, the loop (with group size 6 and vector size 4)
for (i=0; i < n; i++)
{
a[6*i + 0] = 0;
a[6*i + 1] = 1;
a[6*i + 2] = 2;
a[6*i + 3] = 3;
a[6*i + 4] = 4;
a[6*i + 5] = 5;
}
will now be SLPed with conceptual unrolling by 2 (lcm (6,4) / 6 = 12/6 =
2):
for (i=0; i < n/2; i++)
{
a[12*i + 0: 12*i + 3] = {0,1,2,3};
a[12*i + 4: 12*i + 7] = {4,5,0,1};
a[12*i + 8: 12*i + 11] = {2,3,4,5};
}
Bootstrapped and tested on ppc-linux. Committed to autovect-branch.
Ira
ChangeLog entry:
* tree-vect-analyze.c (vect_analyze_operations): Set vectorization
factor according to SLP unrolling factors.
(vect_analyze_data_ref_access): Remove SLP group size constraints.
(vect_analyze_slp_instance): Calculate SLP instance unrolling
factor.
* tree-vect-transform.c (vect_get_constant_vectors): Fix number of
copies calculation to handle general group sizes.
(See attached file: group_size.txt)
[-- Attachment #2: group_size.txt --]
[-- Type: text/plain, Size: 21188 bytes --]
Index: testsuite/ChangeLog.autovect
===================================================================
--- testsuite/ChangeLog.autovect (revision 127057)
+++ testsuite/ChangeLog.autovect (working copy)
@@ -1,3 +1,12 @@
+2007-07-31 Ira Rosen <irar@il.ibm.com>
+
+ * gcc.dg/vect/slp-16.c: Now SLPable.
+ * gcc.dg/vect/slp-1.c: Add new test cases for different group sizes.
+ * gcc.dg/vect/slp-2.c, gcc.dg/vect/slp-3.c: Likewise.
+ * gcc.dg/vect/slp-19.c: Change the group size.
+ * gcc.dg/vect/slp-33.c: New.
+ * gcc.dg/vect/slp-34.c, gcc.dg/vect/slp-35.c: Likewise.
+
2007-07-03 Dorit Nuzman <dorit@il.ibm.com>
* gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c: Loops now
Index: testsuite/gcc.dg/vect/slp-33.c
===================================================================
--- testsuite/gcc.dg/vect/slp-33.c (revision 0)
+++ testsuite/gcc.dg/vect/slp-33.c (revision 0)
@@ -0,0 +1,112 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 8
+
+int
+main1 ()
+{
+ int i;
+ unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7;
+ unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+ float out2[N*8];
+
+ /* SLP with unrolling by 4. */
+ for (i = 0; i < N; i++)
+ {
+ a0 = in[i*7] + 5;
+ a1 = in[i*7 + 1] + 6;
+ a2 = in[i*7 + 2] + 7;
+ a3 = in[i*7 + 3] + 8;
+ a4 = in[i*7 + 4] + 9;
+ a5 = in[i*7 + 5] + 10;
+ a6 = in[i*7 + 6] + 11;
+
+ b0 = a0 * 3;
+ b1 = a1 * 2;
+ b2 = a2 * 12;
+ b3 = a3 * 5;
+ b4 = a4 * 8;
+ b5 = a5 * 4;
+ b6 = a6 * 3;
+
+ out[i*7] = b0 - 2;
+ out[i*7 + 1] = b1 - 3;
+ out[i*7 + 2] = b2 - 2;
+ out[i*7 + 3] = b3 - 1;
+ out[i*7 + 4] = b4 - 8;
+ out[i*7 + 5] = b5 - 7;
+ out[i*7 + 6] = b6 - 3;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (out[i*7] != (in[i*7] + 5) * 3 - 2
+ || out[i*7 + 1] != (in[i*7 + 1] + 6) * 2 - 3
+ || out[i*7 + 2] != (in[i*7 + 2] + 7) * 12 - 2
+ || out[i*7 + 3] != (in[i*7 + 3] + 8) * 5 - 1
+ || out[i*7 + 4] != (in[i*7 + 4] + 9) * 8 - 8
+ || out[i*7 + 5] != (in[i*7 + 5] + 10) * 4 - 7
+ || out[i*7 + 6] != (in[i*7 + 6] + 11) * 3 - 3)
+ abort ();
+ }
+
+ /* SLP with unrolling by 4. */
+ for (i = 0; i < N*2; i++)
+ {
+ out[i*3] = (in[i*3] + 2) * 3;
+ out[i*3 + 1] = (in[i*3 + 1] + 2) * 7;
+ out[i*3 + 2] = (in[i*3 + 2] + 7) * 3;
+ }
+
+ /* check results: */
+ for (i = 0; i < N*2; i++)
+ {
+ if (out[i*3] != (in[i*3] + 2) * 3
+ || out[i*3 + 1] != (in[i*3 + 1] + 2) * 7
+ || out[i*3 + 2] != (in[i*3 + 2] + 7) * 3)
+ abort ();
+ }
+
+ /* SLP with unrolling by 4. */
+ for (i = 0; i < N*2; i++)
+ {
+ out2[i*3] = (float) (in[i*3] * 2 + 6) ;
+ out2[i*3 + 1] = (float) (in[i*3 + 1] * 3 + 7);
+ out2[i*3 + 2] = (float) (in[i*3 + 2] * 5 + 4);
+ }
+
+ /* check results: */
+ for (i = 0; i < N*2; i++)
+ {
+ if (out2[i*3] != (float) (in[i*3] * 2 + 6)
+ || out2[i*3 + 1] != (float) (in[i*3 + 1] * 3 + 7)
+ || out2[i*3 + 2] != (float) (in[i*3 + 2] * 5 + 4))
+ abort ();
+ }
+
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target {vect_intfloat_cvt && vect_int_mult} } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target {{! { vect_intfloat_cvt}} && vect_int_mult} } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target {{! { vect_intfloat_cvt}} && {!{vect_int_mult}}} } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target {vect_intfloat_cvt && vect_int_mult} } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target {{! { vect_intfloat_cvt}} && vect_int_mult} } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target {{! { vect_intfloat_cvt}} && {!{vect_int_mult}}} } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
Index: testsuite/gcc.dg/vect/slp-16.c
===================================================================
--- testsuite/gcc.dg/vect/slp-16.c (revision 127057)
+++ testsuite/gcc.dg/vect/slp-16.c (working copy)
@@ -15,7 +15,8 @@ main1 ()
unsigned int in2[N*16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
unsigned int out2[N*16];
- /* SLP groups of size that is not a multiple of vector size is not supported yet. */
+ /* SLP group of size that is not a multiple of vector size.
+ Unrolling by 2. */
for (i = 0; i < N; i++)
{
a0 = in[i*2] + 5;
@@ -63,8 +64,7 @@ int main (void)
return 0;
}
-/* Needs vect_mult_int. */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_int_mult } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_int_mult } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/slp-34.c
===================================================================
--- testsuite/gcc.dg/vect/slp-34.c (revision 0)
+++ testsuite/gcc.dg/vect/slp-34.c (revision 0)
@@ -0,0 +1,61 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 8
+
+int
+main1 ()
+{
+ int i;
+ unsigned short out[N*8];
+ unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+ unsigned short in2[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+ unsigned short out2[N*8];
+
+ /* SLP with unrolling by 8. */
+ for (i = 0; i < N; i++)
+ {
+ out[i*3] = in[i*3] + 5;
+ out[i*3 + 1] = in[i*3 + 1] + 6;
+ out[i*3 + 2] = in[i*3 + 2] + 16;
+
+ out2[i*5] = in2[i*5] + 2;
+ out2[i*5 + 1] = in2[i*5 + 1] + 2;
+ out2[i*5 + 2] = in2[i*5 + 2] + 1;
+ out2[i*5 + 3] = in2[i*5 + 3] + 3;
+ out2[i*5 + 4] = in2[i*5 + 4] + 13;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (out[i*3] != in[i*3] + 5
+ || out[i*3 + 1] != in[i*3 + 1] + 6
+ || out[i*3 + 2] != in[i*3 + 2] + 16
+ || out2[i*5] != in2[i*5] + 2
+ || out2[i*5 + 1] != in2[i*5 + 1] + 2
+ || out2[i*5 + 2] != in2[i*5 + 2] + 1
+ || out2[i*5 + 3] != in2[i*5 + 3] + 3
+ || out2[i*5 + 4] != in2[i*5 + 4] + 13)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
Index: testsuite/gcc.dg/vect/slp-35.c
===================================================================
--- testsuite/gcc.dg/vect/slp-35.c (revision 0)
+++ testsuite/gcc.dg/vect/slp-35.c (revision 0)
@@ -0,0 +1,73 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 128
+
+typedef struct {
+ int a;
+ int b;
+ int c;
+ int d;
+ int e;
+} s;
+
+int
+main1 (s *arr)
+{
+ int i;
+ s *ptr = arr;
+ s res[N];
+
+ /* SLP with unrolling by 4. */
+ for (i = 0; i < N; i++)
+ {
+ res[i].c = ptr->c + ptr->c;
+ res[i].a = ptr->a + ptr->a;
+ res[i].d = ptr->d + ptr->d;
+ res[i].b = ptr->b + ptr->b;
+ res[i].e = ptr->e + ptr->e;
+ ptr++;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (res[i].c != arr[i].c + arr[i].c
+ || res[i].a != arr[i].a + arr[i].a
+ || res[i].d != arr[i].d + arr[i].d
+ || res[i].b != arr[i].b + arr[i].b
+ || res[i].e != arr[i].e + arr[i].e)
+ abort();
+ }
+
+}
+
+int main (void)
+{
+ int i;
+ s arr[N];
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ arr[i].a = i;
+ arr[i].b = i * 2;
+ arr[i].c = 17;
+ arr[i].d = i+34;
+ arr[i].e = i * 3 + 5;
+ if (arr[i].a == 178)
+ abort();
+ }
+
+ main1 (arr);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
Index: testsuite/gcc.dg/vect/slp-1.c
===================================================================
--- testsuite/gcc.dg/vect/slp-1.c (revision 127057)
+++ testsuite/gcc.dg/vect/slp-1.c (working copy)
@@ -56,7 +56,55 @@ main1 ()
abort ();
}
+ /* SLP with unrolling by 8. */
+ for (i = 0; i < N; i++)
+ {
+ out[i*5] = 8;
+ out[i*5 + 1] = 7;
+ out[i*5 + 2] = 81;
+ out[i*5 + 3] = 28;
+ out[i*5 + 4] = 18;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (out[i*5] != 8
+ || out[i*5 + 1] != 7
+ || out[i*5 + 2] != 81
+ || out[i*5 + 3] != 28
+ || out[i*5 + 4] != 18)
+ abort ();
+ }
+ /* SLP with unrolling by 8. */
+ for (i = 0; i < N/2; i++)
+ {
+ out[i*9] = 8;
+ out[i*9 + 1] = 7;
+ out[i*9 + 2] = 81;
+ out[i*9 + 3] = 28;
+ out[i*9 + 4] = 18;
+ out[i*9 + 5] = 85;
+ out[i*9 + 6] = 5;
+ out[i*9 + 7] = 4;
+ out[i*9 + 8] = 14;
+ }
+
+ /* check results: */
+ for (i = 0; i < N/2; i++)
+ {
+ if (out[i*9] != 8
+ || out[i*9 + 1] != 7
+ || out[i*9 + 2] != 81
+ || out[i*9 + 3] != 28
+ || out[i*9 + 4] != 18
+ || out[i*9 + 5] != 85
+ || out[i*9 + 6] != 5
+ || out[i*9 + 7] != 4
+ || out[i*9 + 8] != 14)
+ abort ();
+ }
return 0;
}
@@ -70,7 +118,7 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/slp-19.c
===================================================================
--- testsuite/gcc.dg/vect/slp-19.c (revision 127057)
+++ testsuite/gcc.dg/vect/slp-19.c (working copy)
@@ -108,21 +108,15 @@ main1 ()
abort ();
}
- /* Hybrid SLP. */
- for (i = 0; i < N/2; i++)
+ /* Hybrid SLP with unrolling by 2. */
+ for (i = 0; i < N; i++)
{
- out[i*12] = in[i*12];
- out[i*12 + 1] = in[i*12 + 1];
- out[i*12 + 2] = in[i*12 + 2];
- out[i*12 + 3] = in[i*12 + 3];
- out[i*12 + 4] = in[i*12 + 4];
- out[i*12 + 5] = in[i*12 + 5];
- out[i*12 + 6] = in[i*12 + 6];
- out[i*12 + 7] = in[i*12 + 7];
- out[i*12 + 8] = in[i*12 + 8];
- out[i*12 + 9] = in[i*12 + 9];
- out[i*12 + 10] = in[i*12 + 10];
- out[i*12 + 11] = in[i*12 + 11];
+ out[i*6] = in[i*6];
+ out[i*6 + 1] = in[i*6 + 1];
+ out[i*6 + 2] = in[i*6 + 2];
+ out[i*6 + 3] = in[i*6 + 3];
+ out[i*6 + 4] = in[i*6 + 4];
+ out[i*6 + 5] = in[i*6 + 5];
ia[i] = i;
}
@@ -130,18 +124,12 @@ main1 ()
/* check results: */
for (i = 0; i < N/2; i++)
{
- if (out[i*12] != in[i*12]
- || out[i*12 + 1] != in[i*12 + 1]
- || out[i*12 + 2] != in[i*12 + 2]
- || out[i*12 + 3] != in[i*12 + 3]
- || out[i*12 + 4] != in[i*12 + 4]
- || out[i*12 + 5] != in[i*12 + 5]
- || out[i*12 + 6] != in[i*12 + 6]
- || out[i*12 + 7] != in[i*12 + 7]
- || out[i*12 + 8] != in[i*12 + 8]
- || out[i*12 + 9] != in[i*12 + 9]
- || out[i*12 + 10] != in[i*12 + 10]
- || out[i*12 + 11] != in[i*12 + 11]
+ if (out[i*6] != in[i*6]
+ || out[i*6 + 1] != in[i*6 + 1]
+ || out[i*6 + 2] != in[i*6 + 2]
+ || out[i*6 + 3] != in[i*6 + 3]
+ || out[i*6 + 4] != in[i*6 + 4]
+ || out[i*6 + 5] != in[i*6 + 5]
|| ia[i] != i)
abort ();
}
Index: testsuite/gcc.dg/vect/slp-2.c
===================================================================
--- testsuite/gcc.dg/vect/slp-2.c (revision 127057)
+++ testsuite/gcc.dg/vect/slp-2.c (working copy)
@@ -77,6 +77,55 @@ main1 (unsigned short a0, unsigned short
abort ();
}
+ /* SLP with unrolling by 8. */
+ for (i = 0; i < N; i++)
+ {
+ out[i*3] = a8;
+ out[i*3 + 1] = a1;
+ out[i*3 + 2] = a2;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (out[i*3] != a8
+ || out[i*3 + 1] != a1
+ || out[i*3 + 2] != a2)
+ abort ();
+ }
+
+ /* SLP with unrolling by 8. */
+ for (i = 0; i < N; i++)
+ {
+ out[i*11] = a8;
+ out[i*11 + 1] = a7;
+ out[i*11 + 2] = a1;
+ out[i*11 + 3] = a2;
+ out[i*11 + 4] = a8;
+ out[i*11 + 5] = a5;
+ out[i*11 + 6] = a5;
+ out[i*11 + 7] = a4;
+ out[i*11 + 8] = a12;
+ out[i*11 + 9] = a13;
+ out[i*11 + 10] = a14;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (out[i*11] != a8
+ || out[i*11 + 1] != a7
+ || out[i*11 + 2] != a1
+ || out[i*11 + 3] != a2
+ || out[i*11 + 4] != a8
+ || out[i*11 + 5] != a5
+ || out[i*11 + 6] != a5
+ || out[i*11 + 7] != a4
+ || out[i*11 + 8] != a12
+ || out[i*11 + 9] != a13
+ || out[i*11 + 10] != a14)
+ abort ();
+ }
return 0;
@@ -91,7 +140,7 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/slp-3.c
===================================================================
--- testsuite/gcc.dg/vect/slp-3.c (revision 127057)
+++ testsuite/gcc.dg/vect/slp-3.c (working copy)
@@ -99,6 +99,35 @@ main1 ()
abort ();
}
+ /* SLP with unrolling by 8. */
+ for (i = 0; i < N/2; i++)
+ {
+ out[i*9] = in[i*9];
+ out[i*9 + 1] = in[i*9 + 1];
+ out[i*9 + 2] = in[i*9 + 2];
+ out[i*9 + 3] = in[i*9 + 3];
+ out[i*9 + 4] = in[i*9 + 4];
+ out[i*9 + 5] = in[i*9 + 5];
+ out[i*9 + 6] = in[i*9 + 6];
+ out[i*9 + 7] = in[i*9 + 7];
+ out[i*9 + 8] = in[i*9 + 8];
+ }
+
+ /* check results: */
+ for (i = 0; i < N/2; i++)
+ {
+ if (out[i*9] != in[i*9]
+ || out[i*9 + 1] != in[i*9 + 1]
+ || out[i*9 + 2] != in[i*9 + 2]
+ || out[i*9 + 3] != in[i*9 + 3]
+ || out[i*9 + 4] != in[i*9 + 4]
+ || out[i*9 + 5] != in[i*9 + 5]
+ || out[i*9 + 6] != in[i*9 + 6]
+ || out[i*9 + 7] != in[i*9 + 7]
+ || out[i*9 + 8] != in[i*9 + 8])
+ abort ();
+ }
+
return 0;
}
Index: ChangeLog.autovect
===================================================================
--- ChangeLog.autovect (revision 127057)
+++ ChangeLog.autovect (working copy)
@@ -1,3 +1,12 @@
+2007-07-31 Ira Rosen <irar@il.ibm.com>
+
+ * tree-vect-analyze.c (vect_analyze_operations): Set vectorization
+ factor according to SLP unrolling factors.
+ (vect_analyze_data_ref_access): Remove SLP group size constraints.
+ (vect_analyze_slp_instance): Calculate SLP instance unrolling factor.
+ * tree-vect-transform.c (vect_get_constant_vectors): Fix number of
+ copies calculation to handle general group sizes.
+
2007-07-23 Dorit Nuzman <dorit@il.ibm.com>
merge revision 124373 from trunk:
Index: tree-vect-analyze.c
===================================================================
--- tree-vect-analyze.c (revision 127057)
+++ tree-vect-analyze.c (working copy)
@@ -505,6 +505,14 @@ vect_analyze_operations (loop_vec_info l
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "PURE SLP: VF = %d", vectorization_factor);
}
+ else
+ {
+ vectorization_factor = least_common_multiple (vectorization_factor,
+ LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "VF = %d", vectorization_factor);
+ }
/* After VF is set, SLP costs should be updated since the number of created
vector stmts depends on VF. */
@@ -1913,7 +1921,6 @@ vect_analyze_data_ref_access (struct dat
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
HOST_WIDE_INT stride;
- int nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type (scalar_type));
bool slp_impossible = false;
/* Don't allow invariant accesses. */
@@ -2119,24 +2126,6 @@ vect_analyze_data_ref_access (struct dat
if (slp_impossible)
return false;
-
- if (stride < nunits && (nunits % stride) != 0)
- {
- /* SLP is possible only with conceptual unrolling by
- nunits/stride, hence nunits/stride must be an integer. */
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "Possible SLP group of illegal size");
- return false;
- }
-
- if (stride > nunits && (stride % nunits) != 0)
- {
- /* For SLP, the group should be distributed into stride/nunits
- vectors, hence stride/nunits should be an integer. */
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "Possible SLP group of illegal size");
- return false;
- }
}
DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
if (vect_print_dump_info (REPORT_DETAILS))
@@ -2147,6 +2136,7 @@ vect_analyze_data_ref_access (struct dat
if (!DR_IS_READ (dr) && !slp_impossible)
VEC_safe_push (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
}
+
return true;
}
@@ -2644,11 +2634,7 @@ vect_analyze_slp_instance (loop_vec_info
SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
/* Calculate the unrolling factor. */
- if (nunits > group_size)
- {
- unrolling_factor = nunits / group_size;
- gcc_assert (!(nunits % group_size));
- }
+ unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
/* Calculate the number of vector stmts to create based on the unrolling
factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
Index: tree-vect-transform.c
===================================================================
--- tree-vect-transform.c (revision 127057)
+++ tree-vect-transform.c (working copy)
@@ -1736,8 +1736,8 @@ vect_get_constant_vectors (slp_tree slp_
if (STMT_VINFO_DATA_REF (stmt_vinfo))
is_store = true;
- /* If group size is less than the number of units in vector, we
- will put NUNITS / GROUP_SIZE copies of each operand.
+ /* NUMBER_OF_COPIES is the number of times we need to use the same values in
+ created vectors. It is greater than 1 if unrolling is performed.
For example, we have two scalar operands, s1 and s2 (e.g., group of
strided accesses of size two), while NUINTS is four (i.e., four scalars
@@ -1745,17 +1745,17 @@ vect_get_constant_vectors (slp_tree slp_
two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
will be 2).
- Otherwise, we create GROUP_SIZE / NUNITS vectors containing the operands.
+ If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
+ containing the operands.
For example, NUINTS is four as before, and the group size is 8
(s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
{s5, s6, s7, s8}. */
-
- if (group_size < nunits)
- number_of_copies = nunits / group_size;
+
+ number_of_copies = least_common_multiple (nunits, group_size) / group_size;
number_of_places_left_in_vector = nunits;
- for (j = 0; j < number_of_copies; j ++)
+ for (j = 0; j < number_of_copies; j++)
{
for (i = group_size - 1; VEC_iterate (tree, stmts, i, stmt); i--)
{
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2007-07-31 8:54 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-07-31 11:08 [autovect, patch] Relax loop-aware SLP group size constraints Ira Rosen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).