From: Ira Rosen <ira.rosen@linaro.org>
To: gcc-patches@gcc.gnu.org
Cc: Patch Tracking <patches@linaro.org>
Subject: Re: [patch] Fix PR tree-optimization/49038
Date: Thu, 02 Jun 2011 16:06:00 -0000 [thread overview]
Message-ID: <BANLkTinWkMEwg7+Zp4+1Tfc2yzFeZwMkfQ@mail.gmail.com> (raw)
In-Reply-To: <BANLkTimVHPK0gKUYsfKNdoknJ9PpfDJ9qQ@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 2220 bytes --]
On 26 May 2011 10:52, Ira Rosen <ira.rosen@linaro.org> wrote:
> Hi,
>
> The vectorizer supports strided loads with gaps, e.g., when only a[4i]
> and a[4i+2] are accessed, it generates a vector load a[4i:4i+3], i.e.,
> creating an access to a[4i+3], which doesn't exist in the scalar code.
> This access maybe invalid as described in the PR.
>
> This patch creates an epilogue loop (with at least one iteration) for
> such cases.
>
> Bootstrapped and tested on powerpc64-suse-linux.
> Applied to trunk. I'll prepare patches for 4.5 and 4.6 next week.
>
Here are the patches. Bootstrapped and tested on x86_64-suse-linux
(4.5) and on powerpc64-suse-linux (4.6).
OK to apply?
Thanks,
Ira
4.6 ChangeLog:
PR tree-optimization/49038
* tree-vect-loop-manip.c (vect_generate_tmps_on_preheader):
Ensure at least one epilogue iteration if required by data
accesses with gaps.
* tree-vectorizer.h (struct _loop_vec_info): Add new field
to mark loops that require peeling for gaps.
* tree-vect-loop.c (new_loop_vec_info): Initialize new field.
(vect_get_known_peeling_cost): Take peeling for gaps into
account.
(vect_transform_loop): Generate epilogue if required by data
access with gaps.
* tree-vect-data-refs.c (vect_analyze_group_access): Mark the
loop as requiring an epilogue if there are gaps in the end of
the strided group.
4.5 ChangeLog:
PR tree-optimization/49038
* tree-vect-loop-manip.c (vect_generate_tmps_on_preheader):
Ensure at least one epilogue iteration if required by data
accesses with gaps.
* tree-vectorizer.h (struct _loop_vec_info): Add new field
to mark loops that require peeling for gaps.
* tree-vect-loop.c (new_loop_vec_info): Initialize new field.
(vect_estimate_min_profitable_iters): Take peeling for gaps into
account.
(vect_transform_loop): Generate epilogue if required by data
access with gaps.
* tree-vect-data-refs.c (vect_analyze_group_access): Mark the
loop as requiring an epilogue if there are gaps in the end of
the strided group.
4.6 and 4.5 testsuite/ChangeLog:
PR tree-optimization/49038
* gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c: New test.
* gcc.dg/vect/pr49038.c: New test.
[-- Attachment #2: gaps.4.5.txt --]
[-- Type: text/plain, Size: 10641 bytes --]
Index: tree-vect-loop-manip.c
===================================================================
--- tree-vect-loop-manip.c (revision 174565)
+++ tree-vect-loop-manip.c (working copy)
@@ -1516,7 +1516,7 @@ vect_generate_tmps_on_preheader (loop_ve
edge pe;
basic_block new_bb;
gimple_seq stmts;
- tree ni_name;
+ tree ni_name, ni_minus_gap_name;
tree var;
tree ratio_name;
tree ratio_mult_vf_name;
@@ -1533,9 +1533,39 @@ vect_generate_tmps_on_preheader (loop_ve
ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
+ /* If epilogue loop is required because of data accesses with gaps, we
+ subtract one iteration from the total number of iterations here for
+ correct calculation of RATIO. */
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+ {
+ ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
+ ni_name,
+ build_one_cst (TREE_TYPE (ni_name)));
+ if (!is_gimple_val (ni_minus_gap_name))
+ {
+ var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
+ add_referenced_var (var);
+
+ stmts = NULL;
+ ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
+ true, var);
+ if (cond_expr_stmt_list)
+ gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
+ else
+ {
+ pe = loop_preheader_edge (loop);
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+ gcc_assert (!new_bb);
+ }
+ }
+ }
+ else
+ ni_minus_gap_name = ni_name;
+
/* Create: ratio = ni >> log2(vf) */
- ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
+ ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
+ ni_minus_gap_name, log_vf);
if (!is_gimple_val (ratio_name))
{
var = create_tmp_var (TREE_TYPE (ni), "bnd");
Index: testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c (revision 0)
+++ testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c (revision 0)
@@ -0,0 +1,116 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 160
+
+typedef struct {
+ unsigned char a;
+ unsigned char b;
+ unsigned char c;
+ unsigned char d;
+ unsigned char e;
+ unsigned char f;
+ unsigned char g;
+ unsigned char h;
+} s;
+
+__attribute__ ((noinline)) int
+main1 (s *arr, int n)
+{
+ int i;
+ s *ptr = arr;
+ s res[N];
+ unsigned char x;
+
+ for (i = 0; i < N; i++)
+ {
+ res[i].a = 0;
+ res[i].b = 0;
+ res[i].c = 0;
+ res[i].d = 0;
+ res[i].e = 0;
+ res[i].f = 0;
+ res[i].g = 0;
+ res[i].h = 0;
+ __asm__ volatile ("");
+ }
+
+ /* Check peeling for gaps for unknown loop bound. */
+ for (i = 0; i < n; i++)
+ {
+ res[i].c = ptr->b + ptr->c;
+ x = ptr->c + ptr->f;
+ res[i].a = x + ptr->b;
+ res[i].d = ptr->b + ptr->c;
+ res[i].b = ptr->c;
+ res[i].f = ptr->f + ptr->e;
+ res[i].e = ptr->b + ptr->e;
+ res[i].h = ptr->c;
+ res[i].g = ptr->b + ptr->c;
+ ptr++;
+ }
+
+ /* check results: */
+ for (i = 0; i < n; i++)
+ {
+ if (res[i].c != arr[i].b + arr[i].c
+ || res[i].a != arr[i].c + arr[i].f + arr[i].b
+ || res[i].d != arr[i].b + arr[i].c
+ || res[i].b != arr[i].c
+ || res[i].f != arr[i].f + arr[i].e
+ || res[i].e != arr[i].b + arr[i].e
+ || res[i].h != arr[i].c
+ || res[i].g != arr[i].b + arr[i].c)
+ abort ();
+ }
+
+ /* Check also that we don't do more iterations than needed. */
+ for (i = n; i < N; i++)
+ {
+ if (res[i].c == arr[i].b + arr[i].c
+ || res[i].a == arr[i].c + arr[i].f + arr[i].b
+ || res[i].d == arr[i].b + arr[i].c
+ || res[i].b == arr[i].c
+ || res[i].f == arr[i].f + arr[i].e
+ || res[i].e == arr[i].b + arr[i].e
+ || res[i].h == arr[i].c
+ || res[i].g == arr[i].b + arr[i].c)
+ abort ();
+ }
+
+ return 0;
+}
+
+
+int main (void)
+{
+ int i;
+ s arr[N];
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ arr[i].a = 5;
+ arr[i].b = 6;
+ arr[i].c = 17;
+ arr[i].d = 3;
+ arr[i].e = 16;
+ arr[i].f = 16;
+ arr[i].g = 3;
+ arr[i].h = 56;
+ if (arr[i].a == 178)
+ abort();
+ }
+
+ main1 (arr, N-2);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
Index: testsuite/gcc.dg/vect/pr49038.c
===================================================================
--- testsuite/gcc.dg/vect/pr49038.c (revision 0)
+++ testsuite/gcc.dg/vect/pr49038.c (revision 0)
@@ -0,0 +1,42 @@
+#include <sys/mman.h>
+#include <stdio.h>
+
+#define COUNT 320
+#define MMAP_SIZE 0x10000
+#define ADDRESS 0x1122000000
+#define TYPE unsigned short
+
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+void __attribute__((noinline))
+foo (TYPE *__restrict a, TYPE *__restrict b)
+{
+ int n;
+
+ for (n = 0; n < COUNT; n++)
+ a[n] = b[n * 2];
+}
+
+int
+main (void)
+{
+ void *x;
+ size_t b_offset;
+
+ x = mmap ((void *) ADDRESS, MMAP_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (x == MAP_FAILED)
+ {
+ perror ("mmap");
+ return 1;
+ }
+
+ b_offset = MMAP_SIZE - (2 * COUNT - 1) * sizeof (TYPE);
+ foo ((unsigned short *) x,
+ (unsigned short *) ((char *) x + b_offset));
+ return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: tree-vectorizer.h
===================================================================
--- tree-vectorizer.h (revision 174565)
+++ tree-vectorizer.h (working copy)
@@ -242,6 +242,12 @@ typedef struct _loop_vec_info {
/* The unrolling factor needed to SLP the loop. In case of that pure SLP is
applied to the loop, i.e., no unrolling is needed, this is 1. */
unsigned slp_unrolling_factor;
+
+ /* When we have strided data accesses with gaps, we may introduce invalid
+ memory accesses. We peel the last iteration of the loop to prevent
+ this. */
+ bool peeling_for_gaps;
+
} *loop_vec_info;
/* Access Functions. */
@@ -266,6 +272,7 @@ typedef struct _loop_vec_info {
#define LOOP_VINFO_STRIDED_STORES(L) (L)->strided_stores
#define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances
#define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
+#define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
VEC_length (gimple, (L)->may_misalign_stmts) > 0
Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c (revision 174565)
+++ tree-vect-loop.c (working copy)
@@ -711,6 +711,7 @@ new_loop_vec_info (struct loop *loop)
LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
+ LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
return res;
}
@@ -2053,6 +2054,10 @@ vect_estimate_min_profitable_iters (loop
peel_iters_prologue = niters < peel_iters_prologue ?
niters : peel_iters_prologue;
peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
+ /* If we need to peel for gaps, but no peeling is required, we have
+ to peel VF iterations. */
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !peel_iters_epilogue)
+ peel_iters_epilogue = vf;
}
}
@@ -4212,7 +4217,8 @@ vect_transform_loop (loop_vec_info loop_
do_peeling_for_loop_bound
= (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|| (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0));
+ && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
+ || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
|| LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
Index: tree-vect-data-refs.c
===================================================================
--- tree-vect-data-refs.c (revision 174565)
+++ tree-vect-data-refs.c (working copy)
@@ -1450,7 +1450,7 @@ vect_analyze_group_access (struct data_r
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
- HOST_WIDE_INT stride;
+ HOST_WIDE_INT stride, last_accessed_element = 1;
bool slp_impossible = false;
/* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
@@ -1479,6 +1479,16 @@ vect_analyze_group_access (struct data_r
fprintf (vect_dump, " step ");
print_generic_expr (vect_dump, step, TDF_SLIM);
}
+
+ if (loop_vinfo)
+ {
+ LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "Data access with gaps requires scalar "
+ "epilogue loop");
+ }
+
return true;
}
if (vect_print_dump_info (REPORT_DETAILS))
@@ -1531,6 +1541,7 @@ vect_analyze_group_access (struct data_r
next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
continue;
}
+
prev = next;
/* Check that all the accesses have the same STEP. */
@@ -1561,6 +1572,8 @@ vect_analyze_group_access (struct data_r
gaps += diff - 1;
}
+ last_accessed_element += diff;
+
/* Store the gap from the previous member of the group. If there is no
gap in the access, DR_GROUP_GAP is always 1. */
DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
@@ -1652,6 +1665,15 @@ vect_analyze_group_access (struct data_r
VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
stmt);
}
+
+ /* There is a gap in the end of the group. */
+ if (stride - last_accessed_element > 0 && loop_vinfo)
+ {
+ LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "Data access with gaps requires scalar "
+ "epilogue loop");
+ }
}
return true;
[-- Attachment #3: gaps.4.6.txt --]
[-- Type: text/plain, Size: 10641 bytes --]
Index: tree-vect-loop-manip.c
===================================================================
--- tree-vect-loop-manip.c (revision 174558)
+++ tree-vect-loop-manip.c (working copy)
@@ -1551,7 +1551,7 @@ vect_generate_tmps_on_preheader (loop_vec_info loo
edge pe;
basic_block new_bb;
gimple_seq stmts;
- tree ni_name;
+ tree ni_name, ni_minus_gap_name;
tree var;
tree ratio_name;
tree ratio_mult_vf_name;
@@ -1568,9 +1568,39 @@ vect_generate_tmps_on_preheader (loop_vec_info loo
ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
+ /* If epilogue loop is required because of data accesses with gaps, we
+ subtract one iteration from the total number of iterations here for
+ correct calculation of RATIO. */
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+ {
+ ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
+ ni_name,
+ build_one_cst (TREE_TYPE (ni_name)));
+ if (!is_gimple_val (ni_minus_gap_name))
+ {
+ var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
+ add_referenced_var (var);
+
+ stmts = NULL;
+ ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
+ true, var);
+ if (cond_expr_stmt_list)
+ gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
+ else
+ {
+ pe = loop_preheader_edge (loop);
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+ gcc_assert (!new_bb);
+ }
+ }
+ }
+ else
+ ni_minus_gap_name = ni_name;
+
/* Create: ratio = ni >> log2(vf) */
- ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
+ ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
+ ni_minus_gap_name, log_vf);
if (!is_gimple_val (ratio_name))
{
var = create_tmp_var (TREE_TYPE (ni), "bnd");
Index: testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c (revision 0)
+++ testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c (revision 0)
@@ -0,0 +1,116 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 160
+
+typedef struct {
+ unsigned char a;
+ unsigned char b;
+ unsigned char c;
+ unsigned char d;
+ unsigned char e;
+ unsigned char f;
+ unsigned char g;
+ unsigned char h;
+} s;
+
+__attribute__ ((noinline)) int
+main1 (s *arr, int n)
+{
+ int i;
+ s *ptr = arr;
+ s res[N];
+ unsigned char x;
+
+ for (i = 0; i < N; i++)
+ {
+ res[i].a = 0;
+ res[i].b = 0;
+ res[i].c = 0;
+ res[i].d = 0;
+ res[i].e = 0;
+ res[i].f = 0;
+ res[i].g = 0;
+ res[i].h = 0;
+ __asm__ volatile ("");
+ }
+
+ /* Check peeling for gaps for unknown loop bound. */
+ for (i = 0; i < n; i++)
+ {
+ res[i].c = ptr->b + ptr->c;
+ x = ptr->c + ptr->f;
+ res[i].a = x + ptr->b;
+ res[i].d = ptr->b + ptr->c;
+ res[i].b = ptr->c;
+ res[i].f = ptr->f + ptr->e;
+ res[i].e = ptr->b + ptr->e;
+ res[i].h = ptr->c;
+ res[i].g = ptr->b + ptr->c;
+ ptr++;
+ }
+
+ /* check results: */
+ for (i = 0; i < n; i++)
+ {
+ if (res[i].c != arr[i].b + arr[i].c
+ || res[i].a != arr[i].c + arr[i].f + arr[i].b
+ || res[i].d != arr[i].b + arr[i].c
+ || res[i].b != arr[i].c
+ || res[i].f != arr[i].f + arr[i].e
+ || res[i].e != arr[i].b + arr[i].e
+ || res[i].h != arr[i].c
+ || res[i].g != arr[i].b + arr[i].c)
+ abort ();
+ }
+
+ /* Check also that we don't do more iterations than needed. */
+ for (i = n; i < N; i++)
+ {
+ if (res[i].c == arr[i].b + arr[i].c
+ || res[i].a == arr[i].c + arr[i].f + arr[i].b
+ || res[i].d == arr[i].b + arr[i].c
+ || res[i].b == arr[i].c
+ || res[i].f == arr[i].f + arr[i].e
+ || res[i].e == arr[i].b + arr[i].e
+ || res[i].h == arr[i].c
+ || res[i].g == arr[i].b + arr[i].c)
+ abort ();
+ }
+
+ return 0;
+}
+
+
+int main (void)
+{
+ int i;
+ s arr[N];
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ arr[i].a = 5;
+ arr[i].b = 6;
+ arr[i].c = 17;
+ arr[i].d = 3;
+ arr[i].e = 16;
+ arr[i].f = 16;
+ arr[i].g = 3;
+ arr[i].h = 56;
+ if (arr[i].a == 178)
+ abort();
+ }
+
+ main1 (arr, N-2);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
Index: testsuite/gcc.dg/vect/pr49038.c
===================================================================
--- testsuite/gcc.dg/vect/pr49038.c (revision 0)
+++ testsuite/gcc.dg/vect/pr49038.c (revision 0)
@@ -0,0 +1,42 @@
+#include <sys/mman.h>
+#include <stdio.h>
+
+#define COUNT 320
+#define MMAP_SIZE 0x10000
+#define ADDRESS 0x1122000000
+#define TYPE unsigned short
+
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+void __attribute__((noinline))
+foo (TYPE *__restrict a, TYPE *__restrict b)
+{
+ int n;
+
+ for (n = 0; n < COUNT; n++)
+ a[n] = b[n * 2];
+}
+
+int
+main (void)
+{
+ void *x;
+ size_t b_offset;
+
+ x = mmap ((void *) ADDRESS, MMAP_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (x == MAP_FAILED)
+ {
+ perror ("mmap");
+ return 1;
+ }
+
+ b_offset = MMAP_SIZE - (2 * COUNT - 1) * sizeof (TYPE);
+ foo ((unsigned short *) x,
+ (unsigned short *) ((char *) x + b_offset));
+ return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: tree-vectorizer.h
===================================================================
--- tree-vectorizer.h (revision 174558)
+++ tree-vectorizer.h (working copy)
@@ -251,6 +251,11 @@ typedef struct _loop_vec_info {
/* Hash table used to choose the best peeling option. */
htab_t peeling_htab;
+ /* When we have strided data accesses with gaps, we may introduce invalid
+ memory accesses. We peel the last iteration of the loop to prevent
+ this. */
+ bool peeling_for_gaps;
+
} *loop_vec_info;
/* Access Functions. */
@@ -278,6 +283,7 @@ typedef struct _loop_vec_info {
#define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
#define LOOP_VINFO_REDUCTIONS(L) (L)->reductions
#define LOOP_VINFO_PEELING_HTAB(L) (L)->peeling_htab
+#define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
VEC_length (gimple, (L)->may_misalign_stmts) > 0
Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c (revision 174558)
+++ tree-vect-loop.c (working copy)
@@ -760,6 +760,7 @@ new_loop_vec_info (struct loop *loop)
LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
LOOP_VINFO_PEELING_HTAB (res) = NULL;
+ LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
return res;
}
@@ -2149,6 +2150,10 @@ vect_get_known_peeling_cost (loop_vec_info loop_vi
peel_iters_prologue = niters < peel_iters_prologue ?
niters : peel_iters_prologue;
*peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
+ /* If we need to peel for gaps, but no peeling is required, we have to
+ peel VF iterations. */
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
+ *peel_iters_epilogue = vf;
}
return (peel_iters_prologue * scalar_single_iter_cost)
@@ -4721,7 +4726,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
do_peeling_for_loop_bound
= (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|| (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0));
+ && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
+ || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
|| LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
Index: tree-vect-data-refs.c
===================================================================
--- tree-vect-data-refs.c (revision 174559)
+++ tree-vect-data-refs.c (working copy)
@@ -2045,7 +2045,7 @@ vect_analyze_group_access (struct data_reference *
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
- HOST_WIDE_INT stride;
+ HOST_WIDE_INT stride, last_accessed_element = 1;
bool slp_impossible = false;
/* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
@@ -2074,6 +2074,16 @@ vect_analyze_group_access (struct data_reference *
fprintf (vect_dump, " step ");
print_generic_expr (vect_dump, step, TDF_SLIM);
}
+
+ if (loop_vinfo)
+ {
+ LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "Data access with gaps requires scalar "
+ "epilogue loop");
+ }
+
return true;
}
@@ -2139,6 +2149,7 @@ vect_analyze_group_access (struct data_reference *
next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
continue;
}
+
prev = next;
/* Check that all the accesses have the same STEP. */
@@ -2169,6 +2180,8 @@ vect_analyze_group_access (struct data_reference *
gaps += diff - 1;
}
+ last_accessed_element += diff;
+
/* Store the gap from the previous member of the group. If there is no
gap in the access, DR_GROUP_GAP is always 1. */
DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
@@ -2260,6 +2273,15 @@ vect_analyze_group_access (struct data_reference *
VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
stmt);
}
+
+ /* There is a gap in the end of the group. */
+ if (stride - last_accessed_element > 0 && loop_vinfo)
+ {
+ LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "Data access with gaps requires scalar "
+ "epilogue loop");
+ }
}
return true;
next prev parent reply other threads:[~2011-06-02 16:06 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-05-26 9:25 Ira Rosen
2011-05-31 8:35 ` H.J. Lu
2011-06-02 16:06 ` Ira Rosen [this message]
2011-06-03 12:22 ` Richard Guenther
2011-07-15 13:12 ` Ulrich Weigand
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=BANLkTinWkMEwg7+Zp4+1Tfc2yzFeZwMkfQ@mail.gmail.com \
--to=ira.rosen@linaro.org \
--cc=gcc-patches@gcc.gnu.org \
--cc=patches@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).