From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1059) id 835DF39518AA; Fri, 28 Aug 2020 16:06:03 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 835DF39518AA DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1598630763; bh=BDjpFlMg2hCDNHfp79UwC0E5s+l2RSUnsh6HUzXZDIQ=; h=From:To:Subject:Date:From; b=EPlXS0yJRiZyCCvvE2p/2c+B9qLrIGwbzZmeLTMcpoo5j5sUlYms00FkqXtbW43jX Acd7bJ7mKlU6VFLzsvgImdKk51BwH9NBUrkd+GTH7iR6fZLpaHSZH53lFuqxoJtIer Ubcqp0CFHVa9DoBO0E6Eri8KKYJgAPDjTvpprVQo= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Nathan Sidwell To: gcc-cvs@gcc.gnu.org Subject: [gcc/devel/c++-modules] tree-optimization/96783 - fix vectorization of negative step SLP X-Act-Checkin: gcc X-Git-Author: Richard Biener X-Git-Refname: refs/heads/devel/c++-modules X-Git-Oldrev: 1f53d8f1d3e7519bd81cc5874e43ed9896cf6180 X-Git-Newrev: 71b6257e3a90995e1c1d3d2716a0eec5eef243db Message-Id: <20200828160603.835DF39518AA@sourceware.org> Date: Fri, 28 Aug 2020 16:06:03 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 28 Aug 2020 16:06:03 -0000 https://gcc.gnu.org/g:71b6257e3a90995e1c1d3d2716a0eec5eef243db commit 71b6257e3a90995e1c1d3d2716a0eec5eef243db Author: Richard Biener Date: Wed Aug 26 14:24:01 2020 +0200 tree-optimization/96783 - fix vectorization of negative step SLP This appropriately uses VMAT_ELEMENTWISE when the vectors cannot be filled from a single SLP group, until we get more explicit support for negative stride SLP. 2020-08-26 Richard Biener PR tree-optimization/96783 * tree-vect-stmts.c (get_group_load_store_type): Use VMAT_ELEMENTWISE for negative strides when we cannot use VMAT_STRIDED_SLP. * gcc.dg/vect/pr96783-1.c: New testcase. * gcc.dg/vect/pr96783-2.c: Likewise. 
Diff: --- gcc/testsuite/gcc.dg/vect/pr96783-1.c | 38 +++++++++++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/vect/pr96783-2.c | 29 ++++++++++++++++++++++++++ gcc/tree-vect-stmts.c | 10 ++++++++- 3 files changed, 76 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/vect/pr96783-1.c b/gcc/testsuite/gcc.dg/vect/pr96783-1.c new file mode 100644 index 00000000000..55d1364f056 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr96783-1.c @@ -0,0 +1,38 @@ +/* { dg-do run } */ + +#include "tree-vect.h" + +void __attribute__((noipa)) +foo (long *a, int off, int n) +{ + for (int i = 0; i < n; ++i) + { + long tem1 = a[0]; + long tem2 = a[1]; + long tem3 = a[2]; + long tem4 = a[off + 1]; + a[0] = tem4; + long tem5 = a[off + 2]; + a[1] = tem5; + long tem6 = a[off + 3]; + a[2] = tem6; + a[off + 1] = tem1; + a[off + 2] = tem2; + a[off + 3] = tem3; + a -= 3; + } +} + +int main () +{ + long a[3 * 9]; + check_vect (); + for (int i = 0; i < 3 * 9; ++i) + a[i] = i; + foo (a + 3 * 5, 6-1, 5); + const long b[3 * 8] = { 0, 1, 2, 21, 22, 23, 18, 19, 20, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 }; + for (int i = 0; i < 3 * 8; ++i) + if (a[i] != b[i]) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/pr96783-2.c b/gcc/testsuite/gcc.dg/vect/pr96783-2.c new file mode 100644 index 00000000000..33c37109e3a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr96783-2.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ + +#include "tree-vect.h" + +long a[1024]; +long b[1024]; + +void __attribute__((noipa)) foo () +{ + for (int i = 0; i < 256; ++i) + { + a[3*i] = b[1023 - 3*i - 2]; + a[3*i + 1] = b[1023 - 3*i - 1]; + a[3*i + 2] = b[1023 - 3*i]; + } +} + +int main() +{ + for (int i = 0; i < 1024; ++i) + b[i] = i; + foo (); + for (int i = 0; i < 256; ++i) + if (a[3*i] != 1023 - 3*i - 2 + || a[3*i+1] != 1023 - 3*i - 1 + || a[3*i+2] != 1023 - 3*i) + __builtin_abort (); + return 0; +} diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 65e30bac424..224be018af9 
100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -2192,7 +2192,15 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, *memory_access_type = get_negative_load_store_type (vinfo, stmt_info, vectype, vls_type, 1); else - *memory_access_type = VMAT_STRIDED_SLP; + { + /* Try to use consecutive accesses of DR_GROUP_SIZE elements, + separated by the stride, until we have a complete vector. + Fall back to scalar accesses if that isn't possible. */ + if (multiple_p (nunits, group_size)) + *memory_access_type = VMAT_STRIDED_SLP; + else + *memory_access_type = VMAT_ELEMENTWISE; + } } else {