From: Feng Xue OS <fxue@os.amperecomputing.com>
To: Richard Biener <richard.guenther@gmail.com>
Cc: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Subject: Re: [PATCH] vect: Fix shift-by-induction for single-lane slp
Date: Thu, 27 Jun 2024 15:15:17 +0000 [thread overview]
Message-ID: <LV2PR01MB7839D2B95CA368305F1788D5F7D72@LV2PR01MB7839.prod.exchangelabs.com> (raw)
In-Reply-To: <CAFiYyc0thFH3a50if94qEZbjNgXaxtG4i-HZrrkdwrTUQDoWCA@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 6306 bytes --]
I added two test cases for the examples you mentioned.
BTW: would you please look over the other 3 lane-reducing patches that have been updated? If they are OK, I would consider checking them in.
Thanks,
Feng
--
Allow shift-by-induction for slp node, when it is single lane, which is
aligned with the original loop-based handling.
gcc/
* tree-vect-stmts.cc (vectorizable_shift): Allow shift-by-induction
for single-lane slp node.
gcc/testsuite/
* gcc.dg/vect/vect-shift-6.c: New test.
* gcc.dg/vect/vect-shift-7.c: New test.
---
gcc/testsuite/gcc.dg/vect/vect-shift-6.c | 51 +++++++++++++++++++
gcc/testsuite/gcc.dg/vect/vect-shift-7.c | 65 ++++++++++++++++++++++++
gcc/tree-vect-stmts.cc | 2 +-
3 files changed, 117 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-shift-6.c
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-shift-7.c
diff --git a/gcc/testsuite/gcc.dg/vect/vect-shift-6.c b/gcc/testsuite/gcc.dg/vect/vect-shift-6.c
new file mode 100644
index 00000000000..940f7f2a4db
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-shift-6.c
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target vect_shift } */
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 32
+
+int A[N];
+int B[N];
+
+#define FN(name) \
+__attribute__((noipa)) \
+void name(int *a) \
+{ \
+ for (int i = 0; i < N / 2; i++) \
+ { \
+ a[2 * i + 0] <<= i; \
+ a[2 * i + 1] <<= i; \
+ } \
+}
+
+
+FN(foo_vec)
+
+#pragma GCC push_options
+#pragma GCC optimize ("O0")
+FN(foo_novec)
+#pragma GCC pop_options
+
+int main ()
+{
+ int i;
+
+ check_vect ();
+
+#pragma GCC novector
+ for (i = 0; i < N; i++)
+ A[i] = B[i] = -(i + 1);
+
+ foo_vec(A);
+ foo_novec(B);
+
+ /* check results: */
+#pragma GCC novector
+ for (i = 0; i < N; i++)
+ if (A[i] != B[i])
+ abort ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-shift-7.c b/gcc/testsuite/gcc.dg/vect/vect-shift-7.c
new file mode 100644
index 00000000000..a33b120343b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-shift-7.c
@@ -0,0 +1,65 @@
+/* { dg-require-effective-target vect_shift } */
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 32
+#define M 64
+
+int A[N];
+int B[N];
+
+#define FN(name) \
+__attribute__((noipa)) \
+void name(int *a) \
+{ \
+ for (int i = 0; i < N / 2; i++) \
+ { \
+ int s1 = i; \
+ int s2 = s1 + 1; \
+ int r1 = 0; \
+ int r2 = 1; \
+ \
+ for (int j = 0; j < M; j++) \
+ { \
+ r1 += j << s1; \
+ r2 += j << s2; \
+ s1++; \
+ s2++; \
+ } \
+ \
+ a[2 * i + 0] = r1; \
+ a[2 * i + 1] = r2; \
+ } \
+}
+
+
+FN(foo_vec)
+
+#pragma GCC push_options
+#pragma GCC optimize ("O0")
+FN(foo_novec)
+#pragma GCC pop_options
+
+int main ()
+{
+ int i;
+
+ check_vect ();
+
+#pragma GCC novector
+ for (i = 0; i < N; i++)
+ A[i] = B[i] = 0;
+
+ foo_vec(A);
+ foo_novec(B);
+
+ /* check results: */
+#pragma GCC novector
+ for (i = 0; i < N; i++)
+ if (A[i] != B[i])
+ abort ();
+
+ return 0;
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index ca6052662a3..840e162c7f0 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -6247,7 +6247,7 @@ vectorizable_shift (vec_info *vinfo,
if ((dt[1] == vect_internal_def
|| dt[1] == vect_induction_def
|| dt[1] == vect_nested_cycle)
- && !slp_node)
+ && (!slp_node || SLP_TREE_LANES (slp_node) == 1))
scalar_shift_arg = false;
else if (dt[1] == vect_constant_def
|| dt[1] == vect_external_def
--
2.17.1
________________________________________
From: Richard Biener <richard.guenther@gmail.com>
Sent: Thursday, June 27, 2024 12:49 AM
To: Feng Xue OS
Cc: gcc-patches@gcc.gnu.org
Subject: Re: [PATCH] vect: Fix shift-by-induction for single-lane slp
On Wed, Jun 26, 2024 at 4:58 PM Feng Xue OS <fxue@os.amperecomputing.com> wrote:
>
> Allow shift-by-induction for slp node, when it is single lane, which is
> aligned with the original loop-based handling.
OK.
Did you try whether we handle multiple lanes correctly? The simplest
case would be a loop
body with say
a[2*i] = x << i;
a[2*i+1] = x << i;
I'm not sure how we match up multiple (different) inductions in the
same SLP node,
but one node could be x << (i + 1).
Note you enable a nested cycle def the same way, I think that could be
treated like
an internal def and also generally. There's probably no test coverage
for that though.
Sth like
for (m ...)
{
i = m;
j = i + 1;
for (k ...)
{
res1 += k << i;
res2 += k << j;
i++;
j++;
}
a[2*m] = res1;
a[2*m+1] = res2;
}
Thanks,
Richard.
> Thanks,
> Feng
>
> ---
> gcc/tree-vect-stmts.cc | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index ca6052662a3..840e162c7f0 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -6247,7 +6247,7 @@ vectorizable_shift (vec_info *vinfo,
> if ((dt[1] == vect_internal_def
> || dt[1] == vect_induction_def
> || dt[1] == vect_nested_cycle)
> - && !slp_node)
> + && (!slp_node || SLP_TREE_LANES (slp_node) == 1))
> scalar_shift_arg = false;
> else if (dt[1] == vect_constant_def
> || dt[1] == vect_external_def
> --
> 2.17.1
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-vect-Fix-shift-by-induction-for-single-lane-slp.patch --]
[-- Type: text/x-patch; name="0001-vect-Fix-shift-by-induction-for-single-lane-slp.patch", Size: 4287 bytes --]
From 106e4f4a67027f38f0dce569c7b8f0cd504f0cff Mon Sep 17 00:00:00 2001
From: Feng Xue <fxue@os.amperecomputing.com>
Date: Wed, 26 Jun 2024 22:02:53 +0800
Subject: [PATCH] vect: Fix shift-by-induction for single-lane slp
Allow shift-by-induction for slp node, when it is single lane, which is
aligned with the original loop-based handling.
2024-06-26 Feng Xue <fxue@os.amperecomputing.com>
gcc/
* tree-vect-stmts.cc (vectorizable_shift): Allow shift-by-induction
for single-lane slp node.
gcc/testsuite/
* gcc.dg/vect/vect-shift-6.c: New test.
* gcc.dg/vect/vect-shift-7.c: New test.
---
gcc/testsuite/gcc.dg/vect/vect-shift-6.c | 51 +++++++++++++++++++
gcc/testsuite/gcc.dg/vect/vect-shift-7.c | 65 ++++++++++++++++++++++++
gcc/tree-vect-stmts.cc | 2 +-
3 files changed, 117 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-shift-6.c
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-shift-7.c
diff --git a/gcc/testsuite/gcc.dg/vect/vect-shift-6.c b/gcc/testsuite/gcc.dg/vect/vect-shift-6.c
new file mode 100644
index 00000000000..940f7f2a4db
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-shift-6.c
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target vect_shift } */
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 32
+
+int A[N];
+int B[N];
+
+#define FN(name) \
+__attribute__((noipa)) \
+void name(int *a) \
+{ \
+ for (int i = 0; i < N / 2; i++) \
+ { \
+ a[2 * i + 0] <<= i; \
+ a[2 * i + 1] <<= i; \
+ } \
+}
+
+
+FN(foo_vec)
+
+#pragma GCC push_options
+#pragma GCC optimize ("O0")
+FN(foo_novec)
+#pragma GCC pop_options
+
+int main ()
+{
+ int i;
+
+ check_vect ();
+
+#pragma GCC novector
+ for (i = 0; i < N; i++)
+ A[i] = B[i] = -(i + 1);
+
+ foo_vec(A);
+ foo_novec(B);
+
+ /* check results: */
+#pragma GCC novector
+ for (i = 0; i < N; i++)
+ if (A[i] != B[i])
+ abort ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-shift-7.c b/gcc/testsuite/gcc.dg/vect/vect-shift-7.c
new file mode 100644
index 00000000000..a33b120343b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-shift-7.c
@@ -0,0 +1,65 @@
+/* { dg-require-effective-target vect_shift } */
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 32
+#define M 64
+
+int A[N];
+int B[N];
+
+#define FN(name) \
+__attribute__((noipa)) \
+void name(int *a) \
+{ \
+ for (int i = 0; i < N / 2; i++) \
+ { \
+ int s1 = i; \
+ int s2 = s1 + 1; \
+ int r1 = 0; \
+ int r2 = 1; \
+ \
+ for (int j = 0; j < M; j++) \
+ { \
+ r1 += j << s1; \
+ r2 += j << s2; \
+ s1++; \
+ s2++; \
+ } \
+ \
+ a[2 * i + 0] = r1; \
+ a[2 * i + 1] = r2; \
+ } \
+}
+
+
+FN(foo_vec)
+
+#pragma GCC push_options
+#pragma GCC optimize ("O0")
+FN(foo_novec)
+#pragma GCC pop_options
+
+int main ()
+{
+ int i;
+
+ check_vect ();
+
+#pragma GCC novector
+ for (i = 0; i < N; i++)
+ A[i] = B[i] = 0;
+
+ foo_vec(A);
+ foo_novec(B);
+
+ /* check results: */
+#pragma GCC novector
+ for (i = 0; i < N; i++)
+ if (A[i] != B[i])
+ abort ();
+
+ return 0;
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index ca6052662a3..840e162c7f0 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -6247,7 +6247,7 @@ vectorizable_shift (vec_info *vinfo,
if ((dt[1] == vect_internal_def
|| dt[1] == vect_induction_def
|| dt[1] == vect_nested_cycle)
- && !slp_node)
+ && (!slp_node || SLP_TREE_LANES (slp_node) == 1))
scalar_shift_arg = false;
else if (dt[1] == vect_constant_def
|| dt[1] == vect_external_def
--
2.17.1
next prev parent reply other threads:[~2024-06-27 15:15 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-26 14:58 Feng Xue OS
2024-06-26 16:49 ` Richard Biener
2024-06-27 15:15 ` Feng Xue OS [this message]
2024-06-28 5:39 ` Richard Biener
-- strict thread matches above, loose matches on Subject: below --
2024-06-26 14:57 Feng Xue OS
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=LV2PR01MB7839D2B95CA368305F1788D5F7D72@LV2PR01MB7839.prod.exchangelabs.com \
--to=fxue@os.amperecomputing.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=richard.guenther@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).