From: Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
To: Richard Sandiford <richard.sandiford@arm.com>
Cc: gcc Patches <gcc-patches@gcc.gnu.org>
Subject: Re: [SVE] PR91272
Date: Thu, 24 Oct 2019 05:44:00 -0000 [thread overview]
Message-ID: <CAAgBjMkAfFLFQdsxMr_rKRDBbsxm90xcxoDPg1fvgK4wo=+gOQ@mail.gmail.com> (raw)
In-Reply-To: <mptlftdi5tv.fsf@arm.com>
[-- Attachment #1: Type: text/plain, Size: 4918 bytes --]
On Tue, 22 Oct 2019 at 13:12, Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
> > diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> > index acdd90784dc..dfd33b142ed 100644
> > --- a/gcc/tree-vect-stmts.c
> > +++ b/gcc/tree-vect-stmts.c
> > @@ -10016,25 +10016,26 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
> > /* See whether another part of the vectorized code applies a loop
> > mask to the condition, or to its inverse. */
> >
> > + vec_loop_masks *masks = NULL;
> > if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
> > {
> > - scalar_cond_masked_key cond (cond_expr, ncopies);
> > - if (loop_vinfo->scalar_cond_masked_set.contains (cond))
> > - {
> > - vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
> > - loop_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, j);
> > - }
> > + if (reduction_type == EXTRACT_LAST_REDUCTION)
> > + masks = &LOOP_VINFO_MASKS (loop_vinfo);
> > else
> > {
> > - bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
> > - cond.code = invert_tree_comparison (cond.code, honor_nans);
> > + scalar_cond_masked_key cond (cond_expr, ncopies);
> > if (loop_vinfo->scalar_cond_masked_set.contains (cond))
> > + masks = &LOOP_VINFO_MASKS (loop_vinfo);
> > + else
> > {
> > - vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
> > - loop_mask = vect_get_loop_mask (gsi, masks, ncopies,
> > - vectype, j);
> > - cond_code = cond.code;
> > - swap_cond_operands = true;
> > + bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
> > + cond.code = invert_tree_comparison (cond.code, honor_nans);
> > + if (loop_vinfo->scalar_cond_masked_set.contains (cond))
> > + {
> > + masks = &LOOP_VINFO_MASKS (loop_vinfo);
> > + cond_code = cond.code;
> > + swap_cond_operands = true;
> > + }
> > }
> > }
> > }
> > @@ -10116,6 +10117,13 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
> > vec_then_clause = vec_oprnds2[i];
> > vec_else_clause = vec_oprnds3[i];
> >
> > + if (masks)
> > + {
> > + unsigned vec_num = vec_oprnds0.length ();
> > + loop_mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
> > + vectype, vec_num * j + i);
> > + }
> > +
>
> I don't think we need an extra "if" here. "loop_mask" only feeds
> the later "if (loop_mask)" block, so we might as well change that
> later "if" to "if (masks)" and make the "loop_mask" variable local
> to the "if" body.
>
> > if (swap_cond_operands)
> > std::swap (vec_then_clause, vec_else_clause);
> >
> > @@ -10194,23 +10202,6 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
> > vec_compare = tmp;
> > }
> >
> > - tree tmp2 = make_ssa_name (vec_cmp_type);
> > - gassign *g = gimple_build_assign (tmp2, BIT_AND_EXPR,
> > - vec_compare, loop_mask);
> > - vect_finish_stmt_generation (stmt_info, g, gsi);
> > - vec_compare = tmp2;
> > - }
> > -
> > - if (reduction_type == EXTRACT_LAST_REDUCTION)
> > - {
> > - if (!is_gimple_val (vec_compare))
> > - {
> > - tree vec_compare_name = make_ssa_name (vec_cmp_type);
> > - gassign *new_stmt = gimple_build_assign (vec_compare_name,
> > - vec_compare);
> > - vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
> > - vec_compare = vec_compare_name;
> > - }
>
> This form is simpler than:
>
> if (COMPARISON_CLASS_P (vec_compare))
> {
> tree tmp = make_ssa_name (vec_cmp_type);
> tree op0 = TREE_OPERAND (vec_compare, 0);
> tree op1 = TREE_OPERAND (vec_compare, 1);
> gassign *g = gimple_build_assign (tmp,
> TREE_CODE (vec_compare),
> op0, op1);
> vect_finish_stmt_generation (stmt_info, g, gsi);
> vec_compare = tmp;
> }
>
> so I think it'd be better to keep the EXTRACT_LAST_REDUCTION version.
Does the attached version look OK?
Thanks,
Prathamesh
>
> Thanks,
> Richard
[-- Attachment #2: pr91272-3.diff --]
[-- Type: text/x-patch, Size: 10984 bytes --]
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_1.c
index d4f9b0b6a94..d3ea52dea47 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_1.c
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
#define N 32
@@ -17,4 +17,5 @@ condition_reduction (int *a, int min_v)
return last;
}
-/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.s} } } */
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
+/* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_2.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_2.c
index 2c49bd3b0f0..c222b707912 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_2.c
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
#include <stdint.h>
@@ -23,4 +23,5 @@ condition_reduction (TYPE *a, TYPE min_v)
return last;
}
-/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.s} } } */
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
+/* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_3.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_3.c
index 35344f446c6..5aaa71f948d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_3.c
@@ -1,8 +1,9 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
#define TYPE uint8_t
#include "clastb_2.c"
-/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.b} } } */
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
+/* { dg-final { scan-assembler {\tclastb\tb[0-9]+, p[0-7], b[0-9]+, z[0-9]+\.b} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_4.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_4.c
index ce58abd6161..b4db170ea06 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_4.c
@@ -1,8 +1,9 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
#define TYPE int16_t
#include "clastb_2.c"
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.h} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_5.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_5.c
index 2b9783d6627..28d40a01f93 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_5.c
@@ -1,8 +1,9 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
#define TYPE uint64_t
#include "clastb_2.c"
-/* { dg-final { scan-assembler {\tclastb\tx[0-9]+, p[0-7], x[0-9]+, z[0-9]+\.d} } } */
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
+/* { dg-final { scan-assembler {\tclastb\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_6.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_6.c
index c47d303f730..38632a21be1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_6.c
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
#define N 32
@@ -21,4 +21,5 @@ condition_reduction (TYPE *a, TYPE min_v)
return last;
}
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
/* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_7.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_7.c
index 3345f874a39..e5307d2edc8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_7.c
@@ -1,7 +1,8 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
#define TYPE double
#include "clastb_6.c"
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
/* { dg-final { scan-assembler {\tclastb\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c
index d86a428a7fa..583fc8d8d6d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=256 --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -msve-vector-bits=256 --save-temps" } */
#include <stdint.h>
@@ -19,6 +19,7 @@ TEST_TYPE (uint16_t);
TEST_TYPE (uint32_t);
TEST_TYPE (uint64_t);
+/* { dg-final { scan-tree-dump-times "using a fully-masked loop." 4 "vect" } } */
/* { dg-final { scan-assembler {\tclastb\t(b[0-9]+), p[0-7], \1, z[0-9]+\.b\n} } } */
/* { dg-final { scan-assembler {\tclastb\t(h[0-9]+), p[0-7], \1, z[0-9]+\.h\n} } } */
/* { dg-final { scan-assembler {\tclastb\t(s[0-9]+), p[0-7], \1, z[0-9]+\.s\n} } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index a70d52eb2ca..82814e2c2af 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -6428,6 +6428,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node,
if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
{
if (reduction_type != FOLD_LEFT_REDUCTION
+ && reduction_type != EXTRACT_LAST_REDUCTION
&& !mask_by_cond_expr
&& (cond_fn == IFN_LAST
|| !direct_internal_fn_supported_p (cond_fn, vectype_in,
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index acdd90784dc..a947757744e 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -10016,25 +10016,26 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
/* See whether another part of the vectorized code applies a loop
mask to the condition, or to its inverse. */
+ vec_loop_masks *masks = NULL;
if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
{
- scalar_cond_masked_key cond (cond_expr, ncopies);
- if (loop_vinfo->scalar_cond_masked_set.contains (cond))
- {
- vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
- loop_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, j);
- }
+ if (reduction_type == EXTRACT_LAST_REDUCTION)
+ masks = &LOOP_VINFO_MASKS (loop_vinfo);
else
{
- bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
- cond.code = invert_tree_comparison (cond.code, honor_nans);
+ scalar_cond_masked_key cond (cond_expr, ncopies);
if (loop_vinfo->scalar_cond_masked_set.contains (cond))
+ masks = &LOOP_VINFO_MASKS (loop_vinfo);
+ else
{
- vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
- loop_mask = vect_get_loop_mask (gsi, masks, ncopies,
- vectype, j);
- cond_code = cond.code;
- swap_cond_operands = true;
+ bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
+ cond.code = invert_tree_comparison (cond.code, honor_nans);
+ if (loop_vinfo->scalar_cond_masked_set.contains (cond))
+ {
+ masks = &LOOP_VINFO_MASKS (loop_vinfo);
+ cond_code = cond.code;
+ swap_cond_operands = true;
+ }
}
}
}
@@ -10180,18 +10181,29 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
vec != { 0, ... } (masked in the MASK_LOAD,
unmasked in the VEC_COND_EXPR). */
- if (loop_mask)
+ if (masks)
{
- if (COMPARISON_CLASS_P (vec_compare))
+ unsigned vec_num = vec_oprnds0.length ();
+ loop_mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+ vectype, vec_num * j + i);
+
+ if (!is_gimple_val (vec_compare))
+ {
+ tree vec_compare_name = make_ssa_name (vec_cmp_type);
+ gassign *new_stmt = gimple_build_assign (vec_compare_name,
+ vec_compare);
+ vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vec_compare = vec_compare_name;
+ }
+
+ if (must_invert_cmp_result)
{
- tree tmp = make_ssa_name (vec_cmp_type);
- tree op0 = TREE_OPERAND (vec_compare, 0);
- tree op1 = TREE_OPERAND (vec_compare, 1);
- gassign *g = gimple_build_assign (tmp,
- TREE_CODE (vec_compare),
- op0, op1);
- vect_finish_stmt_generation (stmt_info, g, gsi);
- vec_compare = tmp;
+ tree vec_compare_name = make_ssa_name (vec_cmp_type);
+ gassign *new_stmt = gimple_build_assign (vec_compare_name,
+ BIT_NOT_EXPR,
+ vec_compare);
+ vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vec_compare = vec_compare_name;
}
tree tmp2 = make_ssa_name (vec_cmp_type);
@@ -10203,23 +10215,6 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
if (reduction_type == EXTRACT_LAST_REDUCTION)
{
- if (!is_gimple_val (vec_compare))
- {
- tree vec_compare_name = make_ssa_name (vec_cmp_type);
- gassign *new_stmt = gimple_build_assign (vec_compare_name,
- vec_compare);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- vec_compare = vec_compare_name;
- }
- if (must_invert_cmp_result)
- {
- tree vec_compare_name = make_ssa_name (vec_cmp_type);
- gassign *new_stmt = gimple_build_assign (vec_compare_name,
- BIT_NOT_EXPR,
- vec_compare);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- vec_compare = vec_compare_name;
- }
gcall *new_stmt = gimple_build_call_internal
(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
vec_then_clause);
next prev parent reply other threads:[~2019-10-24 2:31 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-10-18 1:48 Prathamesh Kulkarni
2019-10-18 9:07 ` Richard Sandiford
2019-10-21 20:16 ` Prathamesh Kulkarni
2019-10-22 7:51 ` Richard Sandiford
2019-10-24 5:44 ` Prathamesh Kulkarni [this message]
2019-10-25 9:12 ` Richard Sandiford
2019-10-25 19:58 ` Prathamesh Kulkarni
2019-10-27 13:41 ` Richard Sandiford
2019-10-28 15:02 ` Prathamesh Kulkarni
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAAgBjMkAfFLFQdsxMr_rKRDBbsxm90xcxoDPg1fvgK4wo=+gOQ@mail.gmail.com' \
--to=prathamesh.kulkarni@linaro.org \
--cc=gcc-patches@gcc.gnu.org \
--cc=richard.sandiford@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).