From: juzhe.zhong@rivai.ai
To: gcc-patches@gcc.gnu.org
Cc: richard.sandiford@arm.com, rguenther@suse.de,
rdapp.gcc@gmail.com, Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
Subject: [PATCH] VECT: Apply LEN_MASK_GATHER_LOAD/SCATTER_STORE into vectorizer
Date: Fri, 30 Jun 2023 18:41:04 +0800 [thread overview]
Message-ID: <20230630104104.4193661-2-juzhe.zhong@rivai.ai> (raw)
In-Reply-To: <20230630104104.4193661-1-juzhe.zhong@rivai.ai>
From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
Hi, Richard and Richi.
It seems that the implementation of LEN_MASK_GATHER_LOAD/LEN_MASK_SCATTER_STORE is simple
and the code change is not big.
Here is an example:
#include <stdint.h>
/* Example input: for each i in [0, n) where cond[i] is nonzero, copy the
   byte b[i * step + base] to a[i * step + base].  */
void
f (uint8_t *restrict a,
uint8_t *restrict b, int n,
int base, int step,
int *restrict cond)
{
for (int i = 0; i < n; ++i)
{
/* The guarded, computed-index load from b and store to a are what appear
   as .LEN_MASK_GATHER_LOAD and .LEN_MASK_SCATTER_STORE in the dump.  */
if (cond[i])
a[i * step + base] = b[i * step + base];
}
}
With this patch:
<bb 6> [local count: 84095460]:
_58 = (unsigned int) base_19(D);
_61 = (unsigned long) b_20(D);
_63 = (unsigned long) a_21(D);
vect_cst__105 = [vec_duplicate_expr] _58;
_110 = (unsigned long) n_16(D);
<bb 7> [local count: 504572759]:
# vect_vec_iv_.8_95 = PHI <_96(7), { 0, 1, 2, ... }(6)>
# vectp_cond.9_99 = PHI <vectp_cond.9_100(7), cond_17(D)(6)>
# ivtmp_111 = PHI <ivtmp_112(7), _110(6)>
_113 = .SELECT_VL (ivtmp_111, POLY_INT_CST [4, 4]);
_96 = vect_vec_iv_.8_95 + { POLY_INT_CST [4, 4], ... };
ivtmp_98 = _113 * 4;
vect__24.11_101 = .LEN_MASK_LOAD (vectp_cond.9_99, 32B, _113, { -1, ... }, 0);
mask__14.12_103 = vect__24.11_101 != { 0, ... };
vect__59.13_104 = VIEW_CONVERT_EXPR<vector([4,4]) unsigned int>(vect_vec_iv_.8_95);
vect__60.14_106 = vect__59.13_104 + vect_cst__105;
vect__12.15_107 = VIEW_CONVERT_EXPR<vector([4,4]) int>(vect__60.14_106);
vect_patt_5.16_108 = .LEN_MASK_GATHER_LOAD (_61, vect__12.15_107, 4, { 0, ... }, _113, mask__14.12_103, 0);
.LEN_MASK_SCATTER_STORE (_63, vect__12.15_107, 4, vect_patt_5.16_108, _113, mask__14.12_103, 0);
vectp_cond.9_100 = vectp_cond.9_99 + ivtmp_98;
ivtmp_112 = ivtmp_111 - _113;
if (ivtmp_112 != 0)
goto <bb 7>; [83.33%]
else
goto <bb 8>; [16.67%]
gcc/ChangeLog:
* optabs-query.cc (supports_vec_gather_load_p): Apply LEN_MASK_GATHER_LOAD/SCATTER_STORE.
(supports_vec_scatter_store_p): Ditto.
* tree-vect-data-refs.cc (vect_gather_scatter_fn_p): Ditto.
* tree-vect-stmts.cc (check_load_store_for_partial_vectors): Ditto.
(vectorizable_store): Ditto.
(vectorizable_load): Ditto.
---
gcc/optabs-query.cc | 2 +
gcc/tree-vect-data-refs.cc | 18 ++++++++-
gcc/tree-vect-stmts.cc | 81 +++++++++++++++++++++++++++++++++++++-
3 files changed, 98 insertions(+), 3 deletions(-)
diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index 2fdd0d34354..bf1f484e874 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -676,6 +676,7 @@ supports_vec_gather_load_p (machine_mode mode)
this_fn_optabs->supports_vec_gather_load[mode]
= (supports_vec_convert_optab_p (gather_load_optab, mode)
|| supports_vec_convert_optab_p (mask_gather_load_optab, mode)
+ || supports_vec_convert_optab_p (len_mask_gather_load_optab, mode)
? 1 : -1);
return this_fn_optabs->supports_vec_gather_load[mode] > 0;
@@ -692,6 +693,7 @@ supports_vec_scatter_store_p (machine_mode mode)
this_fn_optabs->supports_vec_scatter_store[mode]
= (supports_vec_convert_optab_p (scatter_store_optab, mode)
|| supports_vec_convert_optab_p (mask_scatter_store_optab, mode)
+ || supports_vec_convert_optab_p (len_mask_scatter_store_optab, mode)
? 1 : -1);
return this_fn_optabs->supports_vec_scatter_store[mode] > 0;
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index ebe93832b1e..01016284c48 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -3873,16 +3873,24 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
return false;
/* Work out which function we need. */
- internal_fn ifn, alt_ifn;
+ internal_fn ifn, alt_ifn, len_mask_ifn;
if (read_p)
{
ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
alt_ifn = IFN_MASK_GATHER_LOAD;
+ /* When the target supports LEN_MASK_GATHER_LOAD, we always
+ use LEN_MASK_GATHER_LOAD regardless of whether len and
+ mask are valid or not. */
+ len_mask_ifn = IFN_LEN_MASK_GATHER_LOAD;
}
else
{
ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
alt_ifn = IFN_MASK_SCATTER_STORE;
+ /* When the target supports LEN_MASK_SCATTER_STORE, we always
+ use LEN_MASK_SCATTER_STORE regardless of whether len and
+ mask are valid or not. */
+ len_mask_ifn = IFN_LEN_MASK_SCATTER_STORE;
}
for (;;)
@@ -3909,6 +3917,14 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
*offset_vectype_out = offset_vectype;
return true;
}
+ else if (internal_gather_scatter_fn_supported_p (len_mask_ifn, vectype,
+ memory_type,
+ offset_vectype, scale))
+ {
+ *ifn_out = ifn;
+ *offset_vectype_out = offset_vectype;
+ return true;
+ }
if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
&& TYPE_PRECISION (offset_type) >= element_bits)
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 68faa8ead39..fa0387353cf 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1771,6 +1771,17 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
gs_info->offset_vectype,
gs_info->scale))
{
+ internal_fn len_mask_ifn
+ = (is_load ? IFN_LEN_MASK_GATHER_LOAD : IFN_LEN_MASK_SCATTER_STORE);
+ if (internal_gather_scatter_fn_supported_p (len_mask_ifn, vectype,
+ gs_info->memory_type,
+ gs_info->offset_vectype,
+ gs_info->scale))
+ {
+ vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+ vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
+ return;
+ }
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"can't operate on partial vectors because"
@@ -8930,7 +8941,40 @@ vectorizable_store (vec_info *vinfo,
vec_offset = vec_offsets[vec_num * j + i];
tree scale = size_int (gs_info.scale);
gcall *call;
- if (final_mask)
+ if (internal_gather_scatter_fn_supported_p (
+ IFN_LEN_MASK_SCATTER_STORE, vectype,
+ gs_info.memory_type, TREE_TYPE (vec_offset),
+ gs_info.scale))
+ {
+ tree final_len = NULL_TREE;
+ tree bias = NULL_TREE;
+ if (loop_lens)
+ {
+ final_len
+ = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+ vec_num * ncopies, vectype,
+ vec_num * j + i, 1);
+ }
+ else
+ {
+ tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
+ final_len
+ = build_int_cst (iv_type,
+ TYPE_VECTOR_SUBPARTS (vectype));
+ }
+ signed char biasval
+ = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+ bias = build_int_cst (intQI_type_node, biasval);
+ if (!final_mask)
+ {
+ mask_vectype = truth_type_for (vectype);
+ final_mask = build_minus_one_cst (mask_vectype);
+ }
+ call = gimple_build_call_internal (
+ IFN_LEN_MASK_SCATTER_STORE, 7, dataref_ptr, vec_offset,
+ scale, vec_oprnd, final_len, final_mask, bias);
+ }
+ else if (final_mask)
call = gimple_build_call_internal
(IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
scale, vec_oprnd, final_mask);
@@ -10368,7 +10412,40 @@ vectorizable_load (vec_info *vinfo,
tree zero = build_zero_cst (vectype);
tree scale = size_int (gs_info.scale);
gcall *call;
- if (final_mask)
+ if (internal_gather_scatter_fn_supported_p (
+ IFN_LEN_MASK_GATHER_LOAD, vectype,
+ gs_info.memory_type, TREE_TYPE (vec_offset),
+ gs_info.scale))
+ {
+ tree final_len = NULL_TREE;
+ tree bias = NULL_TREE;
+ if (loop_lens)
+ {
+ final_len = vect_get_loop_len (
+ loop_vinfo, gsi, loop_lens, vec_num * ncopies,
+ vectype, vec_num * j + i, 1);
+ }
+ else
+ {
+ tree iv_type
+ = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
+ final_len = build_int_cst (
+ iv_type, TYPE_VECTOR_SUBPARTS (vectype));
+ }
+ signed char biasval
+ = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+ bias = build_int_cst (intQI_type_node, biasval);
+ if (!final_mask)
+ {
+ mask_vectype = truth_type_for (vectype);
+ final_mask = build_minus_one_cst (mask_vectype);
+ }
+ call = gimple_build_call_internal (
+ IFN_LEN_MASK_GATHER_LOAD, 7, dataref_ptr,
+ vec_offset, scale, zero, final_len, final_mask,
+ bias);
+ }
+ else if (final_mask)
call = gimple_build_call_internal
(IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
vec_offset, scale, zero, final_mask);
--
2.36.3
next prev parent reply other threads:[~2023-06-30 10:41 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-06-30 10:41 [PATCH V5] Machine Description: Add LEN_MASK_{GATHER_LOAD, SCATTER_STORE} pattern juzhe.zhong
2023-06-30 10:41 ` juzhe.zhong [this message]
2023-07-02 9:35 ` Richard Sandiford
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230630104104.4193661-2-juzhe.zhong@rivai.ai \
--to=juzhe.zhong@rivai.ai \
--cc=gcc-patches@gcc.gnu.org \
--cc=rdapp.gcc@gmail.com \
--cc=rguenther@suse.de \
--cc=richard.sandiford@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).