From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtpbguseast1.qq.com (smtpbguseast1.qq.com [54.204.34.129]) by sourceware.org (Postfix) with ESMTPS id 0F3D9385141E for ; Fri, 30 Jun 2023 10:41:15 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 0F3D9385141E Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=rivai.ai Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=rivai.ai X-QQ-mid: bizesmtp85t1688121670thz1smaz Received: from rios-cad5.localdomain ( [58.60.1.11]) by bizesmtp.qq.com (ESMTP) with id ; Fri, 30 Jun 2023 18:41:09 +0800 (CST) X-QQ-SSF: 01400000000000G0T000000A0000000 X-QQ-FEAT: dKvkn8qoLrFuRYVrDLtQM+w8Xjs6YcErXPYMiWwriQEW4gCdHU5MvpzrWiDTB nP9mcu3u9x8WQ6O1pPHW7SpRgg12QzCrjLRIW9ybazspbMC0GD4s8k4FOJKvjrBadvillHU 4a+dzdZvPWEx+zIjkDaGJutWAlOqF5A5z1J0XL8YBZtIooc+ybUBAujKfXa/xEBsEEoe1iU KKp/A8s3ts28b/pYPDM6Q1Yya/6HeDdyLXztjAOjAnQOx5DRqRH36xybeVjP8V861k2tR5l rOUTWQneAJctGDjUjJRmKSdt6rciK925JXsUY+eIXtnemdToSDAg64FGa9USV/pWuOeJS05 ug3WFL/POfbjy8aUJbYOpKLFtQnoHTRuPlkSc67ZfXf4JPiI9zZ40tN1VyMNnBr7QcU6KXv 30ihu2Ca9jPsVyQkEnlipg== X-QQ-GoodBg: 2 X-BIZMAIL-ID: 9980876668090774109 From: juzhe.zhong@rivai.ai To: gcc-patches@gcc.gnu.org Cc: richard.sandiford@arm.com, rguenther@suse.de, rdapp.gcc@gmail.com, Ju-Zhe Zhong Subject: [PATCH] VECT: Apply LEN_MASK_GATHER_LOAD/SCATTER_STORE into vectorizer Date: Fri, 30 Jun 2023 18:41:04 +0800 Message-Id: <20230630104104.4193661-2-juzhe.zhong@rivai.ai> X-Mailer: git-send-email 2.36.3 In-Reply-To: <20230630104104.4193661-1-juzhe.zhong@rivai.ai> References: <20230630104104.4193661-1-juzhe.zhong@rivai.ai> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-QQ-SENDSIZE: 520 Feedback-ID: bizesmtp:rivai.ai:qybglogicsvrgz:qybglogicsvrgz7a-one-0 X-Spam-Status: No, score=-12.1 required=5.0 tests=BAYES_00,GIT_PATCH_0,KAM_DMARC_STATUS,RCVD_IN_DNSWL_NONE,RCVD_IN_MSPIKE_H4,RCVD_IN_MSPIKE_WL,SPF_HELO_PASS,SPF_PASS,TXREP,T_SCC_BODY_TEXT_LINE autolearn=ham 
autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: From: Ju-Zhe Zhong Hi, Richard and Richi. It seems that the implementation of LEN_MASK_GATHER_LOAD/LEN_MASK_SCATTER_STORE is simple and the code change is not big. Here is an example: #include <stdint.h> void f (uint8_t *restrict a, uint8_t *restrict b, int n, int base, int step, int *restrict cond) { for (int i = 0; i < n; ++i) { if (cond[i]) a[i * step + base] = b[i * step + base]; } } With this patch: [local count: 84095460]: _58 = (unsigned int) base_19(D); _61 = (unsigned long) b_20(D); _63 = (unsigned long) a_21(D); vect_cst__105 = [vec_duplicate_expr] _58; _110 = (unsigned long) n_16(D); [local count: 504572759]: # vect_vec_iv_.8_95 = PHI <_96(7), { 0, 1, 2, ... }(6)> # vectp_cond.9_99 = PHI # ivtmp_111 = PHI _113 = .SELECT_VL (ivtmp_111, POLY_INT_CST [4, 4]); _96 = vect_vec_iv_.8_95 + { POLY_INT_CST [4, 4], ... }; ivtmp_98 = _113 * 4; vect__24.11_101 = .LEN_MASK_LOAD (vectp_cond.9_99, 32B, _113, { -1, ... }, 0); mask__14.12_103 = vect__24.11_101 != { 0, ... }; vect__59.13_104 = VIEW_CONVERT_EXPR(vect_vec_iv_.8_95); vect__60.14_106 = vect__59.13_104 + vect_cst__105; vect__12.15_107 = VIEW_CONVERT_EXPR(vect__60.14_106); vect_patt_5.16_108 = .LEN_MASK_GATHER_LOAD (_61, vect__12.15_107, 4, { 0, ... }, _113, mask__14.12_103, 0); .LEN_MASK_SCATTER_STORE (_63, vect__12.15_107, 4, vect_patt_5.16_108, _113, mask__14.12_103, 0); vectp_cond.9_100 = vectp_cond.9_99 + ivtmp_98; ivtmp_112 = ivtmp_111 - _113; if (ivtmp_112 != 0) goto ; [83.33%] else goto ; [16.67%] gcc/ChangeLog: * optabs-query.cc (supports_vec_gather_load_p): Apply LEN_MASK_GATHER_LOAD/SCATTER_STORE. (supports_vec_scatter_store_p): Ditto. * tree-vect-data-refs.cc (vect_gather_scatter_fn_p): Ditto. * tree-vect-stmts.cc (check_load_store_for_partial_vectors): Ditto. (vectorizable_store): Ditto. (vectorizable_load): Ditto. 
--- gcc/optabs-query.cc | 2 + gcc/tree-vect-data-refs.cc | 18 ++++++++- gcc/tree-vect-stmts.cc | 81 +++++++++++++++++++++++++++++++++++++- 3 files changed, 98 insertions(+), 3 deletions(-) diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc index 2fdd0d34354..bf1f484e874 100644 --- a/gcc/optabs-query.cc +++ b/gcc/optabs-query.cc @@ -676,6 +676,7 @@ supports_vec_gather_load_p (machine_mode mode) this_fn_optabs->supports_vec_gather_load[mode] = (supports_vec_convert_optab_p (gather_load_optab, mode) || supports_vec_convert_optab_p (mask_gather_load_optab, mode) + || supports_vec_convert_optab_p (len_mask_gather_load_optab, mode) ? 1 : -1); return this_fn_optabs->supports_vec_gather_load[mode] > 0; @@ -692,6 +693,7 @@ supports_vec_scatter_store_p (machine_mode mode) this_fn_optabs->supports_vec_scatter_store[mode] = (supports_vec_convert_optab_p (scatter_store_optab, mode) || supports_vec_convert_optab_p (mask_scatter_store_optab, mode) + || supports_vec_convert_optab_p (len_mask_scatter_store_optab, mode) ? 1 : -1); return this_fn_optabs->supports_vec_scatter_store[mode] > 0; diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index ebe93832b1e..01016284c48 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -3873,16 +3873,24 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p, return false; /* Work out which function we need. */ - internal_fn ifn, alt_ifn; + internal_fn ifn, alt_ifn, len_mask_ifn; if (read_p) { ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD; alt_ifn = IFN_MASK_GATHER_LOAD; + /* When target supports LEN_MASK_GATHER_LOAD, we always + use LEN_MASK_GATHER_LOAD regardless whether len and + mask are valid or not. */ + len_mask_ifn = IFN_LEN_MASK_GATHER_LOAD; } else { ifn = masked_p ? 
IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE; alt_ifn = IFN_MASK_SCATTER_STORE; + /* When target supports LEN_MASK_SCATTER_STORE, we always + use LEN_MASK_SCATTER_STORE regardless whether len and + mask are valid or not. */ + len_mask_ifn = IFN_LEN_MASK_SCATTER_STORE; } for (;;) @@ -3909,6 +3917,14 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p, *offset_vectype_out = offset_vectype; return true; } + else if (internal_gather_scatter_fn_supported_p (len_mask_ifn, vectype, + memory_type, + offset_vectype, scale)) + { + *ifn_out = ifn; + *offset_vectype_out = offset_vectype; + return true; + } if (TYPE_PRECISION (offset_type) >= POINTER_SIZE && TYPE_PRECISION (offset_type) >= element_bits) diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 68faa8ead39..fa0387353cf 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -1771,6 +1771,17 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, gs_info->offset_vectype, gs_info->scale)) { + internal_fn len_mask_ifn + = (is_load ? 
IFN_LEN_MASK_GATHER_LOAD : IFN_LEN_MASK_SCATTER_STORE); + if (internal_gather_scatter_fn_supported_p (len_mask_ifn, vectype, + gs_info->memory_type, + gs_info->offset_vectype, + gs_info->scale)) + { + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); + vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1); + return; + } if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "can't operate on partial vectors because" @@ -8930,7 +8941,40 @@ vectorizable_store (vec_info *vinfo, vec_offset = vec_offsets[vec_num * j + i]; tree scale = size_int (gs_info.scale); gcall *call; - if (final_mask) + if (internal_gather_scatter_fn_supported_p ( + IFN_LEN_MASK_SCATTER_STORE, vectype, + gs_info.memory_type, TREE_TYPE (vec_offset), + gs_info.scale)) + { + tree final_len = NULL_TREE; + tree bias = NULL_TREE; + if (loop_lens) + { + final_len + = vect_get_loop_len (loop_vinfo, gsi, loop_lens, + vec_num * ncopies, vectype, + vec_num * j + i, 1); + } + else + { + tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo); + final_len + = build_int_cst (iv_type, + TYPE_VECTOR_SUBPARTS (vectype)); + } + signed char biasval + = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); + bias = build_int_cst (intQI_type_node, biasval); + if (!final_mask) + { + mask_vectype = truth_type_for (vectype); + final_mask = build_minus_one_cst (mask_vectype); + } + call = gimple_build_call_internal ( + IFN_LEN_MASK_SCATTER_STORE, 7, dataref_ptr, vec_offset, + scale, vec_oprnd, final_len, final_mask, bias); + } + else if (final_mask) call = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset, scale, vec_oprnd, final_mask); @@ -10368,7 +10412,40 @@ vectorizable_load (vec_info *vinfo, tree zero = build_zero_cst (vectype); tree scale = size_int (gs_info.scale); gcall *call; - if (final_mask) + if (internal_gather_scatter_fn_supported_p ( + IFN_LEN_MASK_GATHER_LOAD, vectype, + gs_info.memory_type, TREE_TYPE (vec_offset), + gs_info.scale)) + { + tree 
final_len = NULL_TREE; + tree bias = NULL_TREE; + if (loop_lens) + { + final_len = vect_get_loop_len ( + loop_vinfo, gsi, loop_lens, vec_num * ncopies, + vectype, vec_num * j + i, 1); + } + else + { + tree iv_type + = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo); + final_len = build_int_cst ( + iv_type, TYPE_VECTOR_SUBPARTS (vectype)); + } + signed char biasval + = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); + bias = build_int_cst (intQI_type_node, biasval); + if (!final_mask) + { + mask_vectype = truth_type_for (vectype); + final_mask = build_minus_one_cst (mask_vectype); + } + call = gimple_build_call_internal ( + IFN_LEN_MASK_GATHER_LOAD, 7, dataref_ptr, + vec_offset, scale, zero, final_len, final_mask, + bias); + } + else if (final_mask) call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5, dataref_ptr, vec_offset, scale, zero, final_mask); -- 2.36.3