From: Robin Dapp <rdapp@linux.ibm.com>
To: Robin Dapp via Gcc-patches <gcc-patches@gcc.gnu.org>,
richard.sandiford@arm.com
Subject: Re: [RFC] Partial vectors for s390
Date: Tue, 26 Oct 2021 15:04:41 +0200 [thread overview]
Message-ID: <be7f6da6-59f7-b894-90cd-f2cabcd4e081@linux.ibm.com> (raw)
In-Reply-To: <mptmtn42hfq.fsf@arm.com>
[-- Attachment #1: Type: text/plain, Size: 1077 bytes --]
Hi Richard,
> We already have code to probe the predicates of the underlying
> define_expands/insns to see whether they support certain constant
> IFN arguments; see e.g. internal_gather_scatter_fn_supported_p.
> We could do something similar here: add an extra operand to the optab,
> and an extra argument to the IFN, that gives a bias amount.
> The PowerPC version would require 0, the System Z version would
> require -1. The vectoriser would probe to see which value
> it should use.
>
> Doing it that way ensures that the gimple is still self-describing.
> It avoids gimple semantics depending on target hooks.
As I don't have much previous exposure to the vectoriser code, I cobbled
together something pretty ad-hoc (attached). Does this come somewhat
close to what you have in mind?
internal_len_load_supported_p should probably be called
internal_len_load_bias_supported_p or similar, I guess. The part where we
exclude multiple loop_lens is still missing. Would we also check for a
viable bias there and then either accept multiple lens or not?
Regards
Robin
[-- Attachment #2: vll-bias.patch --]
[-- Type: text/x-patch, Size: 7682 bytes --]
commit 2320dbfdfe1477b15a2ac59847d2a52e68de49ab
Author: Robin Dapp <rdapp@linux.ibm.com>
Date: Tue Oct 26 14:36:08 2021 +0200
bias1
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 8312d08aab2..bf97d3e471a 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -2696,9 +2696,9 @@ expand_call_mem_ref (tree type, gcall *stmt, int index)
static void
expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
{
- class expand_operand ops[3];
- tree type, lhs, rhs, maskt;
- rtx mem, target, mask;
+ class expand_operand ops[4];
+ tree type, lhs, rhs, maskt, biast;
+ rtx mem, target, mask, bias;
insn_code icode;
maskt = gimple_call_arg (stmt, 2);
@@ -2727,7 +2727,18 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
TYPE_UNSIGNED (TREE_TYPE (maskt)));
else
create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
- expand_insn (icode, 3, ops);
+ if (optab == len_load_optab)
+ {
+ biast = gimple_call_arg (stmt, 3);
+ bias = expand_normal (biast);
+ create_input_operand (&ops[3], bias, SImode);
+ }
+
+ if (optab != len_load_optab)
+ expand_insn (icode, 3, ops);
+ else
+ expand_insn (icode, 4, ops);
+
if (!rtx_equal_p (target, ops[0].value))
emit_move_insn (target, ops[0].value);
}
@@ -2741,9 +2752,9 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
static void
expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
{
- class expand_operand ops[3];
- tree type, lhs, rhs, maskt;
- rtx mem, reg, mask;
+ class expand_operand ops[4];
+ tree type, lhs, rhs, maskt, biast;
+ rtx mem, reg, mask, bias;
insn_code icode;
maskt = gimple_call_arg (stmt, 2);
@@ -2770,7 +2781,17 @@ expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
TYPE_UNSIGNED (TREE_TYPE (maskt)));
else
create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
- expand_insn (icode, 3, ops);
+ if (optab == len_store_optab)
+ {
+ biast = gimple_call_arg (stmt, 4);
+ bias = expand_normal (biast);
+ create_input_operand (&ops[3], bias, SImode);
+ }
+
+ if (optab != len_store_optab)
+ expand_insn (icode, 3, ops);
+ else
+ expand_insn (icode, 4, ops);
}
#define expand_mask_store_optab_fn expand_partial_store_optab_fn
@@ -4154,6 +4175,25 @@ internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type,
&& insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale)));
}
+bool
+internal_len_load_supported_p (internal_fn ifn, tree load_type, int bias)
+{
+ if (bias > 0 || bias < -1)
+ return false;
+
+ machine_mode mode = TYPE_MODE (load_type);
+
+ optab optab = direct_internal_fn_optab (ifn);
+ insn_code icode = direct_optab_handler (optab, mode);
+ int output_ops = internal_load_fn_p (ifn) ? 1 : 0;
+
+ if (icode != CODE_FOR_nothing
+ && insn_operand_matches (icode, 2 + output_ops, GEN_INT (bias)))
+ return true;
+
+ return false;
+}
+
/* Return true if the target supports IFN_CHECK_{RAW,WAR}_PTRS function IFN
for pointers of type TYPE when the accesses have LENGTH bytes and their
common byte alignment is ALIGN. */
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 19d0f849a5a..d0bf9941bcc 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -225,6 +225,7 @@ extern int internal_fn_mask_index (internal_fn);
extern int internal_fn_stored_value_index (internal_fn);
extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
tree, tree, int);
+extern bool internal_len_load_supported_p (internal_fn ifn, tree, int);
extern bool internal_check_ptrs_fn_supported_p (internal_fn, tree,
poly_uint64, unsigned int);
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index d7723b1a92a..50537763ace 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -8272,12 +8272,14 @@ vectorizable_store (vec_info *vinfo,
opt_machine_mode new_ovmode
= get_len_load_store_mode (vmode, false);
machine_mode new_vmode = new_ovmode.require ();
+ tree vtype = vectype;
/* Need conversion if it's wrapped with VnQI. */
if (vmode != new_vmode)
{
tree new_vtype
= build_vector_type_for_mode (unsigned_intQI_type_node,
- new_vmode);
+ new_vmode);
+ vtype = new_vtype;
tree var
= vect_get_new_ssa_name (new_vtype, vect_simple_var);
vec_oprnd
@@ -8289,9 +8291,29 @@ vectorizable_store (vec_info *vinfo,
gsi);
vec_oprnd = var;
}
+
+ /* Check which bias value to use. Default is 0. */
+ tree bias = build_int_cst (intSI_type_node, 0);
+ tree new_len = final_len;
+ if (!internal_len_load_supported_p (IFN_LEN_LOAD, vtype, 0)
+ && internal_len_load_supported_p (IFN_LEN_LOAD,
+ vtype, -1))
+ {
+ bias = build_int_cst (intSI_type_node, -1);
+ new_len = make_ssa_name (TREE_TYPE (final_len));
+ gassign *m1 = gimple_build_assign (new_len,
+ MINUS_EXPR,
+ final_len,
+ build_one_cst
+ (TREE_TYPE
+ (final_len)));
+ vect_finish_stmt_generation (vinfo, stmt_info, m1,
+ gsi);
+ }
gcall *call
- = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
- ptr, final_len, vec_oprnd);
+ = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr,
+ ptr, new_len, vec_oprnd,
+ bias);
gimple_call_set_nothrow (call, true);
vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
new_stmt = call;
@@ -9588,24 +9610,50 @@ vectorizable_load (vec_info *vinfo,
vec_num * j + i);
tree ptr = build_int_cst (ref_type,
align * BITS_PER_UNIT);
+
+ machine_mode vmode = TYPE_MODE (vectype);
+ opt_machine_mode new_ovmode
+ = get_len_load_store_mode (vmode, true);
+ machine_mode new_vmode = new_ovmode.require ();
+ tree qi_type = unsigned_intQI_type_node;
+ tree new_vtype
+ = build_vector_type_for_mode (qi_type, new_vmode);
+
+ tree vtype = vectype;
+ if (vmode != new_vmode)
+ vtype = new_vtype;
+
+ /* Check which bias value to use. Default is 0. */
+ tree bias = build_int_cst (intSI_type_node, 0);
+ tree new_len = final_len;
+ if (!internal_len_load_supported_p (IFN_LEN_LOAD,
+ vtype, 0)
+ && internal_len_load_supported_p (IFN_LEN_LOAD,
+ vtype, -1))
+ {
+ bias = build_int_cst (intSI_type_node, -1);
+ new_len = make_ssa_name (TREE_TYPE (final_len));
+ gassign *m1 = gimple_build_assign (new_len,
+ MINUS_EXPR,
+ final_len,
+ build_one_cst
+ (TREE_TYPE
+ (final_len)));
+ vect_finish_stmt_generation (vinfo, stmt_info, m1,
+ gsi);
+ }
+
gcall *call
- = gimple_build_call_internal (IFN_LEN_LOAD, 3,
+ = gimple_build_call_internal (IFN_LEN_LOAD, 4,
dataref_ptr, ptr,
- final_len);
+ new_len, bias);
gimple_call_set_nothrow (call, true);
new_stmt = call;
data_ref = NULL_TREE;
/* Need conversion if it's wrapped with VnQI. */
- machine_mode vmode = TYPE_MODE (vectype);
- opt_machine_mode new_ovmode
- = get_len_load_store_mode (vmode, true);
- machine_mode new_vmode = new_ovmode.require ();
if (vmode != new_vmode)
{
- tree qi_type = unsigned_intQI_type_node;
- tree new_vtype
- = build_vector_type_for_mode (qi_type, new_vmode);
tree var = vect_get_new_ssa_name (new_vtype,
vect_simple_var);
gimple_set_lhs (call, var);
next prev parent reply other threads:[~2021-10-26 13:04 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-10-20 8:34 Robin Dapp
2021-10-20 9:07 ` Richard Sandiford
2021-10-26 13:04 ` Robin Dapp [this message]
2021-10-26 14:18 ` Richard Sandiford
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=be7f6da6-59f7-b894-90cd-f2cabcd4e081@linux.ibm.com \
--to=rdapp@linux.ibm.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=richard.sandiford@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).