From: Christophe Lyon <christophe.lyon@foss.st.com>
To: <gcc-patches@gcc.gnu.org>
Subject: [PATCH v3 08/15] arm: Implement auto-vectorized MVE comparisons with vectors of boolean predicates
Date: Thu, 13 Jan 2022 15:56:18 +0100 [thread overview]
Message-ID: <20220113145645.4077141-9-christophe.lyon@foss.st.com> (raw)
In-Reply-To: <20220113145645.4077141-1-christophe.lyon@foss.st.com>
We make use of qualifier_predicate to describe MVE builtins
prototypes, restricting to auto-vectorizable vcmp* and vpsel builtins,
as they are exercised by the tests added earlier in the series.
Special handling is needed for mve_vpselq because it has a v2di
variant, which has no natural VPR.P0 representation: we keep HImode
for it.
The vector_compare expansion code is updated to use the right VxBI
mode instead of HI for the result.
We extend the existing thumb2_movhi_vfp and thumb2_movhi_fp16 patterns
to use the new MVE_7_HI iterator which covers HI and the new VxBI
modes, in conjunction with the new DB constraint for a constant vector
of booleans.
2022-01-13 Christophe Lyon <christophe.lyon@foss.st.com>
Richard Sandiford <richard.sandiford@arm.com>
gcc/
PR target/100757
PR target/101325
* config/arm/arm-builtins.c (BINOP_PRED_UNONE_UNONE_QUALIFIERS)
(BINOP_PRED_NONE_NONE_QUALIFIERS)
(TERNOP_NONE_NONE_NONE_PRED_QUALIFIERS)
(TERNOP_UNONE_UNONE_UNONE_PRED_QUALIFIERS): New.
* config/arm/arm-protos.h (mve_const_bool_vec_to_hi): New.
* config/arm/arm.c (arm_hard_regno_mode_ok): Handle new VxBI
modes.
(arm_mode_to_pred_mode): New.
(arm_expand_vector_compare): Use the right VxBI mode instead of
HI.
(arm_expand_vcond): Likewise.
(simd_valid_immediate): Handle MODE_VECTOR_BOOL.
(mve_const_bool_vec_to_hi): New.
(neon_make_constant): Call mve_const_bool_vec_to_hi when needed.
* config/arm/arm_mve_builtins.def (vcmpneq_, vcmphiq_, vcmpcsq_)
(vcmpltq_, vcmpleq_, vcmpgtq_, vcmpgeq_, vcmpeqq_, vcmpneq_f)
(vcmpltq_f, vcmpleq_f, vcmpgtq_f, vcmpgeq_f, vcmpeqq_f, vpselq_u)
(vpselq_s, vpselq_f): Use new predicated qualifiers.
* config/arm/constraints.md (DB): New.
* config/arm/iterators.md (MVE_7, MVE_7_HI): New mode iterators.
(MVE_VPRED, MVE_vpred): New attribute iterators.
* config/arm/mve.md (@mve_vcmp<mve_cmp_op>q_<mode>)
(@mve_vcmp<mve_cmp_op>q_f<mode>, @mve_vpselq_<supf><mode>)
(@mve_vpselq_f<mode>): Use MVE_VPRED instead of HI.
(@mve_vpselq_<supf>v2di): Define separately.
(mov<mode>): New expander for VxBI modes.
* config/arm/vfp.md (thumb2_movhi_vfp, thumb2_movhi_fp16): Use
MVE_7_HI iterator and add support for DB constraint.
gcc/testsuite/
PR target/100757
PR target/101325
* gcc.dg/rtl/arm/mve-vxbi.c: New test.
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 2ccfa37c302..36d71ab1a13 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -420,6 +420,12 @@ arm_binop_unone_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
#define BINOP_UNONE_UNONE_UNONE_QUALIFIERS \
(arm_binop_unone_unone_unone_qualifiers)
+static enum arm_type_qualifiers
+arm_binop_pred_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_predicate, qualifier_unsigned, qualifier_unsigned };
+#define BINOP_PRED_UNONE_UNONE_QUALIFIERS \
+ (arm_binop_pred_unone_unone_qualifiers)
+
static enum arm_type_qualifiers
arm_binop_unone_none_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_unsigned, qualifier_none, qualifier_immediate };
@@ -438,6 +444,12 @@ arm_binop_unone_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
#define BINOP_UNONE_NONE_NONE_QUALIFIERS \
(arm_binop_unone_none_none_qualifiers)
+static enum arm_type_qualifiers
+arm_binop_pred_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_predicate, qualifier_none, qualifier_none };
+#define BINOP_PRED_NONE_NONE_QUALIFIERS \
+ (arm_binop_pred_none_none_qualifiers)
+
static enum arm_type_qualifiers
arm_binop_unone_unone_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_unsigned, qualifier_unsigned, qualifier_none };
@@ -509,6 +521,12 @@ arm_ternop_none_none_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
#define TERNOP_NONE_NONE_NONE_UNONE_QUALIFIERS \
(arm_ternop_none_none_none_unone_qualifiers)
+static enum arm_type_qualifiers
+arm_ternop_none_none_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_none, qualifier_none, qualifier_none, qualifier_predicate };
+#define TERNOP_NONE_NONE_NONE_PRED_QUALIFIERS \
+ (arm_ternop_none_none_none_pred_qualifiers)
+
static enum arm_type_qualifiers
arm_ternop_none_none_imm_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_immediate, qualifier_unsigned };
@@ -528,6 +546,13 @@ arm_ternop_unone_unone_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
#define TERNOP_UNONE_UNONE_UNONE_UNONE_QUALIFIERS \
(arm_ternop_unone_unone_unone_unone_qualifiers)
+static enum arm_type_qualifiers
+arm_ternop_unone_unone_unone_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
+ qualifier_predicate };
+#define TERNOP_UNONE_UNONE_UNONE_PRED_QUALIFIERS \
+ (arm_ternop_unone_unone_unone_pred_qualifiers)
+
static enum arm_type_qualifiers
arm_ternop_none_none_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_none, qualifier_none };
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index fb365ac5268..b978adf2038 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -101,6 +101,7 @@ extern char *neon_output_shift_immediate (const char *, char, rtx *,
machine_mode, int, bool);
extern void neon_pairwise_reduce (rtx, rtx, machine_mode,
rtx (*) (rtx, rtx, rtx));
+extern rtx mve_const_bool_vec_to_hi (rtx const_vec);
extern rtx neon_make_constant (rtx, bool generate = true);
extern tree arm_builtin_vectorized_function (unsigned int, tree, tree);
extern void neon_expand_vector_init (rtx, rtx);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 64a8f2dc7de..fa18c7bd3fe 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -12750,7 +12750,10 @@ simd_valid_immediate (rtx op, machine_mode mode, int inverse,
innersize = GET_MODE_UNIT_SIZE (mode);
/* Only support 128-bit vectors for MVE. */
- if (TARGET_HAVE_MVE && (!vector || n_elts * innersize != 16))
+ if (TARGET_HAVE_MVE
+ && (!vector
+ || (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+ || n_elts * innersize != 16))
return -1;
/* Vectors of float constants. */
@@ -13115,6 +13118,29 @@ neon_vdup_constant (rtx vals, bool generate)
return gen_vec_duplicate (mode, x);
}
+/* Return a HI representation of CONST_VEC suitable for MVE predicates. */
+rtx
+mve_const_bool_vec_to_hi (rtx const_vec)
+{
+ int n_elts = GET_MODE_NUNITS ( GET_MODE (const_vec));
+ int repeat = 16 / n_elts;
+ int i;
+ int hi_val = 0;
+
+ for (i = 0; i < n_elts; i++)
+ {
+ rtx el = CONST_VECTOR_ELT (const_vec, i);
+ unsigned HOST_WIDE_INT elpart;
+
+ gcc_assert (CONST_INT_P (el));
+ elpart = INTVAL (el);
+
+ for (int j = 0; j < repeat; j++)
+ hi_val |= elpart << (i * repeat + j);
+ }
+ return GEN_INT (hi_val);
+}
+
/* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
constants (for vec_init) or CONST_VECTOR, can be effeciently loaded
into a register.
@@ -13155,6 +13181,8 @@ neon_make_constant (rtx vals, bool generate)
&& simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
/* Load using VMOV. On Cortex-A8 this takes one cycle. */
return const_vec;
+ else if (TARGET_HAVE_MVE && (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL))
+ return mve_const_bool_vec_to_hi (const_vec);
else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
/* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
pipeline cycle; creating the constant takes one or two ARM
@@ -25313,7 +25341,10 @@ arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
return false;
if (IS_VPR_REGNUM (regno))
- return mode == HImode;
+ return mode == HImode
+ || mode == V16BImode
+ || mode == V8BImode
+ || mode == V4BImode;
if (TARGET_THUMB1)
/* For the Thumb we only allow values bigger than SImode in
@@ -31001,6 +31032,19 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
arm_post_atomic_barrier (model);
}
\f
+/* Return the mode for the MVE vector of predicates corresponding to MODE. */
+machine_mode
+arm_mode_to_pred_mode (machine_mode mode)
+{
+ switch (GET_MODE_NUNITS (mode))
+ {
+ case 16: return V16BImode;
+ case 8: return V8BImode;
+ case 4: return V4BImode;
+ }
+ gcc_unreachable ();
+}
+
/* Expand code to compare vectors OP0 and OP1 using condition CODE.
If CAN_INVERT, store either the result or its inverse in TARGET
and return true if TARGET contains the inverse. If !CAN_INVERT,
@@ -31084,7 +31128,7 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
if (vcond_mve)
vpr_p0 = target;
else
- vpr_p0 = gen_reg_rtx (HImode);
+ vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode));
switch (GET_MODE_CLASS (cmp_mode))
{
@@ -31126,7 +31170,7 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
if (vcond_mve)
vpr_p0 = target;
else
- vpr_p0 = gen_reg_rtx (HImode);
+ vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode));
emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1)));
if (!vcond_mve)
@@ -31153,7 +31197,7 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
if (vcond_mve)
vpr_p0 = target;
else
- vpr_p0 = gen_reg_rtx (HImode);
+ vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode));
emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, vpr_p0, force_reg (cmp_mode, op1), op0));
if (!vcond_mve)
@@ -31206,7 +31250,7 @@ arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
if (TARGET_HAVE_MVE)
{
vcond_mve=true;
- mask = gen_reg_rtx (HImode);
+ mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode));
}
else
mask = gen_reg_rtx (cmp_result_mode);
diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def
index c3ae40765fe..44b41eab4c5 100644
--- a/gcc/config/arm/arm_mve_builtins.def
+++ b/gcc/config/arm/arm_mve_builtins.def
@@ -89,7 +89,7 @@ VAR3 (BINOP_UNONE_UNONE_IMM, vshrq_n_u, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_IMM, vshrq_n_s, v16qi, v8hi, v4si)
VAR1 (BINOP_NONE_NONE_UNONE, vaddlvq_p_s, v4si)
VAR1 (BINOP_UNONE_UNONE_UNONE, vaddlvq_p_u, v4si)
-VAR3 (BINOP_UNONE_NONE_NONE, vcmpneq_, v16qi, v8hi, v4si)
+VAR3 (BINOP_PRED_NONE_NONE, vcmpneq_, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vshlq_s, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_NONE, vshlq_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vsubq_u, v16qi, v8hi, v4si)
@@ -117,9 +117,9 @@ VAR3 (BINOP_UNONE_UNONE_UNONE, vhsubq_n_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vhaddq_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vhaddq_n_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, veorq_u, v16qi, v8hi, v4si)
-VAR3 (BINOP_UNONE_UNONE_UNONE, vcmphiq_, v16qi, v8hi, v4si)
+VAR3 (BINOP_PRED_UNONE_UNONE, vcmphiq_, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vcmphiq_n_, v16qi, v8hi, v4si)
-VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_, v16qi, v8hi, v4si)
+VAR3 (BINOP_PRED_UNONE_UNONE, vcmpcsq_, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_n_, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vbicq_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vandq_u, v16qi, v8hi, v4si)
@@ -143,15 +143,15 @@ VAR3 (BINOP_UNONE_UNONE_IMM, vshlq_n_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_IMM, vrshrq_n_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_IMM, vqshlq_n_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_NONE_NONE, vcmpneq_n_, v16qi, v8hi, v4si)
-VAR3 (BINOP_UNONE_NONE_NONE, vcmpltq_, v16qi, v8hi, v4si)
+VAR3 (BINOP_PRED_NONE_NONE, vcmpltq_, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_NONE_NONE, vcmpltq_n_, v16qi, v8hi, v4si)
-VAR3 (BINOP_UNONE_NONE_NONE, vcmpleq_, v16qi, v8hi, v4si)
+VAR3 (BINOP_PRED_NONE_NONE, vcmpleq_, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_NONE_NONE, vcmpleq_n_, v16qi, v8hi, v4si)
-VAR3 (BINOP_UNONE_NONE_NONE, vcmpgtq_, v16qi, v8hi, v4si)
+VAR3 (BINOP_PRED_NONE_NONE, vcmpgtq_, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_NONE_NONE, vcmpgtq_n_, v16qi, v8hi, v4si)
-VAR3 (BINOP_UNONE_NONE_NONE, vcmpgeq_, v16qi, v8hi, v4si)
+VAR3 (BINOP_PRED_NONE_NONE, vcmpgeq_, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_NONE_NONE, vcmpgeq_n_, v16qi, v8hi, v4si)
-VAR3 (BINOP_UNONE_NONE_NONE, vcmpeqq_, v16qi, v8hi, v4si)
+VAR3 (BINOP_PRED_NONE_NONE, vcmpeqq_, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_NONE_NONE, vcmpeqq_n_, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_NONE_IMM, vqshluq_n_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_UNONE, vaddvq_p_s, v16qi, v8hi, v4si)
@@ -219,17 +219,17 @@ VAR2 (BINOP_UNONE_UNONE_IMM, vshllbq_n_u, v16qi, v8hi)
VAR2 (BINOP_UNONE_UNONE_IMM, vorrq_n_u, v8hi, v4si)
VAR2 (BINOP_UNONE_UNONE_IMM, vbicq_n_u, v8hi, v4si)
VAR2 (BINOP_UNONE_NONE_NONE, vcmpneq_n_f, v8hf, v4sf)
-VAR2 (BINOP_UNONE_NONE_NONE, vcmpneq_f, v8hf, v4sf)
+VAR2 (BINOP_PRED_NONE_NONE, vcmpneq_f, v8hf, v4sf)
VAR2 (BINOP_UNONE_NONE_NONE, vcmpltq_n_f, v8hf, v4sf)
-VAR2 (BINOP_UNONE_NONE_NONE, vcmpltq_f, v8hf, v4sf)
+VAR2 (BINOP_PRED_NONE_NONE, vcmpltq_f, v8hf, v4sf)
VAR2 (BINOP_UNONE_NONE_NONE, vcmpleq_n_f, v8hf, v4sf)
-VAR2 (BINOP_UNONE_NONE_NONE, vcmpleq_f, v8hf, v4sf)
+VAR2 (BINOP_PRED_NONE_NONE, vcmpleq_f, v8hf, v4sf)
VAR2 (BINOP_UNONE_NONE_NONE, vcmpgtq_n_f, v8hf, v4sf)
-VAR2 (BINOP_UNONE_NONE_NONE, vcmpgtq_f, v8hf, v4sf)
+VAR2 (BINOP_PRED_NONE_NONE, vcmpgtq_f, v8hf, v4sf)
VAR2 (BINOP_UNONE_NONE_NONE, vcmpgeq_n_f, v8hf, v4sf)
-VAR2 (BINOP_UNONE_NONE_NONE, vcmpgeq_f, v8hf, v4sf)
+VAR2 (BINOP_PRED_NONE_NONE, vcmpgeq_f, v8hf, v4sf)
VAR2 (BINOP_UNONE_NONE_NONE, vcmpeqq_n_f, v8hf, v4sf)
-VAR2 (BINOP_UNONE_NONE_NONE, vcmpeqq_f, v8hf, v4sf)
+VAR2 (BINOP_PRED_NONE_NONE, vcmpeqq_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vsubq_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vqmovntq_s, v8hi, v4si)
VAR2 (BINOP_NONE_NONE_NONE, vqmovnbq_s, v8hi, v4si)
@@ -295,8 +295,8 @@ VAR2 (TERNOP_UNONE_UNONE_NONE_UNONE, vcvtaq_m_u, v8hi, v4si)
VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vcvtaq_m_s, v8hi, v4si)
VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vshlcq_vec_u, v16qi, v8hi, v4si)
VAR3 (TERNOP_NONE_NONE_UNONE_IMM, vshlcq_vec_s, v16qi, v8hi, v4si)
-VAR4 (TERNOP_UNONE_UNONE_UNONE_UNONE, vpselq_u, v16qi, v8hi, v4si, v2di)
-VAR4 (TERNOP_NONE_NONE_NONE_UNONE, vpselq_s, v16qi, v8hi, v4si, v2di)
+VAR4 (TERNOP_UNONE_UNONE_UNONE_PRED, vpselq_u, v16qi, v8hi, v4si, v2di)
+VAR4 (TERNOP_NONE_NONE_NONE_PRED, vpselq_s, v16qi, v8hi, v4si, v2di)
VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vrev64q_m_u, v16qi, v8hi, v4si)
VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmvnq_m_u, v16qi, v8hi, v4si)
VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmlasq_n_u, v16qi, v8hi, v4si)
@@ -426,7 +426,7 @@ VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vrev64q_m_f, v8hf, v4sf)
VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vrev32q_m_s, v16qi, v8hi)
VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vqmovntq_m_s, v8hi, v4si)
VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vqmovnbq_m_s, v8hi, v4si)
-VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vpselq_f, v8hf, v4sf)
+VAR2 (TERNOP_NONE_NONE_NONE_PRED, vpselq_f, v8hf, v4sf)
VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vnegq_m_f, v8hf, v4sf)
VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vmovntq_m_s, v8hi, v4si)
VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vmovnbq_m_s, v8hi, v4si)
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 1920004b450..2b411b0cb0f 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -312,6 +312,12 @@ (define_constraint "Dz"
(and (match_code "const_vector")
(match_test "(TARGET_NEON || TARGET_HAVE_MVE) && op == CONST0_RTX (mode)")))
+(define_constraint "DB"
+ "@internal
+ In ARM/Thumb-2 state with MVE a constant vector of booleans."
+ (and (match_code "const_vector")
+ (match_test "TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL")))
+
(define_constraint "Da"
"@internal
In ARM/Thumb-2 state a const_int, const_double or const_vector that can
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 8202c27cc82..37cf7971be8 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -272,6 +272,8 @@ (define_mode_iterator MVE_3 [V16QI V8HI])
(define_mode_iterator MVE_2 [V16QI V8HI V4SI])
(define_mode_iterator MVE_5 [V8HI V4SI])
(define_mode_iterator MVE_6 [V8HI V4SI])
+(define_mode_iterator MVE_7 [V16BI V8BI V4BI])
+(define_mode_iterator MVE_7_HI [HI V16BI V8BI V4BI])
;;----------------------------------------------------------------------------
;; Code iterators
@@ -946,6 +948,10 @@ (define_mode_attr V_extr_elem [(V16QI "u8") (V8HI "u16") (V4SI "32")
(V8HF "u16") (V4SF "32")])
(define_mode_attr earlyclobber_32 [(V16QI "=w") (V8HI "=w") (V4SI "=&w")
(V8HF "=w") (V4SF "=&w")])
+(define_mode_attr MVE_VPRED [(V16QI "V16BI") (V8HI "V8BI") (V4SI "V4BI")
+ (V2DI "HI") (V8HF "V8BI") (V4SF "V4BI")])
+(define_mode_attr MVE_vpred [(V16QI "v16bi") (V8HI "v8bi") (V4SI "v4bi")
+ (V2DI "hi") (V8HF "v8bi") (V4SF "v4bi")])
;;----------------------------------------------------------------------------
;; Code attributes
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 5c3b34dce3a..983aa10e652 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -839,8 +839,8 @@ (define_insn "mve_vaddlvq_p_<supf>v4si"
;;
(define_insn "@mve_vcmp<mve_cmp_op>q_<mode>"
[
- (set (match_operand:HI 0 "vpr_register_operand" "=Up")
- (MVE_COMPARISONS:HI (match_operand:MVE_2 1 "s_register_operand" "w")
+ (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
+ (MVE_COMPARISONS:<MVE_VPRED> (match_operand:MVE_2 1 "s_register_operand" "w")
(match_operand:MVE_2 2 "s_register_operand" "w")))
]
"TARGET_HAVE_MVE"
@@ -1929,8 +1929,8 @@ (define_insn "mve_vcaddq<mve_rot><mode>"
;;
(define_insn "@mve_vcmp<mve_cmp_op>q_f<mode>"
[
- (set (match_operand:HI 0 "vpr_register_operand" "=Up")
- (MVE_FP_COMPARISONS:HI (match_operand:MVE_0 1 "s_register_operand" "w")
+ (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
+ (MVE_FP_COMPARISONS:<MVE_VPRED> (match_operand:MVE_0 1 "s_register_operand" "w")
(match_operand:MVE_0 2 "s_register_operand" "w")))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
@@ -3324,7 +3324,7 @@ (define_insn "@mve_vpselq_<supf><mode>"
(set (match_operand:MVE_1 0 "s_register_operand" "=w")
(unspec:MVE_1 [(match_operand:MVE_1 1 "s_register_operand" "w")
(match_operand:MVE_1 2 "s_register_operand" "w")
- (match_operand:HI 3 "vpr_register_operand" "Up")]
+ (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
VPSELQ))
]
"TARGET_HAVE_MVE"
@@ -4419,7 +4419,7 @@ (define_insn "@mve_vpselq_f<mode>"
(set (match_operand:MVE_0 0 "s_register_operand" "=w")
(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
(match_operand:MVE_0 2 "s_register_operand" "w")
- (match_operand:HI 3 "vpr_register_operand" "Up")]
+ (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
VPSELQ_F))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
@@ -10516,3 +10516,14 @@ (define_insn "*movmisalign<mode>_mve_load"
"vldr<V_sz_elem1>.<V_sz_elem>\t%q0, %E1"
[(set_attr "type" "mve_load")]
)
+
+;; Expander for VxBI moves
+(define_expand "mov<mode>"
+ [(set (match_operand:MVE_7 0 "nonimmediate_operand")
+ (match_operand:MVE_7 1 "general_operand"))]
+ "TARGET_HAVE_MVE"
+ {
+ if (!register_operand (operands[0], <MODE>mode))
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ }
+)
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index f5ccb92d097..f00d1cad3e9 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -73,21 +73,26 @@ (define_insn "*arm_movhi_vfp"
(define_insn "*thumb2_movhi_vfp"
[(set
- (match_operand:HI 0 "nonimmediate_operand"
+ (match_operand:MVE_7_HI 0 "nonimmediate_operand"
"=rk, r, l, r, m, r, *t, r, *t, Up, r")
- (match_operand:HI 1 "general_operand"
- "rk, I, Py, n, r, m, r, *t, *t, r, Up"))]
+ (match_operand:MVE_7_HI 1 "general_operand"
+ "rk, IDB, Py, n, r, m, r, *t, *t, r, Up"))]
"TARGET_THUMB2 && TARGET_VFP_BASE
&& !TARGET_VFP_FP16INST
- && (register_operand (operands[0], HImode)
- || register_operand (operands[1], HImode))"
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
{
switch (which_alternative)
{
case 0:
- case 1:
case 2:
return "mov%?\t%0, %1\t%@ movhi";
+ case 1:
+ if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_BOOL)
+ operands[1] = mve_const_bool_vec_to_hi (operands[1]);
+ else
+ operands[1] = gen_lowpart (HImode, operands[1]);
+ return "mov%?\t%0, %1\t%@ movhi";
case 3:
return "movw%?\t%0, %L1\t%@ movhi";
case 4:
@@ -173,20 +178,25 @@ (define_insn "*arm_movhi_fp16"
(define_insn "*thumb2_movhi_fp16"
[(set
- (match_operand:HI 0 "nonimmediate_operand"
+ (match_operand:MVE_7_HI 0 "nonimmediate_operand"
"=rk, r, l, r, m, r, *t, r, *t, Up, r")
- (match_operand:HI 1 "general_operand"
- "rk, I, Py, n, r, m, r, *t, *t, r, Up"))]
+ (match_operand:MVE_7_HI 1 "general_operand"
+ "rk, IDB, Py, n, r, m, r, *t, *t, r, Up"))]
"TARGET_THUMB2 && (TARGET_VFP_FP16INST || TARGET_HAVE_MVE)
- && (register_operand (operands[0], HImode)
- || register_operand (operands[1], HImode))"
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
{
switch (which_alternative)
{
case 0:
- case 1:
case 2:
return "mov%?\t%0, %1\t%@ movhi";
+ case 1:
+ if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_BOOL)
+ operands[1] = mve_const_bool_vec_to_hi (operands[1]);
+ else
+ operands[1] = gen_lowpart (HImode, operands[1]);
+ return "mov%?\t%0, %1\t%@ movhi";
case 3:
return "movw%?\t%0, %L1\t%@ movhi";
case 4:
diff --git a/gcc/testsuite/gcc.dg/rtl/arm/mve-vxbi.c b/gcc/testsuite/gcc.dg/rtl/arm/mve-vxbi.c
new file mode 100644
index 00000000000..093283ed43c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/arm/mve-vxbi.c
@@ -0,0 +1,89 @@
+/* { dg-do compile { target arm*-*-* } } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+void __RTL (startwith ("ira")) foo (void *ptr)
+{
+ (function "foo"
+ (param "ptr"
+ (DECL_RTL (reg/v:SI <0> [ ptr ]))
+ (DECL_RTL_INCOMING (reg:SI r0 [ ptr ]))
+ ) ;; param "n"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 5 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (insn 7 (set (reg:V4BI <1>)
+ (const_vector:V4BI [(const_int 1)
+ (const_int 0)
+ (const_int 0)
+ (const_int 1)])) (nil))
+ (insn 8 (set (mem:V4BI (reg:SI <0>) [1 ptr+0 S2 A16]) (reg:V4BI <1>)))
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+ ) ;; function
+}
+
+void __RTL (startwith ("ira")) foo2 (void *ptr)
+{
+ (function "foo"
+ (param "ptr"
+ (DECL_RTL (reg/v:SI <0> [ ptr ]))
+ (DECL_RTL_INCOMING (reg:SI r0 [ ptr ]))
+ ) ;; param "n"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 5 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (insn 7 (set (reg:V8BI <1>)
+ (const_vector:V8BI [(const_int 1)
+ (const_int 0)
+ (const_int 1)
+ (const_int 1)
+ (const_int 1)
+ (const_int 1)
+ (const_int 0)
+ (const_int 1)])) (nil))
+ (insn 8 (set (mem:V8BI (reg:SI <0>) [1 ptr+0 S2 A16]) (reg:V8BI <1>)))
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+ ) ;; function
+}
+
+void __RTL (startwith ("ira")) foo3 (void *ptr)
+{
+ (function "foo"
+ (param "ptr"
+ (DECL_RTL (reg/v:SI <0> [ ptr ]))
+ (DECL_RTL_INCOMING (reg:SI r0 [ ptr ]))
+ ) ;; param "n"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 5 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (insn 7 (set (reg:V16BI <1>)
+ (const_vector:V16BI [(const_int 0)
+ (const_int 0)
+ (const_int 0)
+ (const_int 0)
+ (const_int 0)
+ (const_int 0)
+ (const_int 0)
+ (const_int 0)
+ (const_int 0)
+ (const_int 0)
+ (const_int 0)
+ (const_int 0)
+ (const_int 0)
+ (const_int 0)
+ (const_int 0)
+ (const_int 0)])) (nil))
+ (insn 8 (set (mem:V16BI (reg:SI <0>) [1 ptr+0 S2 A16]) (reg:V16BI <1>)))
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+ ) ;; function
+}
--
2.25.1
next prev parent reply other threads:[~2022-01-13 14:59 UTC|newest]
Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-01-13 14:56 [PATCH v3 00/15] ARM/MVE use vectors of boolean for predicates Christophe Lyon
2022-01-13 14:56 ` [PATCH v3 01/15] arm: Add new tests for comparison vectorization with Neon and MVE Christophe Lyon
2022-01-13 14:56 ` [PATCH v3 02/15] arm: Add tests for PR target/100757 Christophe Lyon
2022-01-13 14:56 ` [PATCH v3 03/15] arm: Add tests for PR target/101325 Christophe Lyon
2022-01-13 14:56 ` [PATCH v3 04/15] arm: Add GENERAL_AND_VPR_REGS regclass Christophe Lyon
2022-01-19 18:17 ` Andre Vieira (lists)
2022-01-20 9:14 ` Christophe Lyon
2022-01-20 9:43 ` Andre Vieira (lists)
2022-01-20 10:40 ` Richard Sandiford
2022-01-20 10:45 ` Andre Vieira (lists)
2022-01-27 16:21 ` Kyrylo Tkachov
2022-01-13 14:56 ` [PATCH v3 05/15] arm: Add support for VPR_REG in arm_class_likely_spilled_p Christophe Lyon
2022-01-19 18:25 ` Andre Vieira (lists)
2022-01-20 9:20 ` Christophe Lyon
2022-01-13 14:56 ` [PATCH v3 06/15] arm: Fix mve_vmvnq_n_<supf><mode> argument mode Christophe Lyon
2022-01-19 19:03 ` Andre Vieira (lists)
2022-01-20 9:23 ` Christophe Lyon
2022-01-20 9:38 ` Andre Simoes Dias Vieira
2022-01-20 9:44 ` Christophe Lyon
2022-01-20 10:45 ` Richard Sandiford
2022-01-20 11:06 ` Andre Vieira (lists)
2022-01-13 14:56 ` [PATCH v3 07/15] arm: Implement MVE predicates as vectors of booleans Christophe Lyon
2022-01-21 11:20 ` Andre Vieira (lists)
2022-01-21 22:30 ` Christophe Lyon
2022-01-27 16:28 ` Kyrylo Tkachov
2022-01-27 18:10 ` Christophe Lyon
2022-01-31 18:01 ` Richard Sandiford
2022-01-31 22:57 ` Christophe Lyon
2022-02-01 3:42 ` Richard Sandiford
2022-02-02 16:51 ` Christophe Lyon
2022-02-04 9:42 ` Richard Sandiford
2022-02-04 9:54 ` Richard Sandiford
2022-02-17 15:39 ` Christophe Lyon
2022-02-21 18:18 ` Richard Sandiford
2022-01-13 14:56 ` Christophe Lyon [this message]
2022-01-27 16:37 ` [PATCH v3 08/15] arm: Implement auto-vectorized MVE comparisons with vectors of boolean predicates Kyrylo Tkachov
2022-01-13 14:56 ` [PATCH v3 09/15] arm: Fix vcond_mask expander for MVE (PR target/100757) Christophe Lyon
2022-01-27 16:55 ` Kyrylo Tkachov
2022-01-13 14:56 ` [PATCH v3 10/15] arm: Convert remaining MVE vcmp builtins to predicate qualifiers Christophe Lyon
2022-01-13 14:56 ` [PATCH v3 11/15] arm: Convert more MVE " Christophe Lyon
2022-01-13 14:56 ` [PATCH v3 12/15] arm: Convert more load/store " Christophe Lyon
2022-01-27 16:56 ` Kyrylo Tkachov
2022-01-13 14:56 ` [PATCH v3 13/15] arm: Convert more MVE/CDE " Christophe Lyon
2022-01-27 16:56 ` Kyrylo Tkachov
2022-01-13 14:56 ` [PATCH v3 14/15] arm: Add VPR_REG to ALL_REGS Christophe Lyon
2022-01-13 14:56 ` [PATCH v3 15/15] arm: Fix constraint check for V8HI in mve_vector_mem_operand Christophe Lyon
2022-01-14 17:03 ` [arm] MVE: Relax addressing modes for full loads and stores Andre Vieira (lists)
2022-01-17 7:48 ` Christophe Lyon
2022-03-07 14:16 ` Andre Vieira (lists)
2022-03-07 16:14 ` Kyrylo Tkachov
2022-01-14 13:18 ` [PATCH v3 00/15] ARM/MVE use vectors of boolean for predicates Christophe Lyon
2022-01-14 13:33 ` Richard Biener
2022-01-14 14:22 ` Kyrylo Tkachov
2022-01-26 8:40 ` Christophe Lyon
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220113145645.4077141-9-christophe.lyon@foss.st.com \
--to=christophe.lyon@foss.st.com \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).