public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/clyon/heads/mve-autovec)] arm: Implement MVE predicates as vectors of booleans
@ 2022-01-12 8:27 Christophe Lyon
0 siblings, 0 replies; 5+ messages in thread
From: Christophe Lyon @ 2022-01-12 8:27 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:8e9ce79dd16270c18fb7d018eef3e91cd42ab4f2
commit 8e9ce79dd16270c18fb7d018eef3e91cd42ab4f2
Author: Christophe Lyon <christophe.lyon@foss.st.com>
Date: Wed Oct 13 09:16:22 2021 +0000
arm: Implement MVE predicates as vectors of booleans
This patch implements support for vectors of booleans to support MVE
predicates, instead of HImode. Since the ABI mandates pred16_t (aka
uint16_t) to represent predicates in intrinsics prototypes, we
introduce a new "predicate" type qualifier so that we can map relevant
builtins HImode arguments and return value to the appropriate vector
of booleans (VxBI).
We have to update test_vector_ops_duplicate, because it iterates using
an offset in bytes, where we would need to iterate in bits: we skip
the vec_merge test for vectors of booleans.
2021-10-13 Christophe Lyon <christophe.lyon@foss.st.com>
gcc/
PR target/100757
PR target/101325
* config/arm/arm-builtins.c (arm_type_qualifiers): Add qualifier_predicate.
(arm_init_simd_builtin_types): Add new simd types.
(arm_init_builtin): Map predicate vectors arguments to HImode.
(arm_expand_builtin_args): Move HImode predicate arguments to VxBI
rtx. Move return value to HImode rtx.
* config/arm/arm-modes.def (V16BI, V8BI, V4BI): New modes.
* config/arm/arm-simd-builtin-types.def (Pred1x16_t,
Pred2x8_t, Pred4x4_t): New.
* simplify-rtx.c (test_vector_ops_duplicate): Skip vec_merge test
with vectors of booleans.
Diff:
---
gcc/config/arm/arm-builtins.c | 28 +++++++++++++++++++++++++++-
gcc/config/arm/arm-modes.def | 5 +++++
gcc/config/arm/arm-simd-builtin-types.def | 4 ++++
gcc/simplify-rtx.c | 26 ++++++++++++++++----------
4 files changed, 52 insertions(+), 11 deletions(-)
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 3a9ff8f26b8..771759f0cdd 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -92,7 +92,9 @@ enum arm_type_qualifiers
qualifier_lane_pair_index = 0x1000,
/* Lane indices selected in quadtuplets - must be within range of previous
argument = a vector. */
- qualifier_lane_quadtup_index = 0x2000
+ qualifier_lane_quadtup_index = 0x2000,
+ /* MVE vector predicates. */
+ qualifier_predicate = 0x4000
};
/* The qualifier_internal allows generation of a unary builtin from
@@ -1633,6 +1635,13 @@ arm_init_simd_builtin_types (void)
arm_simd_types[Bfloat16x4_t].eltype = arm_bf16_type_node;
arm_simd_types[Bfloat16x8_t].eltype = arm_bf16_type_node;
+ if (TARGET_HAVE_MVE)
+ {
+ arm_simd_types[Pred1x16_t].eltype = unsigned_intHI_type_node;
+ arm_simd_types[Pred2x8_t].eltype = unsigned_intHI_type_node;
+ arm_simd_types[Pred4x4_t].eltype = unsigned_intHI_type_node;
+ }
+
for (i = 0; i < nelts; i++)
{
tree eltype = arm_simd_types[i].eltype;
@@ -1780,6 +1789,11 @@ arm_init_builtin (unsigned int fcode, arm_builtin_datum *d,
if (qualifiers & qualifier_map_mode)
op_mode = d->mode;
+ /* MVE Predicates use HImode as mandated by the ABI: pred16_t is unsigned
+ short. */
+ if (qualifiers & qualifier_predicate)
+ op_mode = HImode;
+
/* For pointers, we want a pointer to the basic type
of the vector. */
if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode))
@@ -3024,6 +3038,11 @@ arm_expand_builtin_args (rtx target, machine_mode map_mode, int fcode,
case ARG_BUILTIN_COPY_TO_REG:
if (POINTER_TYPE_P (TREE_TYPE (arg[argc])))
op[argc] = convert_memory_address (Pmode, op[argc]);
+
+ /* MVE uses mve_pred16_t (aka HImode) for vectors of predicates. */
+ if (GET_MODE_CLASS (mode[argc]) == MODE_VECTOR_BOOL)
+ op[argc] = gen_lowpart (mode[argc], op[argc]);
+
/*gcc_assert (GET_MODE (op[argc]) == mode[argc]); */
if (!(*insn_data[icode].operand[opno].predicate)
(op[argc], mode[argc]))
@@ -3229,6 +3248,13 @@ constant_arg:
else
emit_insn (insn);
+ if (GET_MODE_CLASS (tmode) == MODE_VECTOR_BOOL)
+ {
+ rtx HItarget = gen_reg_rtx (HImode);
+ emit_move_insn (HItarget, gen_lowpart (HImode, target));
+ return HItarget;
+ }
+
return target;
}
diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def
index a5e74ba3943..b414a709a62 100644
--- a/gcc/config/arm/arm-modes.def
+++ b/gcc/config/arm/arm-modes.def
@@ -84,6 +84,11 @@ VECTOR_MODE (FLOAT, BF, 2); /* V2BF. */
VECTOR_MODE (FLOAT, BF, 4); /* V4BF. */
VECTOR_MODE (FLOAT, BF, 8); /* V8BF. */
+/* Predicates for MVE. */
+VECTOR_BOOL_MODE (V16BI, 16, 2);
+VECTOR_BOOL_MODE (V8BI, 8, 2);
+VECTOR_BOOL_MODE (V4BI, 4, 2);
+
/* Fraction and accumulator vector modes. */
VECTOR_MODES (FRACT, 4); /* V4QQ V2HQ */
VECTOR_MODES (UFRACT, 4); /* V4UQQ V2UHQ */
diff --git a/gcc/config/arm/arm-simd-builtin-types.def b/gcc/config/arm/arm-simd-builtin-types.def
index c19a1b6e3eb..d3987985b4c 100644
--- a/gcc/config/arm/arm-simd-builtin-types.def
+++ b/gcc/config/arm/arm-simd-builtin-types.def
@@ -51,3 +51,7 @@
ENTRY (Bfloat16x2_t, V2BF, none, 32, bfloat16, 20)
ENTRY (Bfloat16x4_t, V4BF, none, 64, bfloat16, 20)
ENTRY (Bfloat16x8_t, V8BF, none, 128, bfloat16, 20)
+
+ ENTRY (Pred1x16_t, V16BI, unsigned, 16, uint16, 21)
+ ENTRY (Pred2x8_t, V8BI, unsigned, 8, uint16, 21)
+ ENTRY (Pred4x4_t, V4BI, unsigned, 4, uint16, 21)
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index eea747664d4..153a9964d2c 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -7984,17 +7984,23 @@ test_vector_ops_duplicate (machine_mode mode, rtx scalar_reg)
duplicate, last_par));
/* Test a scalar subreg of a VEC_MERGE of a VEC_DUPLICATE. */
- rtx vector_reg = make_test_reg (mode);
- for (unsigned HOST_WIDE_INT i = 0; i < const_nunits; i++)
+ /* Skip this test for vectors of booleans, because offset is in bytes,
+ while vec_merge indices are in elements (usually bits). */
+ if (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
{
- if (i >= HOST_BITS_PER_WIDE_INT)
- break;
- rtx mask = GEN_INT ((HOST_WIDE_INT_1U << i) | (i + 1));
- rtx vm = gen_rtx_VEC_MERGE (mode, duplicate, vector_reg, mask);
- poly_uint64 offset = i * GET_MODE_SIZE (inner_mode);
- ASSERT_RTX_EQ (scalar_reg,
- simplify_gen_subreg (inner_mode, vm,
- mode, offset));
+ rtx vector_reg = make_test_reg (mode);
+ for (unsigned HOST_WIDE_INT i = 0; i < const_nunits; i++)
+ {
+ if (i >= HOST_BITS_PER_WIDE_INT)
+ break;
+ rtx mask = GEN_INT ((HOST_WIDE_INT_1U << i) | (i + 1));
+ rtx vm = gen_rtx_VEC_MERGE (mode, duplicate, vector_reg, mask);
+ poly_uint64 offset = i * GET_MODE_SIZE (inner_mode);
+
+ ASSERT_RTX_EQ (scalar_reg,
+ simplify_gen_subreg (inner_mode, vm,
+ mode, offset));
+ }
}
}
^ permalink raw reply [flat|nested] 5+ messages in thread
* [gcc(refs/users/clyon/heads/mve-autovec)] arm: Implement MVE predicates as vectors of booleans
@ 2022-02-22 9:08 Christophe Lyon
0 siblings, 0 replies; 5+ messages in thread
From: Christophe Lyon @ 2022-02-22 9:08 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:6decdd9e3bb8fa21176bbc277ac29e73e57abf9f
commit 6decdd9e3bb8fa21176bbc277ac29e73e57abf9f
Author: Christophe Lyon <christophe.lyon@foss.st.com>
Date: Wed Oct 13 09:16:22 2021 +0000
arm: Implement MVE predicates as vectors of booleans
This patch implements support for vectors of booleans to support MVE
predicates, instead of HImode. Since the ABI mandates pred16_t (aka
uint16_t) to represent predicates in intrinsics prototypes, we
introduce a new "predicate" type qualifier so that we can map relevant
builtins HImode arguments and return value to the appropriate vector
of booleans (VxBI).
We have to update test_vector_ops_duplicate, because it iterates using
an offset in bytes, where we would need to iterate in bits: we skip
the vec_merge test for vectors of booleans.
In addition, we have to fix the underlying definition of vectors of
booleans because ARM/MVE needs a different representation than
AArch64/SVE. With ARM/MVE the 'true' bit is duplicated over the
element size, so that a true element of V4BI is represented by
'0b1111'. This patch updates the aarch64 definition of VNx*BI as
needed.
2022-01-13 Christophe Lyon <christophe.lyon@foss.st.com>
Richard Sandiford <richard.sandiford@arm.com>
gcc/
PR target/100757
PR target/101325
* config/aarch64/aarch64-modes.def (VNx16BI, VNx8BI, VNx4BI,
VNx2BI): Update definition.
* config/arm/arm-builtins.c (arm_init_simd_builtin_types): Add new
simd types.
(arm_init_builtin): Map predicate vectors arguments to HImode.
(arm_expand_builtin_args): Move HImode predicate arguments to VxBI
rtx. Move return value to HImode rtx.
* config/arm/arm-builtins.h (arm_type_qualifiers): Add qualifier_predicate.
* config/arm/arm-modes.def (B2I, B4I, V16BI, V8BI, V4BI): New modes.
* config/arm/arm-simd-builtin-types.def (Pred1x16_t,
Pred2x8_t, Pred4x4_t): New.
* emit-rtl.c (init_emit_once): Handle all boolean modes.
* genmodes.c (mode_data): Add boolean field.
(blank_mode): Initialize it.
(make_complex_modes): Fix handling of boolean modes.
(make_vector_modes): Likewise.
(VECTOR_BOOL_MODE): Use new COMPONENT parameter.
(make_vector_bool_mode): Likewise.
(BOOL_MODE): New.
(make_bool_mode): New.
(emit_insn_modes_h): Fix generation of boolean modes.
(emit_class_narrowest_mode): Likewise.
* machmode.def (VECTOR_BOOL_MODE): Document new COMPONENT
parameter. Use new BOOL_MODE instead of FRACTIONAL_INT_MODE to
define BImode.
* rtx-vector-builder.c (rtx_vector_builder::find_cached_value):
Fix handling of constm1_rtx for VECTOR_BOOL.
* simplify-rtx.c (native_encode_rtx): Fix support for VECTOR_BOOL.
(native_decode_vector_rtx): Likewise.
(test_vector_ops_duplicate): Skip vec_merge test
with vectors of booleans.
* varasm.c (output_constant_pool_2): Fix support for VECTOR_BOOL.
Diff:
---
gcc/config/aarch64/aarch64-modes.def | 8 ++--
gcc/config/arm/arm-builtins.c | 36 +++++++++++++++-
gcc/config/arm/arm-builtins.h | 4 +-
gcc/config/arm/arm-modes.def | 8 ++++
gcc/config/arm/arm-simd-builtin-types.def | 4 ++
gcc/emit-rtl.c | 28 +++++++++---
gcc/genmodes.c | 71 ++++++++++++++++++++++---------
gcc/machmode.def | 11 ++---
gcc/rtx-vector-builder.c | 4 +-
gcc/simplify-rtx.c | 34 +++++++++------
gcc/varasm.c | 7 ++-
11 files changed, 159 insertions(+), 56 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 976bf9b42be..8f399225a80 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -47,10 +47,10 @@ ADJUST_FLOAT_FORMAT (HF, &ieee_half_format);
/* Vector modes. */
-VECTOR_BOOL_MODE (VNx16BI, 16, 2);
-VECTOR_BOOL_MODE (VNx8BI, 8, 2);
-VECTOR_BOOL_MODE (VNx4BI, 4, 2);
-VECTOR_BOOL_MODE (VNx2BI, 2, 2);
+VECTOR_BOOL_MODE (VNx16BI, 16, BI, 2);
+VECTOR_BOOL_MODE (VNx8BI, 8, BI, 2);
+VECTOR_BOOL_MODE (VNx4BI, 4, BI, 2);
+VECTOR_BOOL_MODE (VNx2BI, 2, BI, 2);
ADJUST_NUNITS (VNx16BI, aarch64_sve_vg * 8);
ADJUST_NUNITS (VNx8BI, aarch64_sve_vg * 4);
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 9c645722230..0af6fb4d57d 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -1553,11 +1553,26 @@ arm_init_simd_builtin_types (void)
tree eltype = arm_simd_types[i].eltype;
machine_mode mode = arm_simd_types[i].mode;
- if (eltype == NULL)
+ if (eltype == NULL
+ /* VECTOR_BOOL is not supported unless MVE is activated, this would
+ make build_truth_vector_type_for_mode crash. */
+ && ((GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
+ || !TARGET_HAVE_MVE))
continue;
if (arm_simd_types[i].itype == NULL)
{
- tree type = build_vector_type (eltype, GET_MODE_NUNITS (mode));
+ tree type;
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+ {
+ /* Handle MVE predicates: they are internally stored as 16 bits,
+ but are used as vectors of 1, 2 or 4-bit elements. */
+ type = build_truth_vector_type_for_mode (GET_MODE_NUNITS (mode),
+ mode);
+ eltype = TREE_TYPE (type);
+ }
+ else
+ type = build_vector_type (eltype, GET_MODE_NUNITS (mode));
+
type = build_distinct_type_copy (type);
SET_TYPE_STRUCTURAL_EQUALITY (type);
@@ -1695,6 +1710,11 @@ arm_init_builtin (unsigned int fcode, arm_builtin_datum *d,
if (qualifiers & qualifier_map_mode)
op_mode = d->mode;
+ /* MVE Predicates use HImode as mandated by the ABI: pred16_t is unsigned
+ short. */
+ if (qualifiers & qualifier_predicate)
+ op_mode = HImode;
+
/* For pointers, we want a pointer to the basic type
of the vector. */
if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode))
@@ -2939,6 +2959,11 @@ arm_expand_builtin_args (rtx target, machine_mode map_mode, int fcode,
case ARG_BUILTIN_COPY_TO_REG:
if (POINTER_TYPE_P (TREE_TYPE (arg[argc])))
op[argc] = convert_memory_address (Pmode, op[argc]);
+
+ /* MVE uses mve_pred16_t (aka HImode) for vectors of predicates. */
+ if (GET_MODE_CLASS (mode[argc]) == MODE_VECTOR_BOOL)
+ op[argc] = gen_lowpart (mode[argc], op[argc]);
+
/*gcc_assert (GET_MODE (op[argc]) == mode[argc]); */
if (!(*insn_data[icode].operand[opno].predicate)
(op[argc], mode[argc]))
@@ -3144,6 +3169,13 @@ constant_arg:
else
emit_insn (insn);
+ if (GET_MODE_CLASS (tmode) == MODE_VECTOR_BOOL)
+ {
+ rtx HItarget = gen_reg_rtx (HImode);
+ emit_move_insn (HItarget, gen_lowpart (HImode, target));
+ return HItarget;
+ }
+
return target;
}
diff --git a/gcc/config/arm/arm-builtins.h b/gcc/config/arm/arm-builtins.h
index e5130d6d286..a8ef8aef82d 100644
--- a/gcc/config/arm/arm-builtins.h
+++ b/gcc/config/arm/arm-builtins.h
@@ -84,7 +84,9 @@ enum arm_type_qualifiers
qualifier_lane_pair_index = 0x1000,
/* Lane indices selected in quadtuplets - must be within range of previous
argument = a vector. */
- qualifier_lane_quadtup_index = 0x2000
+ qualifier_lane_quadtup_index = 0x2000,
+ /* MVE vector predicates. */
+ qualifier_predicate = 0x4000
};
struct arm_simd_type_info
diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def
index de689c8b45e..9ed0cd042c5 100644
--- a/gcc/config/arm/arm-modes.def
+++ b/gcc/config/arm/arm-modes.def
@@ -84,6 +84,14 @@ VECTOR_MODE (FLOAT, BF, 2); /* V2BF. */
VECTOR_MODE (FLOAT, BF, 4); /* V4BF. */
VECTOR_MODE (FLOAT, BF, 8); /* V8BF. */
+/* Predicates for MVE. */
+BOOL_MODE (B2I, 2, 1);
+BOOL_MODE (B4I, 4, 1);
+
+VECTOR_BOOL_MODE (V16BI, 16, BI, 2);
+VECTOR_BOOL_MODE (V8BI, 8, B2I, 2);
+VECTOR_BOOL_MODE (V4BI, 4, B4I, 2);
+
/* Fraction and accumulator vector modes. */
VECTOR_MODES (FRACT, 4); /* V4QQ V2HQ */
VECTOR_MODES (UFRACT, 4); /* V4UQQ V2UHQ */
diff --git a/gcc/config/arm/arm-simd-builtin-types.def b/gcc/config/arm/arm-simd-builtin-types.def
index 6ba6f211531..d1d6416dad1 100644
--- a/gcc/config/arm/arm-simd-builtin-types.def
+++ b/gcc/config/arm/arm-simd-builtin-types.def
@@ -51,3 +51,7 @@
ENTRY (Bfloat16x2_t, V2BF, none, 32, bfloat16, 20)
ENTRY (Bfloat16x4_t, V4BF, none, 64, bfloat16, 20)
ENTRY (Bfloat16x8_t, V8BF, none, 128, bfloat16, 20)
+
+ ENTRY (Pred1x16_t, V16BI, predicate, 16, pred1, 16)
+ ENTRY (Pred2x8_t, V8BI, predicate, 8, pred1, 15)
+ ENTRY (Pred4x4_t, V4BI, predicate, 4, pred1, 15)
diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
index feeee16d320..5bf7d37cfa6 100644
--- a/gcc/emit-rtl.c
+++ b/gcc/emit-rtl.c
@@ -6239,9 +6239,22 @@ init_emit_once (void)
/* For BImode, 1 and -1 are unsigned and signed interpretations
of the same value. */
- const_tiny_rtx[0][(int) BImode] = const0_rtx;
- const_tiny_rtx[1][(int) BImode] = const_true_rtx;
- const_tiny_rtx[3][(int) BImode] = const_true_rtx;
+ for (mode = MIN_MODE_BOOL;
+ mode <= MAX_MODE_BOOL;
+ mode = (machine_mode)((int)(mode) + 1))
+ {
+ const_tiny_rtx[0][(int) mode] = const0_rtx;
+ if (mode == BImode)
+ {
+ const_tiny_rtx[1][(int) mode] = const_true_rtx;
+ const_tiny_rtx[3][(int) mode] = const_true_rtx;
+ }
+ else
+ {
+ const_tiny_rtx[1][(int) mode] = const1_rtx;
+ const_tiny_rtx[3][(int) mode] = constm1_rtx;
+ }
+ }
for (mode = MIN_MODE_PARTIAL_INT;
mode <= MAX_MODE_PARTIAL_INT;
@@ -6260,13 +6273,16 @@ init_emit_once (void)
const_tiny_rtx[0][(int) mode] = gen_rtx_CONCAT (mode, inner, inner);
}
- /* As for BImode, "all 1" and "all -1" are unsigned and signed
- interpretations of the same value. */
FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_BOOL)
{
const_tiny_rtx[0][(int) mode] = gen_const_vector (mode, 0);
const_tiny_rtx[3][(int) mode] = gen_const_vector (mode, 3);
- const_tiny_rtx[1][(int) mode] = const_tiny_rtx[3][(int) mode];
+ if (GET_MODE_INNER (mode) == BImode)
+ /* As for BImode, "all 1" and "all -1" are unsigned and signed
+ interpretations of the same value. */
+ const_tiny_rtx[1][(int) mode] = const_tiny_rtx[3][(int) mode];
+ else
+ const_tiny_rtx[1][(int) mode] = gen_const_vector (mode, 1);
}
FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT)
diff --git a/gcc/genmodes.c b/gcc/genmodes.c
index 6001b854547..5881abd846c 100644
--- a/gcc/genmodes.c
+++ b/gcc/genmodes.c
@@ -78,6 +78,7 @@ struct mode_data
bool need_bytesize_adj; /* true if this mode needs dynamic size
adjustment */
unsigned int int_n; /* If nonzero, then __int<INT_N> will be defined */
+ bool boolean;
};
static struct mode_data *modes[MAX_MODE_CLASS];
@@ -88,7 +89,8 @@ static const struct mode_data blank_mode = {
0, "<unknown>", MAX_MODE_CLASS,
0, -1U, -1U, -1U, -1U,
0, 0, 0, 0, 0, 0,
- "<unknown>", 0, 0, 0, 0, false, false, 0
+ "<unknown>", 0, 0, 0, 0, false, false, 0,
+ false
};
static htab_t modes_by_name;
@@ -456,7 +458,7 @@ make_complex_modes (enum mode_class cl,
size_t m_len;
/* Skip BImode. FIXME: BImode probably shouldn't be MODE_INT. */
- if (m->precision == 1)
+ if (m->boolean)
continue;
m_len = strlen (m->name);
@@ -528,7 +530,7 @@ make_vector_modes (enum mode_class cl, const char *prefix, unsigned int width,
not be necessary. */
if (cl == MODE_FLOAT && m->bytesize == 1)
continue;
- if (cl == MODE_INT && m->precision == 1)
+ if (m->boolean)
continue;
if ((size_t) snprintf (buf, sizeof buf, "%s%u%s", prefix,
@@ -548,17 +550,18 @@ make_vector_modes (enum mode_class cl, const char *prefix, unsigned int width,
/* Create a vector of booleans called NAME with COUNT elements and
BYTESIZE bytes in total. */
-#define VECTOR_BOOL_MODE(NAME, COUNT, BYTESIZE) \
- make_vector_bool_mode (#NAME, COUNT, BYTESIZE, __FILE__, __LINE__)
+#define VECTOR_BOOL_MODE(NAME, COUNT, COMPONENT, BYTESIZE) \
+ make_vector_bool_mode (#NAME, COUNT, #COMPONENT, BYTESIZE, \
+ __FILE__, __LINE__)
static void ATTRIBUTE_UNUSED
make_vector_bool_mode (const char *name, unsigned int count,
- unsigned int bytesize, const char *file,
- unsigned int line)
+ const char *component, unsigned int bytesize,
+ const char *file, unsigned int line)
{
- struct mode_data *m = find_mode ("BI");
+ struct mode_data *m = find_mode (component);
if (!m)
{
- error ("%s:%d: no mode \"BI\"", file, line);
+ error ("%s:%d: no mode \"%s\"", file, line, component);
return;
}
@@ -596,6 +599,20 @@ make_int_mode (const char *name,
m->precision = precision;
}
+#define BOOL_MODE(N, B, Y) \
+ make_bool_mode (#N, B, Y, __FILE__, __LINE__)
+
+static void
+make_bool_mode (const char *name,
+ unsigned int precision, unsigned int bytesize,
+ const char *file, unsigned int line)
+{
+ struct mode_data *m = new_mode (MODE_INT, name, file, line);
+ m->bytesize = bytesize;
+ m->precision = precision;
+ m->boolean = true;
+}
+
#define OPAQUE_MODE(N, B) \
make_opaque_mode (#N, -1U, B, __FILE__, __LINE__)
@@ -1298,9 +1315,21 @@ enum machine_mode\n{");
/* Don't use BImode for MIN_MODE_INT, since otherwise the middle
end will try to use it for bitfields in structures and the
like, which we do not want. Only the target md file should
- generate BImode widgets. */
- if (first && first->precision == 1 && c == MODE_INT)
- first = first->next;
+ generate BImode widgets. Since some targets such as ARM/MVE
+ define boolean modes with multiple bits, handle those too. */
+ if (first && first->boolean)
+ {
+ struct mode_data *last_bool = first;
+ printf (" MIN_MODE_BOOL = E_%smode,\n", first->name);
+
+ while (first && first->boolean)
+ {
+ last_bool = first;
+ first = first->next;
+ }
+
+ printf (" MAX_MODE_BOOL = E_%smode,\n\n", last_bool->name);
+ }
if (first && last)
printf (" MIN_%s = E_%smode,\n MAX_%s = E_%smode,\n\n",
@@ -1679,15 +1708,15 @@ emit_class_narrowest_mode (void)
print_decl ("unsigned char", "class_narrowest_mode", "MAX_MODE_CLASS");
for (c = 0; c < MAX_MODE_CLASS; c++)
- /* Bleah, all this to get the comment right for MIN_MODE_INT. */
- tagged_printf ("MIN_%s", mode_class_names[c],
- modes[c]
- ? ((c != MODE_INT || modes[c]->precision != 1)
- ? modes[c]->name
- : (modes[c]->next
- ? modes[c]->next->name
- : void_mode->name))
- : void_mode->name);
+ {
+ /* Bleah, all this to get the comment right for MIN_MODE_INT. */
+ struct mode_data *m = modes[c];
+ while (m && m->boolean)
+ m = m->next;
+ const char *comment_name = (m ? m : void_mode)->name;
+
+ tagged_printf ("MIN_%s", mode_class_names[c], comment_name);
+ }
print_closer ();
}
diff --git a/gcc/machmode.def b/gcc/machmode.def
index 866a2082d01..533cf6ab4b2 100644
--- a/gcc/machmode.def
+++ b/gcc/machmode.def
@@ -146,12 +146,13 @@ along with GCC; see the file COPYING3. If not see
Like VECTOR_MODES, but start the mode names with PREFIX instead
of the usual "V".
- VECTOR_BOOL_MODE (NAME, COUNT, BYTESIZE)
+ VECTOR_BOOL_MODE (NAME, COUNT, COMPONENT, BYTESIZE)
Create a vector mode called NAME that contains COUNT boolean
elements and occupies BYTESIZE bytes in total. Each boolean
- element occupies (COUNT * BITS_PER_UNIT) / BYTESIZE bits, with
- the element at index 0 occupying the lsb of the first byte in
- memory. Only the lowest bit of each element is significant.
+ element is of COMPONENT type and occupies (COUNT * BITS_PER_UNIT) /
+ BYTESIZE bits, with the element at index 0 occupying the lsb of the
+ first byte in memory. Only the lowest bit of each element is
+ significant.
OPAQUE_MODE (NAME, BYTESIZE)
Create an opaque mode called NAME that is BYTESIZE bytes wide.
@@ -196,7 +197,7 @@ RANDOM_MODE (VOID);
RANDOM_MODE (BLK);
/* Single bit mode used for booleans. */
-FRACTIONAL_INT_MODE (BI, 1, 1);
+BOOL_MODE (BI, 1, 1);
/* Basic integer modes. We go up to TI in generic code (128 bits).
TImode is needed here because the some front ends now genericly
diff --git a/gcc/rtx-vector-builder.c b/gcc/rtx-vector-builder.c
index e36aba010a0..55ffe0d5a76 100644
--- a/gcc/rtx-vector-builder.c
+++ b/gcc/rtx-vector-builder.c
@@ -90,8 +90,10 @@ rtx_vector_builder::find_cached_value ()
if (GET_MODE_CLASS (m_mode) == MODE_VECTOR_BOOL)
{
- if (elt == const1_rtx || elt == constm1_rtx)
+ if (elt == const1_rtx)
return CONST1_RTX (m_mode);
+ else if (elt == constm1_rtx)
+ return CONSTM1_RTX (m_mode);
else if (elt == const0_rtx)
return CONST0_RTX (m_mode);
else
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index c36c825f958..532537ea48d 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -6876,12 +6876,13 @@ native_encode_rtx (machine_mode mode, rtx x, vec<target_unit> &bytes,
/* This is the only case in which elements can be smaller than
a byte. */
gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
+ auto mask = GET_MODE_MASK (GET_MODE_INNER (mode));
for (unsigned int i = 0; i < num_bytes; ++i)
{
target_unit value = 0;
for (unsigned int j = 0; j < BITS_PER_UNIT; j += elt_bits)
{
- value |= (INTVAL (CONST_VECTOR_ELT (x, elt)) & 1) << j;
+ value |= (INTVAL (CONST_VECTOR_ELT (x, elt)) & mask) << j;
elt += 1;
}
bytes.quick_push (value);
@@ -7025,9 +7026,8 @@ native_decode_vector_rtx (machine_mode mode, const vec<target_unit> &bytes,
unsigned int bit_index = first_byte * BITS_PER_UNIT + i * elt_bits;
unsigned int byte_index = bit_index / BITS_PER_UNIT;
unsigned int lsb = bit_index % BITS_PER_UNIT;
- builder.quick_push (bytes[byte_index] & (1 << lsb)
- ? CONST1_RTX (BImode)
- : CONST0_RTX (BImode));
+ unsigned int value = bytes[byte_index] >> lsb;
+ builder.quick_push (gen_int_mode (value, GET_MODE_INNER (mode)));
}
}
else
@@ -7994,17 +7994,23 @@ test_vector_ops_duplicate (machine_mode mode, rtx scalar_reg)
duplicate, last_par));
/* Test a scalar subreg of a VEC_MERGE of a VEC_DUPLICATE. */
- rtx vector_reg = make_test_reg (mode);
- for (unsigned HOST_WIDE_INT i = 0; i < const_nunits; i++)
+ /* Skip this test for vectors of booleans, because offset is in bytes,
+ while vec_merge indices are in elements (usually bits). */
+ if (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
{
- if (i >= HOST_BITS_PER_WIDE_INT)
- break;
- rtx mask = GEN_INT ((HOST_WIDE_INT_1U << i) | (i + 1));
- rtx vm = gen_rtx_VEC_MERGE (mode, duplicate, vector_reg, mask);
- poly_uint64 offset = i * GET_MODE_SIZE (inner_mode);
- ASSERT_RTX_EQ (scalar_reg,
- simplify_gen_subreg (inner_mode, vm,
- mode, offset));
+ rtx vector_reg = make_test_reg (mode);
+ for (unsigned HOST_WIDE_INT i = 0; i < const_nunits; i++)
+ {
+ if (i >= HOST_BITS_PER_WIDE_INT)
+ break;
+ rtx mask = GEN_INT ((HOST_WIDE_INT_1U << i) | (i + 1));
+ rtx vm = gen_rtx_VEC_MERGE (mode, duplicate, vector_reg, mask);
+ poly_uint64 offset = i * GET_MODE_SIZE (inner_mode);
+
+ ASSERT_RTX_EQ (scalar_reg,
+ simplify_gen_subreg (inner_mode, vm,
+ mode, offset));
+ }
}
}
diff --git a/gcc/varasm.c b/gcc/varasm.c
index 76574be191f..5f59b6ace15 100644
--- a/gcc/varasm.c
+++ b/gcc/varasm.c
@@ -4085,6 +4085,7 @@ output_constant_pool_2 (fixed_size_mode mode, rtx x, unsigned int align)
unsigned int elt_bits = GET_MODE_BITSIZE (mode) / nelts;
unsigned int int_bits = MAX (elt_bits, BITS_PER_UNIT);
scalar_int_mode int_mode = int_mode_for_size (int_bits, 0).require ();
+ unsigned int mask = GET_MODE_MASK (GET_MODE_INNER (mode));
/* Build the constant up one integer at a time. */
unsigned int elts_per_int = int_bits / elt_bits;
@@ -4093,8 +4094,10 @@ output_constant_pool_2 (fixed_size_mode mode, rtx x, unsigned int align)
unsigned HOST_WIDE_INT value = 0;
unsigned int limit = MIN (nelts - i, elts_per_int);
for (unsigned int j = 0; j < limit; ++j)
- if (INTVAL (CONST_VECTOR_ELT (x, i + j)) != 0)
- value |= 1 << (j * elt_bits);
+ {
+ auto elt = INTVAL (CONST_VECTOR_ELT (x, i + j));
+ value |= (elt & mask) << (j * elt_bits);
+ }
output_constant_pool_2 (int_mode, gen_int_mode (value, int_mode),
i != 0 ? MIN (align, int_bits) : align);
}
^ permalink raw reply [flat|nested] 5+ messages in thread
* [gcc(refs/users/clyon/heads/mve-autovec)] arm: Implement MVE predicates as vectors of booleans
@ 2021-11-16 14:06 Christophe Lyon
0 siblings, 0 replies; 5+ messages in thread
From: Christophe Lyon @ 2021-11-16 14:06 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:e82560be5614dd162da126f12972c7f5e1c5320c
commit e82560be5614dd162da126f12972c7f5e1c5320c
Author: Christophe Lyon <christophe.lyon@foss.st.com>
Date: Wed Oct 13 09:16:22 2021 +0000
arm: Implement MVE predicates as vectors of booleans
This patch implements support for vectors of booleans to support MVE
predicates, instead of HImode. Since the ABI mandates pred16_t (aka
uint16_t) to represent predicates in intrinsics prototypes, we
introduce a new "predicate" type qualifier so that we can map relevant
builtins HImode arguments and return value to the appropriate vector
of booleans (VxBI).
We have to update test_vector_ops_duplicate, because it iterates using
an offset in bytes, where we would need to iterate in bits: we skip
the vec_merge test for vectors of booleans.
2021-10-13 Christophe Lyon <christophe.lyon@foss.st.com>
gcc/
PR target/100757
PR target/101325
* config/arm/arm-builtins.c (arm_type_qualifiers): Add qualifier_predicate.
(arm_init_simd_builtin_types): Add new simd types.
(arm_init_builtin): Map predicate vectors arguments to HImode.
(arm_expand_builtin_args): Move HImode predicate arguments to VxBI
rtx. Move return value to HImode rtx.
* config/arm/arm-modes.def (V16BI, V8BI, V4BI): New modes.
* config/arm/arm-simd-builtin-types.def (Pred1x16_t,
Pred2x8_t, Pred4x4_t): New.
* simplify-rtx.c (test_vector_ops_duplicate): Skip vec_merge test
with vectors of booleans.
Diff:
---
gcc/config/arm/arm-builtins.c | 28 +++++++++++++++++++++++++++-
gcc/config/arm/arm-modes.def | 5 +++++
gcc/config/arm/arm-simd-builtin-types.def | 4 ++++
gcc/simplify-rtx.c | 26 ++++++++++++++++----------
4 files changed, 52 insertions(+), 11 deletions(-)
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 3a9ff8f26b8..771759f0cdd 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -92,7 +92,9 @@ enum arm_type_qualifiers
qualifier_lane_pair_index = 0x1000,
/* Lane indices selected in quadtuplets - must be within range of previous
argument = a vector. */
- qualifier_lane_quadtup_index = 0x2000
+ qualifier_lane_quadtup_index = 0x2000,
+ /* MVE vector predicates. */
+ qualifier_predicate = 0x4000
};
/* The qualifier_internal allows generation of a unary builtin from
@@ -1633,6 +1635,13 @@ arm_init_simd_builtin_types (void)
arm_simd_types[Bfloat16x4_t].eltype = arm_bf16_type_node;
arm_simd_types[Bfloat16x8_t].eltype = arm_bf16_type_node;
+ if (TARGET_HAVE_MVE)
+ {
+ arm_simd_types[Pred1x16_t].eltype = unsigned_intHI_type_node;
+ arm_simd_types[Pred2x8_t].eltype = unsigned_intHI_type_node;
+ arm_simd_types[Pred4x4_t].eltype = unsigned_intHI_type_node;
+ }
+
for (i = 0; i < nelts; i++)
{
tree eltype = arm_simd_types[i].eltype;
@@ -1780,6 +1789,11 @@ arm_init_builtin (unsigned int fcode, arm_builtin_datum *d,
if (qualifiers & qualifier_map_mode)
op_mode = d->mode;
+ /* MVE Predicates use HImode as mandated by the ABI: pred16_t is unsigned
+ short. */
+ if (qualifiers & qualifier_predicate)
+ op_mode = HImode;
+
/* For pointers, we want a pointer to the basic type
of the vector. */
if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode))
@@ -3024,6 +3038,11 @@ arm_expand_builtin_args (rtx target, machine_mode map_mode, int fcode,
case ARG_BUILTIN_COPY_TO_REG:
if (POINTER_TYPE_P (TREE_TYPE (arg[argc])))
op[argc] = convert_memory_address (Pmode, op[argc]);
+
+ /* MVE uses mve_pred16_t (aka HImode) for vectors of predicates. */
+ if (GET_MODE_CLASS (mode[argc]) == MODE_VECTOR_BOOL)
+ op[argc] = gen_lowpart (mode[argc], op[argc]);
+
/*gcc_assert (GET_MODE (op[argc]) == mode[argc]); */
if (!(*insn_data[icode].operand[opno].predicate)
(op[argc], mode[argc]))
@@ -3229,6 +3248,13 @@ constant_arg:
else
emit_insn (insn);
+ if (GET_MODE_CLASS (tmode) == MODE_VECTOR_BOOL)
+ {
+ rtx HItarget = gen_reg_rtx (HImode);
+ emit_move_insn (HItarget, gen_lowpart (HImode, target));
+ return HItarget;
+ }
+
return target;
}
diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def
index a5e74ba3943..b414a709a62 100644
--- a/gcc/config/arm/arm-modes.def
+++ b/gcc/config/arm/arm-modes.def
@@ -84,6 +84,11 @@ VECTOR_MODE (FLOAT, BF, 2); /* V2BF. */
VECTOR_MODE (FLOAT, BF, 4); /* V4BF. */
VECTOR_MODE (FLOAT, BF, 8); /* V8BF. */
+/* Predicates for MVE. */
+VECTOR_BOOL_MODE (V16BI, 16, 2);
+VECTOR_BOOL_MODE (V8BI, 8, 2);
+VECTOR_BOOL_MODE (V4BI, 4, 2);
+
/* Fraction and accumulator vector modes. */
VECTOR_MODES (FRACT, 4); /* V4QQ V2HQ */
VECTOR_MODES (UFRACT, 4); /* V4UQQ V2UHQ */
diff --git a/gcc/config/arm/arm-simd-builtin-types.def b/gcc/config/arm/arm-simd-builtin-types.def
index c19a1b6e3eb..d3987985b4c 100644
--- a/gcc/config/arm/arm-simd-builtin-types.def
+++ b/gcc/config/arm/arm-simd-builtin-types.def
@@ -51,3 +51,7 @@
ENTRY (Bfloat16x2_t, V2BF, none, 32, bfloat16, 20)
ENTRY (Bfloat16x4_t, V4BF, none, 64, bfloat16, 20)
ENTRY (Bfloat16x8_t, V8BF, none, 128, bfloat16, 20)
+
+ ENTRY (Pred1x16_t, V16BI, unsigned, 16, uint16, 21)
+ ENTRY (Pred2x8_t, V8BI, unsigned, 8, uint16, 21)
+ ENTRY (Pred4x4_t, V4BI, unsigned, 4, uint16, 21)
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index a719f57870f..7b3962339ba 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -7634,17 +7634,23 @@ test_vector_ops_duplicate (machine_mode mode, rtx scalar_reg)
duplicate, last_par));
/* Test a scalar subreg of a VEC_MERGE of a VEC_DUPLICATE. */
- rtx vector_reg = make_test_reg (mode);
- for (unsigned HOST_WIDE_INT i = 0; i < const_nunits; i++)
+ /* Skip this test for vectors of booleans, because offset is in bytes,
+ while vec_merge indices are in elements (usually bits). */
+ if (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
{
- if (i >= HOST_BITS_PER_WIDE_INT)
- break;
- rtx mask = GEN_INT ((HOST_WIDE_INT_1U << i) | (i + 1));
- rtx vm = gen_rtx_VEC_MERGE (mode, duplicate, vector_reg, mask);
- poly_uint64 offset = i * GET_MODE_SIZE (inner_mode);
- ASSERT_RTX_EQ (scalar_reg,
- simplify_gen_subreg (inner_mode, vm,
- mode, offset));
+ rtx vector_reg = make_test_reg (mode);
+ for (unsigned HOST_WIDE_INT i = 0; i < const_nunits; i++)
+ {
+ if (i >= HOST_BITS_PER_WIDE_INT)
+ break;
+ rtx mask = GEN_INT ((HOST_WIDE_INT_1U << i) | (i + 1));
+ rtx vm = gen_rtx_VEC_MERGE (mode, duplicate, vector_reg, mask);
+ poly_uint64 offset = i * GET_MODE_SIZE (inner_mode);
+
+ ASSERT_RTX_EQ (scalar_reg,
+ simplify_gen_subreg (inner_mode, vm,
+ mode, offset));
+ }
}
}
^ permalink raw reply [flat|nested] 5+ messages in thread
* [gcc(refs/users/clyon/heads/mve-autovec)] arm: Implement MVE predicates as vectors of booleans
@ 2021-10-01 14:37 Christophe Lyon
0 siblings, 0 replies; 5+ messages in thread
From: Christophe Lyon @ 2021-10-01 14:37 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:5abd0f02ff4a881e427c8533f23eb586026a8a3a
commit 5abd0f02ff4a881e427c8533f23eb586026a8a3a
Author: Christophe Lyon <christophe.lyon@foss.st.com>
Date: Mon Aug 30 17:07:08 2021 +0000
arm: Implement MVE predicates as vectors of booleans
This patch implements support for vectors of booleans to support MVE
predicates, instead of HImode. Since the ABI mandates mve_pred16_t (aka
uint16_t) to represent predicates in intrinsics prototypes, we
introduce a new "predicate" type qualifier so that we can map relevant
builtins HImode arguments and return value to the appropriate vector
of booleans (VxBI).
We have to update test_vector_ops_duplicate, because it iterates using
an offset in bytes, where we would need to iterate in bits: we stop
iterating when we reach the end of the vector of booleans.
2021-09-01 Christophe Lyon <christophe.lyon@foss.st.com>
gcc/
PR target/100757
PR target/101325
* config/arm/arm-builtins.c (arm_type_qualifiers): Add qualifier_predicate.
(arm_init_simd_builtin_types): Add new simd types.
(arm_init_builtin): Map predicate vector arguments to HImode.
(arm_expand_builtin_args): Move HImode predicate arguments to VxBI
rtx. Move return value to HImode rtx.
* config/arm/arm-modes.def (V16BI, V8BI, V4BI): New modes.
* config/arm/arm-simd-builtin-types.def (Pred1x16_t,
Pred2x8_t, Pred4x4_t): New.
* simplify-rtx.c (test_vector_ops_duplicate): Avoid going past the
end of the test vector.
Diff:
---
gcc/config/arm/arm-builtins.c | 28 +++++++++++++++++++++++++++-
gcc/config/arm/arm-modes.def | 5 +++++
gcc/config/arm/arm-simd-builtin-types.def | 4 ++++
gcc/simplify-rtx.c | 7 +++++++
4 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 3a9ff8f26b8..771759f0cdd 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -92,7 +92,9 @@ enum arm_type_qualifiers
qualifier_lane_pair_index = 0x1000,
/* Lane indices selected in quadtuplets - must be within range of previous
argument = a vector. */
- qualifier_lane_quadtup_index = 0x2000
+ qualifier_lane_quadtup_index = 0x2000,
+ /* MVE vector predicates. */
+ qualifier_predicate = 0x4000
};
/* The qualifier_internal allows generation of a unary builtin from
@@ -1633,6 +1635,13 @@ arm_init_simd_builtin_types (void)
arm_simd_types[Bfloat16x4_t].eltype = arm_bf16_type_node;
arm_simd_types[Bfloat16x8_t].eltype = arm_bf16_type_node;
+ if (TARGET_HAVE_MVE)
+ {
+ arm_simd_types[Pred1x16_t].eltype = unsigned_intHI_type_node;
+ arm_simd_types[Pred2x8_t].eltype = unsigned_intHI_type_node;
+ arm_simd_types[Pred4x4_t].eltype = unsigned_intHI_type_node;
+ }
+
for (i = 0; i < nelts; i++)
{
tree eltype = arm_simd_types[i].eltype;
@@ -1780,6 +1789,11 @@ arm_init_builtin (unsigned int fcode, arm_builtin_datum *d,
if (qualifiers & qualifier_map_mode)
op_mode = d->mode;
+ /* MVE Predicates use HImode as mandated by the ABI: pred16_t is unsigned
+ short. */
+ if (qualifiers & qualifier_predicate)
+ op_mode = HImode;
+
/* For pointers, we want a pointer to the basic type
of the vector. */
if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode))
@@ -3024,6 +3038,11 @@ arm_expand_builtin_args (rtx target, machine_mode map_mode, int fcode,
case ARG_BUILTIN_COPY_TO_REG:
if (POINTER_TYPE_P (TREE_TYPE (arg[argc])))
op[argc] = convert_memory_address (Pmode, op[argc]);
+
+ /* MVE uses mve_pred16_t (aka HImode) for vectors of predicates. */
+ if (GET_MODE_CLASS (mode[argc]) == MODE_VECTOR_BOOL)
+ op[argc] = gen_lowpart (mode[argc], op[argc]);
+
/*gcc_assert (GET_MODE (op[argc]) == mode[argc]); */
if (!(*insn_data[icode].operand[opno].predicate)
(op[argc], mode[argc]))
@@ -3229,6 +3248,13 @@ constant_arg:
else
emit_insn (insn);
+ if (GET_MODE_CLASS (tmode) == MODE_VECTOR_BOOL)
+ {
+ rtx HItarget = gen_reg_rtx (HImode);
+ emit_move_insn (HItarget, gen_lowpart (HImode, target));
+ return HItarget;
+ }
+
return target;
}
diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def
index a5e74ba3943..b414a709a62 100644
--- a/gcc/config/arm/arm-modes.def
+++ b/gcc/config/arm/arm-modes.def
@@ -84,6 +84,11 @@ VECTOR_MODE (FLOAT, BF, 2); /* V2BF. */
VECTOR_MODE (FLOAT, BF, 4); /* V4BF. */
VECTOR_MODE (FLOAT, BF, 8); /* V8BF. */
+/* Predicates for MVE. */
+VECTOR_BOOL_MODE (V16BI, 16, 2);
+VECTOR_BOOL_MODE (V8BI, 8, 2);
+VECTOR_BOOL_MODE (V4BI, 4, 2);
+
/* Fraction and accumulator vector modes. */
VECTOR_MODES (FRACT, 4); /* V4QQ V2HQ */
VECTOR_MODES (UFRACT, 4); /* V4UQQ V2UHQ */
diff --git a/gcc/config/arm/arm-simd-builtin-types.def b/gcc/config/arm/arm-simd-builtin-types.def
index c19a1b6e3eb..d3987985b4c 100644
--- a/gcc/config/arm/arm-simd-builtin-types.def
+++ b/gcc/config/arm/arm-simd-builtin-types.def
@@ -51,3 +51,7 @@
ENTRY (Bfloat16x2_t, V2BF, none, 32, bfloat16, 20)
ENTRY (Bfloat16x4_t, V4BF, none, 64, bfloat16, 20)
ENTRY (Bfloat16x8_t, V8BF, none, 128, bfloat16, 20)
+
+ ENTRY (Pred1x16_t, V16BI, unsigned, 16, uint16, 21)
+ ENTRY (Pred2x8_t, V8BI, unsigned, 8, uint16, 21)
+ ENTRY (Pred4x4_t, V4BI, unsigned, 4, uint16, 21)
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index a719f57870f..1453f984f99 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -7642,6 +7642,13 @@ test_vector_ops_duplicate (machine_mode mode, rtx scalar_reg)
rtx mask = GEN_INT ((HOST_WIDE_INT_1U << i) | (i + 1));
rtx vm = gen_rtx_VEC_MERGE (mode, duplicate, vector_reg, mask);
poly_uint64 offset = i * GET_MODE_SIZE (inner_mode);
+
+ /* OFFSET is in bytes, so stop testing when we go past the end of a
+ vector of booleans, where we would need an offset in bits. */
+ if ((GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+ && (maybe_ge (offset, GET_MODE_SIZE (mode))))
+ break;
+
ASSERT_RTX_EQ (scalar_reg,
simplify_gen_subreg (inner_mode, vm,
mode, offset));
^ permalink raw reply [flat|nested] 5+ messages in thread
* [gcc(refs/users/clyon/heads/mve-autovec)] arm: Implement MVE predicates as vectors of booleans
@ 2021-09-29 7:30 Christophe Lyon
0 siblings, 0 replies; 5+ messages in thread
From: Christophe Lyon @ 2021-09-29 7:30 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:2ab140081ab93a57c00418902e2d9ba2f782a4d0
commit 2ab140081ab93a57c00418902e2d9ba2f782a4d0
Author: Christophe Lyon <christophe.lyon@foss.st.com>
Date: Mon Aug 30 17:07:08 2021 +0000
arm: Implement MVE predicates as vectors of booleans
This patch implements support for vectors of booleans to support MVE
predicates, instead of HImode. Since the ABI mandates mve_pred16_t (aka
uint16_t) to represent predicates in intrinsics prototypes, we
introduce a new "predicate" type qualifier so that we can map relevant
builtins HImode arguments and return value to the appropriate vector
of booleans (VxBI).
We have to update test_vector_ops_duplicate, because it iterates using
an offset in bytes, where we would need to iterate in bits: we stop
iterating when we reach the end of the vector of booleans.
2021-09-01 Christophe Lyon <christophe.lyon@foss.st.com>
gcc/
PR target/100757
PR target/101325
* config/arm/arm-builtins.c (arm_type_qualifiers): Add qualifier_predicate.
(arm_init_simd_builtin_types): Add new simd types.
(arm_init_builtin): Map predicate vector arguments to HImode.
(arm_expand_builtin_args): Move HImode predicate arguments to VxBI
rtx. Move return value to HImode rtx.
* config/arm/arm-modes.def (V16BI, V8BI, V4BI): New modes.
* config/arm/arm-simd-builtin-types.def (Pred1x16_t,
Pred2x8_t, Pred4x4_t): New.
* simplify-rtx.c (test_vector_ops_duplicate): Avoid going past the
end of the test vector.
Diff:
---
gcc/config/arm/arm-builtins.c | 28 +++++++++++++++++++++++++++-
gcc/config/arm/arm-modes.def | 5 +++++
gcc/config/arm/arm-simd-builtin-types.def | 4 ++++
gcc/simplify-rtx.c | 7 +++++++
4 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 3a9ff8f26b8..771759f0cdd 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -92,7 +92,9 @@ enum arm_type_qualifiers
qualifier_lane_pair_index = 0x1000,
/* Lane indices selected in quadtuplets - must be within range of previous
argument = a vector. */
- qualifier_lane_quadtup_index = 0x2000
+ qualifier_lane_quadtup_index = 0x2000,
+ /* MVE vector predicates. */
+ qualifier_predicate = 0x4000
};
/* The qualifier_internal allows generation of a unary builtin from
@@ -1633,6 +1635,13 @@ arm_init_simd_builtin_types (void)
arm_simd_types[Bfloat16x4_t].eltype = arm_bf16_type_node;
arm_simd_types[Bfloat16x8_t].eltype = arm_bf16_type_node;
+ if (TARGET_HAVE_MVE)
+ {
+ arm_simd_types[Pred1x16_t].eltype = unsigned_intHI_type_node;
+ arm_simd_types[Pred2x8_t].eltype = unsigned_intHI_type_node;
+ arm_simd_types[Pred4x4_t].eltype = unsigned_intHI_type_node;
+ }
+
for (i = 0; i < nelts; i++)
{
tree eltype = arm_simd_types[i].eltype;
@@ -1780,6 +1789,11 @@ arm_init_builtin (unsigned int fcode, arm_builtin_datum *d,
if (qualifiers & qualifier_map_mode)
op_mode = d->mode;
+ /* MVE Predicates use HImode as mandated by the ABI: pred16_t is unsigned
+ short. */
+ if (qualifiers & qualifier_predicate)
+ op_mode = HImode;
+
/* For pointers, we want a pointer to the basic type
of the vector. */
if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode))
@@ -3024,6 +3038,11 @@ arm_expand_builtin_args (rtx target, machine_mode map_mode, int fcode,
case ARG_BUILTIN_COPY_TO_REG:
if (POINTER_TYPE_P (TREE_TYPE (arg[argc])))
op[argc] = convert_memory_address (Pmode, op[argc]);
+
+ /* MVE uses mve_pred16_t (aka HImode) for vectors of predicates. */
+ if (GET_MODE_CLASS (mode[argc]) == MODE_VECTOR_BOOL)
+ op[argc] = gen_lowpart (mode[argc], op[argc]);
+
/*gcc_assert (GET_MODE (op[argc]) == mode[argc]); */
if (!(*insn_data[icode].operand[opno].predicate)
(op[argc], mode[argc]))
@@ -3229,6 +3248,13 @@ constant_arg:
else
emit_insn (insn);
+ if (GET_MODE_CLASS (tmode) == MODE_VECTOR_BOOL)
+ {
+ rtx HItarget = gen_reg_rtx (HImode);
+ emit_move_insn (HItarget, gen_lowpart (HImode, target));
+ return HItarget;
+ }
+
return target;
}
diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def
index a5e74ba3943..b414a709a62 100644
--- a/gcc/config/arm/arm-modes.def
+++ b/gcc/config/arm/arm-modes.def
@@ -84,6 +84,11 @@ VECTOR_MODE (FLOAT, BF, 2); /* V2BF. */
VECTOR_MODE (FLOAT, BF, 4); /* V4BF. */
VECTOR_MODE (FLOAT, BF, 8); /* V8BF. */
+/* Predicates for MVE. */
+VECTOR_BOOL_MODE (V16BI, 16, 2);
+VECTOR_BOOL_MODE (V8BI, 8, 2);
+VECTOR_BOOL_MODE (V4BI, 4, 2);
+
/* Fraction and accumulator vector modes. */
VECTOR_MODES (FRACT, 4); /* V4QQ V2HQ */
VECTOR_MODES (UFRACT, 4); /* V4UQQ V2UHQ */
diff --git a/gcc/config/arm/arm-simd-builtin-types.def b/gcc/config/arm/arm-simd-builtin-types.def
index c19a1b6e3eb..d3987985b4c 100644
--- a/gcc/config/arm/arm-simd-builtin-types.def
+++ b/gcc/config/arm/arm-simd-builtin-types.def
@@ -51,3 +51,7 @@
ENTRY (Bfloat16x2_t, V2BF, none, 32, bfloat16, 20)
ENTRY (Bfloat16x4_t, V4BF, none, 64, bfloat16, 20)
ENTRY (Bfloat16x8_t, V8BF, none, 128, bfloat16, 20)
+
+ ENTRY (Pred1x16_t, V16BI, unsigned, 16, uint16, 21)
+ ENTRY (Pred2x8_t, V8BI, unsigned, 8, uint16, 21)
+ ENTRY (Pred4x4_t, V4BI, unsigned, 4, uint16, 21)
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index a719f57870f..1453f984f99 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -7642,6 +7642,13 @@ test_vector_ops_duplicate (machine_mode mode, rtx scalar_reg)
rtx mask = GEN_INT ((HOST_WIDE_INT_1U << i) | (i + 1));
rtx vm = gen_rtx_VEC_MERGE (mode, duplicate, vector_reg, mask);
poly_uint64 offset = i * GET_MODE_SIZE (inner_mode);
+
+ /* OFFSET is in bytes, so stop testing when we go past the end of a
+ vector of booleans, where we would need an offset in bits. */
+ if ((GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+ && (maybe_ge (offset, GET_MODE_SIZE (mode))))
+ break;
+
ASSERT_RTX_EQ (scalar_reg,
simplify_gen_subreg (inner_mode, vm,
mode, offset));
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2022-02-22 9:08 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-12 8:27 [gcc(refs/users/clyon/heads/mve-autovec)] arm: Implement MVE predicates as vectors of booleans Christophe Lyon
-- strict thread matches above, loose matches on Subject: below --
2022-02-22 9:08 Christophe Lyon
2021-11-16 14:06 Christophe Lyon
2021-10-01 14:37 Christophe Lyon
2021-09-29 7:30 Christophe Lyon
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).