* [PATCH 1/16][ARM] PR/63870 Add qualifier to check lane bounds in expand
2015-07-07 12:32 [PATCH 0/16][ARM/AArch64] Float16_t support, v2 Alan Lawrence
@ 2015-07-07 12:34 ` Alan Lawrence
2015-07-27 14:33 ` Kyrill Tkachov
2015-07-07 12:34 ` [PATCH 4/16][ARM] Add float16x8_t type Alan Lawrence
` (14 subsequent siblings)
15 siblings, 1 reply; 35+ messages in thread
From: Alan Lawrence @ 2015-07-07 12:34 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 195 bytes --]
As per https://gcc.gnu.org/ml/gcc-patches/2015-04/msg01333.html
(While this falls under PR/63870, and I will link to that in the ChangeLog, it
is only a small step towards fixing that PR.)
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 01_arm_qualifier_lane_index.patch --]
[-- Type: text/x-patch; name=01_arm_qualifier_lane_index.patch, Size: 19177 bytes --]
commit 9812db88cff20a505365f68f4065d2fbab998c9c
Author: Alan Lawrence <alan.lawrence@arm.com>
Date: Mon Dec 8 11:04:49 2014 +0000
ARM: Add qualifier_lane_index
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index f960e0a..7f5bf87 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -77,7 +77,9 @@ enum arm_type_qualifiers
/* qualifier_const_pointer | qualifier_map_mode */
qualifier_const_pointer_map_mode = 0x86,
/* Polynomial types. */
- qualifier_poly = 0x100
+ qualifier_poly = 0x100,
+ /* Lane indices - must be within range of previous argument = a vector. */
+ qualifier_lane_index = 0x200
};
/* The qualifier_internal allows generation of a unary builtin from
@@ -108,21 +110,40 @@ arm_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
/* T (T, immediate). */
static enum arm_type_qualifiers
-arm_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+arm_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_immediate };
+#define BINOP_IMM_QUALIFIERS (arm_binop_imm_qualifiers)
+
+/* T (T, lane index). */
+static enum arm_type_qualifiers
+arm_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_none, qualifier_none, qualifier_lane_index };
#define GETLANE_QUALIFIERS (arm_getlane_qualifiers)
/* T (T, T, T, immediate). */
static enum arm_type_qualifiers
-arm_lanemac_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+arm_mac_n_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_none,
qualifier_none, qualifier_immediate };
-#define LANEMAC_QUALIFIERS (arm_lanemac_qualifiers)
+#define MAC_N_QUALIFIERS (arm_mac_n_qualifiers)
+
+/* T (T, T, T, lane index). */
+static enum arm_type_qualifiers
+arm_mac_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_none, qualifier_none, qualifier_none,
+ qualifier_none, qualifier_lane_index };
+#define MAC_LANE_QUALIFIERS (arm_mac_lane_qualifiers)
/* T (T, T, immediate). */
static enum arm_type_qualifiers
-arm_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+arm_ternop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate };
+#define TERNOP_IMM_QUALIFIERS (arm_ternop_imm_qualifiers)
+
+/* T (T, T, lane index). */
+static enum arm_type_qualifiers
+arm_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_none, qualifier_none, qualifier_none, qualifier_lane_index };
#define SETLANE_QUALIFIERS (arm_setlane_qualifiers)
/* T (T, T). */
@@ -1927,6 +1948,7 @@ arm_expand_unop_builtin (enum insn_code icode,
typedef enum {
NEON_ARG_COPY_TO_REG,
NEON_ARG_CONSTANT,
+ NEON_ARG_LANE_INDEX,
NEON_ARG_MEMORY,
NEON_ARG_STOP
} builtin_arg;
@@ -2043,6 +2065,16 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
break;
+ case NEON_ARG_LANE_INDEX:
+ /* Previous argument must be a vector, which this indexes. */
+ gcc_assert (argc > 0);
+ if (CONST_INT_P (op[argc]))
+ {
+ enum machine_mode vmode = mode[argc - 1];
+ neon_lane_bounds (op[argc], 0, GET_MODE_NUNITS (vmode), exp);
+ }
+ /* Fall through - if the lane index isn't a constant then
+ the next case will error. */
case NEON_ARG_CONSTANT:
if (!(*insn_data[icode].operand[opno].predicate)
(op[argc], mode[argc]))
@@ -2170,7 +2202,9 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target)
int operands_k = k - is_void;
int expr_args_k = k - 1;
- if (d->qualifiers[qualifiers_k] & qualifier_immediate)
+ if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
+ args[k] = NEON_ARG_LANE_INDEX;
+ else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
args[k] = NEON_ARG_CONSTANT;
else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate)
{
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 62f91ef..25bdebd 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -86,7 +86,7 @@ extern void neon_pairwise_reduce (rtx, rtx, machine_mode,
extern rtx neon_make_constant (rtx);
extern tree arm_builtin_vectorized_function (tree, tree, tree);
extern void neon_expand_vector_init (rtx, rtx);
-extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
+extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, const_tree);
extern void neon_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
extern HOST_WIDE_INT neon_element_bits (machine_mode);
extern void neon_reinterpret (rtx, rtx);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index e79a369..6e074ea 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -12788,12 +12788,12 @@ neon_expand_vector_init (rtx target, rtx vals)
}
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
- ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
- reported source locations are bogus. */
+ ERR if it doesn't. EXP indicates the source location, which includes the
+ inlining history for intrinsics. */
static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
- const char *err)
+ const_tree exp, const char *desc)
{
HOST_WIDE_INT lane;
@@ -12802,15 +12802,22 @@ bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
lane = INTVAL (operand);
if (lane < low || lane >= high)
- error (err);
+ {
+ if (exp)
+ error ("%K%s %lld out of range %lld - %lld",
+ exp, desc, lane, low, high - 1);
+ else
+ error ("%s %lld out of range %lld - %lld", desc, lane, low, high - 1);
+ }
}
/* Bounds-check lanes. */
void
-neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
+neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
+ const_tree exp)
{
- bounds_check (operand, low, high, "lane out of range");
+ bounds_check (operand, low, high, exp, "lane");
}
/* Bounds-check constants. */
@@ -12818,7 +12825,7 @@ neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
void
neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
- bounds_check (operand, low, high, "constant out of range");
+ bounds_check (operand, low, high, NULL_TREE, "constant");
}
HOST_WIDE_INT
diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def
index f55591d..f150b98 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -67,28 +67,28 @@ VAR8 (BINOP, vqshls, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
VAR8 (BINOP, vqshlu, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
VAR8 (BINOP, vqrshls, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
VAR8 (BINOP, vqrshlu, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vshrs_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vshru_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vrshrs_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vrshru_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR3 (GETLANE, vshrn_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vrshrn_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vqshrns_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vqshrnu_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vqrshrns_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vqrshrnu_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vqshrun_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vqrshrun_n, v8hi, v4si, v2di)
-VAR8 (GETLANE, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vqshl_s_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vqshl_u_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR3 (GETLANE, vshlls_n, v8qi, v4hi, v2si)
-VAR3 (GETLANE, vshllu_n, v8qi, v4hi, v2si)
-VAR8 (SETLANE, vsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (SETLANE, vsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (SETLANE, vrsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (SETLANE, vrsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vshrs_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vshru_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vrshrs_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vrshru_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vshrn_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vrshrn_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vqshrns_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vqshrnu_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vqrshrns_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vqrshrnu_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vqshrun_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vqrshrun_n, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vqshl_s_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vqshl_u_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vshlls_n, v8qi, v4hi, v2si)
+VAR3 (BINOP_IMM, vshllu_n, v8qi, v4hi, v2si)
+VAR8 (TERNOP_IMM, vsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (TERNOP_IMM, vsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (TERNOP_IMM, vrsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (TERNOP_IMM, vrsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
VAR2 (BINOP, vsub, v2sf, v4sf)
VAR3 (BINOP, vsubls, v8qi, v4hi, v2si)
VAR3 (BINOP, vsublu, v8qi, v4hi, v2si)
@@ -140,8 +140,8 @@ VAR6 (BINOP, vpadals, v8qi, v4hi, v2si, v16qi, v8hi, v4si)
VAR6 (BINOP, vpadalu, v8qi, v4hi, v2si, v16qi, v8hi, v4si)
VAR2 (BINOP, vrecps, v2sf, v4sf)
VAR2 (BINOP, vrsqrts, v2sf, v4sf)
-VAR8 (SETLANE, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (SETLANE, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (TERNOP_IMM, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (TERNOP_IMM, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf)
VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si)
VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf)
@@ -162,7 +162,7 @@ VAR10 (SETLANE, vset_lane,
VAR5 (UNOP, vcreate, v8qi, v4hi, v2si, v2sf, di)
VAR10 (UNOP, vdup_n,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
-VAR10 (BINOP, vdup_lane,
+VAR10 (GETLANE, vdup_lane,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di)
VAR5 (UNOP, vget_high, v16qi, v8hi, v4si, v4sf, v2di)
@@ -174,23 +174,23 @@ VAR3 (UNOP, vqmovun, v8hi, v4si, v2di)
VAR3 (UNOP, vmovls, v8qi, v4hi, v2si)
VAR3 (UNOP, vmovlu, v8qi, v4hi, v2si)
VAR6 (SETLANE, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR2 (LANEMAC, vmlals_lane, v4hi, v2si)
-VAR2 (LANEMAC, vmlalu_lane, v4hi, v2si)
-VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si)
-VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR2 (LANEMAC, vmlsls_lane, v4hi, v2si)
-VAR2 (LANEMAC, vmlslu_lane, v4hi, v2si)
-VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si)
+VAR6 (MAC_LANE, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
+VAR2 (MAC_LANE, vmlals_lane, v4hi, v2si)
+VAR2 (MAC_LANE, vmlalu_lane, v4hi, v2si)
+VAR2 (MAC_LANE, vqdmlal_lane, v4hi, v2si)
+VAR6 (MAC_LANE, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
+VAR2 (MAC_LANE, vmlsls_lane, v4hi, v2si)
+VAR2 (MAC_LANE, vmlslu_lane, v4hi, v2si)
+VAR2 (MAC_LANE, vqdmlsl_lane, v4hi, v2si)
VAR6 (BINOP, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR6 (LANEMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR2 (LANEMAC, vmlals_n, v4hi, v2si)
-VAR2 (LANEMAC, vmlalu_n, v4hi, v2si)
-VAR2 (LANEMAC, vqdmlal_n, v4hi, v2si)
-VAR6 (LANEMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR2 (LANEMAC, vmlsls_n, v4hi, v2si)
-VAR2 (LANEMAC, vmlslu_n, v4hi, v2si)
-VAR2 (LANEMAC, vqdmlsl_n, v4hi, v2si)
+VAR6 (MAC_N, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
+VAR2 (MAC_N, vmlals_n, v4hi, v2si)
+VAR2 (MAC_N, vmlalu_n, v4hi, v2si)
+VAR2 (MAC_N, vqdmlal_n, v4hi, v2si)
+VAR6 (MAC_N, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
+VAR2 (MAC_N, vmlsls_n, v4hi, v2si)
+VAR2 (MAC_N, vmlslu_n, v4hi, v2si)
+VAR2 (MAC_N, vqdmlsl_n, v4hi, v2si)
VAR10 (SETLANE, vext,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf)
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 654d9d5..4af74ce 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -2663,8 +2663,6 @@
(match_operand:SI 2 "immediate_operand" "")]
"TARGET_NEON"
{
- neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
-
if (BYTES_BIG_ENDIAN)
{
/* The intrinsics are defined in terms of a model where the
@@ -2694,8 +2692,6 @@
(match_operand:SI 2 "immediate_operand" "")]
"TARGET_NEON"
{
- neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
-
if (BYTES_BIG_ENDIAN)
{
/* The intrinsics are defined in terms of a model where the
@@ -2725,7 +2721,6 @@
(match_operand:SI 2 "immediate_operand" "")]
"TARGET_NEON"
{
- neon_lane_bounds (operands[2], 0, 1);
emit_move_insn (operands[0], operands[1]);
DONE;
})
@@ -2736,18 +2731,11 @@
(match_operand:SI 2 "immediate_operand" "")]
"TARGET_NEON"
{
- switch (INTVAL (operands[2]))
- {
- case 0:
- emit_move_insn (operands[0], gen_lowpart (DImode, operands[1]));
- break;
- case 1:
- emit_move_insn (operands[0], gen_highpart (DImode, operands[1]));
- break;
- default:
- neon_lane_bounds (operands[2], 0, 1);
- FAIL;
- }
+ int lane = INTVAL (operands[2]);
+ gcc_assert ((lane ==0) || (lane == 1));
+ emit_move_insn (operands[0], lane == 0
+ ? gen_lowpart (DImode, operands[1])
+ : gen_highpart (DImode, operands[1]));
DONE;
})
@@ -2759,7 +2747,6 @@
"TARGET_NEON"
{
unsigned int elt = INTVAL (operands[3]);
- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
if (BYTES_BIG_ENDIAN)
{
@@ -2782,7 +2769,6 @@
(match_operand:SI 3 "immediate_operand" "i")]
"TARGET_NEON"
{
- neon_lane_bounds (operands[3], 0, 1);
emit_move_insn (operands[0], operands[1]);
DONE;
})
@@ -2864,7 +2850,6 @@
(match_operand:SI 2 "immediate_operand" "i")]
"TARGET_NEON"
{
- neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<V_double_vector_mode>mode));
if (BYTES_BIG_ENDIAN)
{
unsigned int elt = INTVAL (operands[2]);
@@ -2885,7 +2870,6 @@
(match_operand:SI 2 "immediate_operand" "i")]
"TARGET_NEON"
{
- neon_lane_bounds (operands[2], 0, 1);
emit_move_insn (operands[0], operands[1]);
DONE;
})
@@ -2897,7 +2881,6 @@
(match_operand:SI 2 "immediate_operand" "i")]
"TARGET_NEON"
{
- neon_lane_bounds (operands[2], 0, 1);
emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
DONE;
})
@@ -3097,7 +3080,6 @@
UNSPEC_VMUL_LANE))]
"TARGET_NEON"
{
- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
}
[(set (attr "type")
@@ -3115,7 +3097,6 @@
UNSPEC_VMUL_LANE))]
"TARGET_NEON"
{
- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
}
[(set (attr "type")
@@ -3133,7 +3114,6 @@
VMULL_LANE))]
"TARGET_NEON"
{
- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
}
[(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
@@ -3148,7 +3128,6 @@
UNSPEC_VQDMULL_LANE))]
"TARGET_NEON"
{
- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
}
[(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
@@ -3163,7 +3142,6 @@
VQDMULH_LANE))]
"TARGET_NEON"
{
- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
}
[(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
@@ -3178,7 +3156,6 @@
VQDMULH_LANE))]
"TARGET_NEON"
{
- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
}
[(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
@@ -3194,7 +3171,6 @@
UNSPEC_VMLA_LANE))]
"TARGET_NEON"
{
- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
}
[(set (attr "type")
@@ -3213,7 +3189,6 @@
UNSPEC_VMLA_LANE))]
"TARGET_NEON"
{
- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
}
[(set (attr "type")
@@ -3232,7 +3207,6 @@
VMLAL_LANE))]
"TARGET_NEON"
{
- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
}
[(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
@@ -3248,7 +3222,6 @@
UNSPEC_VQDMLAL_LANE))]
"TARGET_NEON"
{
- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
}
[(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
@@ -3264,7 +3237,6 @@
UNSPEC_VMLS_LANE))]
"TARGET_NEON"
{
- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
}
[(set (attr "type")
@@ -3283,7 +3255,6 @@
UNSPEC_VMLS_LANE))]
"TARGET_NEON"
{
- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
}
[(set (attr "type")
@@ -3302,7 +3273,6 @@
VMLSL_LANE))]
"TARGET_NEON"
{
- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
}
[(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
@@ -3318,7 +3288,6 @@
UNSPEC_VQDMLSL_LANE))]
"TARGET_NEON"
{
- neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
}
[(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH 6/16][ARM] Remaining float16 intrinsics: vld..., vst..., vget_low/high, vcombine
2015-07-07 12:32 [PATCH 0/16][ARM/AArch64] Float16_t support, v2 Alan Lawrence
` (3 preceding siblings ...)
2015-07-07 12:34 ` [PATCH 3/16][ARM] Add float16x4_t intrinsics Alan Lawrence
@ 2015-07-07 12:35 ` Alan Lawrence
2015-07-07 12:35 ` [PATCH 7/16][AArch64] Add basic fp16 support Alan Lawrence
` (10 subsequent siblings)
15 siblings, 0 replies; 35+ messages in thread
From: Alan Lawrence @ 2015-07-07 12:35 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 65 bytes --]
As per https://gcc.gnu.org/ml/gcc-patches/2015-04/msg01341.html
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 06_arm_iterators.patch --]
[-- Type: text/x-patch; name=06_arm_iterators.patch, Size: 48196 bytes --]
commit ae6264b144d25fadcbf219e68ddf3d8c5f40be34
Author: Alan Lawrence <alan.lawrence@arm.com>
Date: Thu Dec 11 11:53:59 2014 +0000
ARM 4/4 v2: v(ld|st)[234](q?|_lane|_dup), vcombine, vget_(low|high) (v2 w/ V_uf_sclr)
All are tied together with so many iterators!
Also vec_extract
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 17e39d8..1ee0a3d 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -241,6 +241,12 @@ typedef struct {
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
VAR9 (T, N, A, B, C, D, E, F, G, H, I) \
VAR1 (T, N, J)
+#define VAR11(T, N, A, B, C, D, E, F, G, H, I, J, K) \
+ VAR10 (T, N, A, B, C, D, E, F, G, H, I, J) \
+ VAR1 (T, N, K)
+#define VAR12(T, N, A, B, C, D, E, F, G, H, I, J, K, L) \
+ VAR11 (T, N, A, B, C, D, E, F, G, H, I, J, K) \
+ VAR1 (T, N, L)
/* The NEON builtin data can be found in arm_neon_builtins.def.
The mode entries in the following table correspond to the "key" type of the
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index db73c70..93fb44f 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -162,6 +162,16 @@ typedef struct uint64x2x2_t
uint64x2_t val[2];
} uint64x2x2_t;
+typedef struct float16x4x2_t
+{
+ float16x4_t val[2];
+} float16x4x2_t;
+
+typedef struct float16x8x2_t
+{
+ float16x8_t val[2];
+} float16x8x2_t;
+
typedef struct float32x2x2_t
{
float32x2_t val[2];
@@ -288,6 +298,16 @@ typedef struct uint64x2x3_t
uint64x2_t val[3];
} uint64x2x3_t;
+typedef struct float16x4x3_t
+{
+ float16x4_t val[3];
+} float16x4x3_t;
+
+typedef struct float16x8x3_t
+{
+ float16x8_t val[3];
+} float16x8x3_t;
+
typedef struct float32x2x3_t
{
float32x2_t val[3];
@@ -414,6 +434,16 @@ typedef struct uint64x2x4_t
uint64x2_t val[4];
} uint64x2x4_t;
+typedef struct float16x4x4_t
+{
+ float16x4_t val[4];
+} float16x4x4_t;
+
+typedef struct float16x8x4_t
+{
+ float16x8_t val[4];
+} float16x8x4_t;
+
typedef struct float32x2x4_t
{
float32x2_t val[4];
@@ -6031,6 +6061,12 @@ vcombine_s64 (int64x1_t __a, int64x1_t __b)
return (int64x2_t)__builtin_neon_vcombinedi (__a, __b);
}
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vcombine_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_neon_vcombinev4hf (__a, __b);
+}
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcombine_f32 (float32x2_t __a, float32x2_t __b)
{
@@ -6105,6 +6141,12 @@ vget_high_s64 (int64x2_t __a)
return (int64x1_t)__builtin_neon_vget_highv2di (__a);
}
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vget_high_f16 (float16x8_t __a)
+{
+ return __builtin_neon_vget_highv8hf (__a);
+}
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vget_high_f32 (float32x4_t __a)
{
@@ -6165,6 +6207,12 @@ vget_low_s32 (int32x4_t __a)
return (int32x2_t)__builtin_neon_vget_lowv4si (__a);
}
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vget_low_f16 (float16x8_t __a)
+{
+ return __builtin_neon_vget_lowv8hf (__a);
+}
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vget_low_f32 (float32x4_t __a)
{
@@ -8712,6 +8760,12 @@ vld1_s64 (const int64_t * __a)
return (int64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a);
}
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vld1_f16 (const float16_t * __a)
+{
+ return __builtin_neon_vld1v4hf ((const __builtin_neon_hf *) __a);
+}
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_f32 (const float32_t * __a)
{
@@ -8786,6 +8840,12 @@ vld1q_s64 (const int64_t * __a)
return (int64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a);
}
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vld1q_f16 (const float16_t * __a)
+{
+ return __builtin_neon_vld1v8hf ((const __builtin_neon_hf *) __a);
+}
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_f32 (const float32_t * __a)
{
@@ -9183,6 +9243,12 @@ vst1_s64 (int64_t * __a, int64x1_t __b)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_f16 (float16_t * __a, float16x4_t __b)
+{
+ __builtin_neon_vst1v4hf ((__builtin_neon_hf *) __a, __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_f32 (float32_t * __a, float32x2_t __b)
{
__builtin_neon_vst1v2sf ((__builtin_neon_sf *) __a, __b);
@@ -9257,6 +9323,12 @@ vst1q_s64 (int64_t * __a, int64x2_t __b)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_f16 (float16_t * __a, float16x8_t __b)
+{
+ __builtin_neon_vst1v8hf ((__builtin_neon_hf *) __a, __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_f32 (float32_t * __a, float32x4_t __b)
{
__builtin_neon_vst1v4sf ((__builtin_neon_sf *) __a, __b);
@@ -9317,6 +9389,12 @@ vst1_lane_s32 (int32_t * __a, int32x2_t __b, const int __c)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_f16 (float16_t * __a, float16x4_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev4hf ((__builtin_neon_hf *) __a, __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c)
{
__builtin_neon_vst1_lanev2sf ((__builtin_neon_sf *) __a, __b, __c);
@@ -9391,6 +9469,12 @@ vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const int __c)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_f16 (float16_t * __a, float16x8_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev8hf ((__builtin_neon_hf *) __a, __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c)
{
__builtin_neon_vst1_lanev4sf ((__builtin_neon_sf *) __a, __b, __c);
@@ -9470,6 +9554,14 @@ vld2_s32 (const int32_t * __a)
return __rv.__i;
}
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
+vld2_f16 (const float16_t * __a)
+{
+ union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v4hf ((const __builtin_neon_hf *) __a);
+ return __rv.__i;
+}
+
__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_f32 (const float32_t * __a)
{
@@ -9568,6 +9660,14 @@ vld2q_s32 (const int32_t * __a)
return __rv.__i;
}
+__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
+vld2q_f16 (const float16_t * __a)
+{
+ union { float16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v8hf ((const __builtin_neon_hf *) __a);
+ return __rv.__i;
+}
+
__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_f32 (const float32_t * __a)
{
@@ -9643,6 +9743,16 @@ vld2_lane_s32 (const int32_t * __a, int32x2x2_t __b, const int __c)
return __rv.__i;
}
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
+vld2_lane_f16 (const float16_t * __a, float16x4x2_t __b, const int __c)
+{
+ union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev4hf ((const __builtin_neon_hf *) __a,
+ __bu.__o, __c);
+ return __rv.__i;
+}
+
__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c)
{
@@ -9715,6 +9825,16 @@ vld2q_lane_s32 (const int32_t * __a, int32x4x2_t __b, const int __c)
return __rv.__i;
}
+__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
+vld2q_lane_f16 (const float16_t * __a, float16x8x2_t __b, const int __c)
+{
+ union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { float16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev8hf ((const __builtin_neon_hf *) __a,
+ __bu.__o, __c);
+ return __rv.__i;
+}
+
__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c)
{
@@ -9775,6 +9895,13 @@ vld2_dup_s32 (const int32_t * __a)
return __rv.__i;
}
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
+vld2_dup_f16 (const float16_t * __a)
+{
+ union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_dupv4hf ((const __builtin_neon_hf *) __a);
+ return __rv.__i;
+}
__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_dup_f32 (const float32_t * __a)
{
@@ -9871,6 +9998,13 @@ vst2_s32 (int32_t * __a, int32x2x2_t __b)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_f16 (float16_t * __a, float16x4x2_t __b)
+{
+ union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2v4hf ((__builtin_neon_hf *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_f32 (float32_t * __a, float32x2x2_t __b)
{
union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
@@ -9957,6 +10091,13 @@ vst2q_s32 (int32_t * __a, int32x4x2_t __b)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_f16 (float16_t * __a, float16x8x2_t __b)
+{
+ union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2v8hf ((__builtin_neon_hf *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_f32 (float32_t * __a, float32x4x2_t __b)
{
union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
@@ -10020,6 +10161,13 @@ vst2_lane_s32 (int32_t * __a, int32x2x2_t __b, const int __c)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_lane_f16 (float16_t * __a, float16x4x2_t __b, const int __c)
+{
+ union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev4hf ((__builtin_neon_hf *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c)
{
union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
@@ -10076,6 +10224,13 @@ vst2q_lane_s32 (int32_t * __a, int32x4x2_t __b, const int __c)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_lane_f16 (float16_t * __a, float16x8x2_t __b, const int __c)
+{
+ union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev8hf ((__builtin_neon_hf *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c)
{
union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
@@ -10127,6 +10282,14 @@ vld3_s32 (const int32_t * __a)
return __rv.__i;
}
+__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
+vld3_f16 (const float16_t * __a)
+{
+ union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v4hf ((const __builtin_neon_hf *) __a);
+ return __rv.__i;
+}
+
__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_f32 (const float32_t * __a)
{
@@ -10225,6 +10388,14 @@ vld3q_s32 (const int32_t * __a)
return __rv.__i;
}
+__extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
+vld3q_f16 (const float16_t * __a)
+{
+ union { float16x8x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v8hf ((const __builtin_neon_hf *) __a);
+ return __rv.__i;
+}
+
__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_f32 (const float32_t * __a)
{
@@ -10300,6 +10471,16 @@ vld3_lane_s32 (const int32_t * __a, int32x2x3_t __b, const int __c)
return __rv.__i;
}
+__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
+vld3_lane_f16 (const float16_t * __a, float16x4x3_t __b, const int __c)
+{
+ union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev4hf ((const __builtin_neon_hf *) __a,
+ __bu.__o, __c);
+ return __rv.__i;
+}
+
__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c)
{
@@ -10372,6 +10553,16 @@ vld3q_lane_s32 (const int32_t * __a, int32x4x3_t __b, const int __c)
return __rv.__i;
}
+__extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
+vld3q_lane_f16 (const float16_t * __a, float16x8x3_t __b, const int __c)
+{
+ union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ union { float16x8x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev8hf ((const __builtin_neon_hf *) __a,
+ __bu.__o, __c);
+ return __rv.__i;
+}
+
__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c)
{
@@ -10432,6 +10623,14 @@ vld3_dup_s32 (const int32_t * __a)
return __rv.__i;
}
+__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
+vld3_dup_f16 (const float16_t * __a)
+{
+ union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_dupv4hf ((const __builtin_neon_hf *) __a);
+ return __rv.__i;
+}
+
__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_dup_f32 (const float32_t * __a)
{
@@ -10528,6 +10727,13 @@ vst3_s32 (int32_t * __a, int32x2x3_t __b)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_f16 (float16_t * __a, float16x4x3_t __b)
+{
+ union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3v4hf ((__builtin_neon_hf *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_f32 (float32_t * __a, float32x2x3_t __b)
{
union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
@@ -10614,6 +10820,13 @@ vst3q_s32 (int32_t * __a, int32x4x3_t __b)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_f16 (float16_t * __a, float16x8x3_t __b)
+{
+ union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3v8hf ((__builtin_neon_hf *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_f32 (float32_t * __a, float32x4x3_t __b)
{
union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
@@ -10677,6 +10890,13 @@ vst3_lane_s32 (int32_t * __a, int32x2x3_t __b, const int __c)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_lane_f16 (float16_t * __a, float16x4x3_t __b, const int __c)
+{
+ union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev4hf ((__builtin_neon_hf *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c)
{
union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
@@ -10733,6 +10953,13 @@ vst3q_lane_s32 (int32_t * __a, int32x4x3_t __b, const int __c)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_lane_f16 (float16_t * __a, float16x8x3_t __b, const int __c)
+{
+ union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev8hf ((__builtin_neon_hf *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c)
{
union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
@@ -10784,6 +11011,14 @@ vld4_s32 (const int32_t * __a)
return __rv.__i;
}
+__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
+vld4_f16 (const float16_t * __a)
+{
+ union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v4hf ((const __builtin_neon_hf *) __a);
+ return __rv.__i;
+}
+
__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_f32 (const float32_t * __a)
{
@@ -10882,6 +11117,14 @@ vld4q_s32 (const int32_t * __a)
return __rv.__i;
}
+__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
+vld4q_f16 (const float16_t * __a)
+{
+ union { float16x8x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v8hf ((const __builtin_neon_hf *) __a);
+ return __rv.__i;
+}
+
__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
vld4q_f32 (const float32_t * __a)
{
@@ -10957,6 +11200,16 @@ vld4_lane_s32 (const int32_t * __a, int32x2x4_t __b, const int __c)
return __rv.__i;
}
+__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
+vld4_lane_f16 (const float16_t * __a, float16x4x4_t __b, const int __c)
+{
+ union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev4hf ((const __builtin_neon_hf *) __a,
+ __bu.__o, __c);
+ return __rv.__i;
+}
+
__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c)
{
@@ -11029,6 +11282,16 @@ vld4q_lane_s32 (const int32_t * __a, int32x4x4_t __b, const int __c)
return __rv.__i;
}
+__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
+vld4q_lane_f16 (const float16_t * __a, float16x8x4_t __b, const int __c)
+{
+ union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ union { float16x8x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev8hf ((const __builtin_neon_hf *) __a,
+ __bu.__o, __c);
+ return __rv.__i;
+}
+
__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c)
{
@@ -11089,6 +11352,14 @@ vld4_dup_s32 (const int32_t * __a)
return __rv.__i;
}
+__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
+vld4_dup_f16 (const float16_t * __a)
+{
+ union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_dupv4hf ((const __builtin_neon_hf *) __a);
+ return __rv.__i;
+}
+
__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_dup_f32 (const float32_t * __a)
{
@@ -11185,6 +11456,13 @@ vst4_s32 (int32_t * __a, int32x2x4_t __b)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_f16 (float16_t * __a, float16x4x4_t __b)
+{
+ union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4v4hf ((__builtin_neon_hf *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_f32 (float32_t * __a, float32x2x4_t __b)
{
union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
@@ -11271,6 +11549,13 @@ vst4q_s32 (int32_t * __a, int32x4x4_t __b)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_f16 (float16_t * __a, float16x8x4_t __b)
+{
+ union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4v8hf ((__builtin_neon_hf *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f32 (float32_t * __a, float32x4x4_t __b)
{
union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
@@ -11334,6 +11619,13 @@ vst4_lane_s32 (int32_t * __a, int32x2x4_t __b, const int __c)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_lane_f16 (float16_t * __a, float16x4x4_t __b, const int __c)
+{
+ union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev4hf ((__builtin_neon_hf *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c)
{
union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
@@ -11390,6 +11682,13 @@ vst4q_lane_s32 (int32_t * __a, int32x4x4_t __b, const int __c)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_lane_f16 (float16_t * __a, float16x8x4_t __b, const int __c)
+{
+ union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev8hf ((__builtin_neon_hf *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c)
{
union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def
index f150b98..0b719df 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -164,9 +164,9 @@ VAR10 (UNOP, vdup_n,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR10 (GETLANE, vdup_lane,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
-VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di)
-VAR5 (UNOP, vget_high, v16qi, v8hi, v4si, v4sf, v2di)
-VAR5 (UNOP, vget_low, v16qi, v8hi, v4si, v4sf, v2di)
+VAR6 (COMBINE, vcombine, v8qi, v4hi, v4hf, v2si, v2sf, di)
+VAR6 (UNOP, vget_high, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
+VAR6 (UNOP, vget_low, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
VAR3 (UNOP, vmovn, v8hi, v4si, v2di)
VAR3 (UNOP, vqmovns, v8hi, v4si, v2di)
VAR3 (UNOP, vqmovnu, v8hi, v4si, v2di)
@@ -242,40 +242,40 @@ VAR6 (UNOP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di, ti)
VAR6 (UNOP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di, ti)
VAR6 (UNOP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di, ti)
VAR6 (UNOP, vreinterpretti, v16qi, v8hi, v4si, v4sf, v2di, ti)
-VAR10 (LOAD1, vld1,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
+VAR12 (LOAD1, vld1,
+ v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
VAR10 (LOAD1LANE, vld1_lane,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR10 (LOAD1, vld1_dup,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
-VAR10 (STORE1, vst1,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
-VAR10 (STORE1LANE, vst1_lane,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
-VAR9 (LOAD1, vld2,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
-VAR7 (LOAD1LANE, vld2_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR5 (LOAD1, vld2_dup, v8qi, v4hi, v2si, v2sf, di)
-VAR9 (STORE1, vst2,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
-VAR7 (STORE1LANE, vst2_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR9 (LOAD1, vld3,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
-VAR7 (LOAD1LANE, vld3_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR5 (LOAD1, vld3_dup, v8qi, v4hi, v2si, v2sf, di)
-VAR9 (STORE1, vst3,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
-VAR7 (STORE1LANE, vst3_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR9 (LOAD1, vld4,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
-VAR7 (LOAD1LANE, vld4_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR5 (LOAD1, vld4_dup, v8qi, v4hi, v2si, v2sf, di)
-VAR9 (STORE1, vst4,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
-VAR7 (STORE1LANE, vst4_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
+VAR12 (STORE1, vst1,
+ v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
+VAR12 (STORE1LANE, vst1_lane,
+ v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
+VAR11 (LOAD1, vld2,
+ v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR9 (LOAD1LANE, vld2_lane,
+ v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR6 (LOAD1, vld2_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
+VAR11 (STORE1, vst2,
+ v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR9 (STORE1LANE, vst2_lane,
+ v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR11 (LOAD1, vld3,
+ v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR9 (LOAD1LANE, vld3_lane,
+ v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR6 (LOAD1, vld3_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
+VAR11 (STORE1, vst3,
+ v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR9 (STORE1LANE, vst3_lane,
+ v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR11 (LOAD1, vld4,
+ v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR9 (LOAD1LANE, vld4_lane,
+ v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR6 (LOAD1, vld4_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
+VAR11 (STORE1, vst4,
+ v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR9 (STORE1LANE, vst4_lane,
+ v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 1e7f3f1..47cc1ee 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -65,20 +65,32 @@
;; Integer modes supported by Neon and IWMMXT, except V2DI
(define_mode_iterator VINTW [V2SI V4HI V8QI V4SI V8HI V16QI])
-;; Double-width vector modes.
+;; Double-width vector modes, on which we support arithmetic (no HF!)
(define_mode_iterator VD [V8QI V4HI V2SI V2SF])
+;; Double-width vector modes plus 64-bit elements for vreinterpret + vcreate.
+(define_mode_iterator VD_RE [V8QI V4HI V2SI V2SF DI])
+
;; Double-width vector modes plus 64-bit elements.
-(define_mode_iterator VDX [V8QI V4HI V2SI V2SF DI])
+(define_mode_iterator VDX [V8QI V4HI V4HF V2SI V2SF DI])
+
+;; Double-width vector modes, with V4HF - for vldN_lane and vstN_lane.
+(define_mode_iterator VD_LANE [V8QI V4HI V4HF V2SI V2SF])
;; Double-width vector modes without floating-point elements.
(define_mode_iterator VDI [V8QI V4HI V2SI])
-;; Quad-width vector modes.
+;; Quad-width vector modes supporting arithmetic (no HF!).
(define_mode_iterator VQ [V16QI V8HI V4SI V4SF])
+;; Quad-width vector modes, including V8HF.
+(define_mode_iterator VQ2 [V16QI V8HI V8HF V4SI V4SF])
+
+;; Quad-width vector modes with 16- or 32-bit elements
+(define_mode_iterator VQ_HS [V8HI V8HF V4SI V4SF])
+
;; Quad-width vector modes plus 64-bit elements.
-(define_mode_iterator VQX [V16QI V8HI V4SI V4SF V2DI])
+(define_mode_iterator VQX [V16QI V8HI V8HF V4SI V4SF V2DI])
;; Quad-width vector modes without floating-point elements.
(define_mode_iterator VQI [V16QI V8HI V4SI])
@@ -111,7 +123,8 @@
(define_mode_iterator VDQI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI])
;; Vector modes, including 64-bit integer elements.
-(define_mode_iterator VDQX [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF DI V2DI])
+(define_mode_iterator VDQX [V8QI V16QI V4HI V8HI V2SI V4SI
+ V4HF V8HF V2SF V4SF DI V2DI])
;; Vector modes including 64-bit integer elements, but no floats.
(define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI])
@@ -366,7 +379,8 @@
;; Define element mode for each vector mode.
(define_mode_attr V_elem [(V8QI "QI") (V16QI "QI")
- (V4HI "HI") (V8HI "HI")
+ (V4HI "HI") (V8HI "HI")
+ (V4HF "HF") (V8HF "HF")
(V2SI "SI") (V4SI "SI")
(V2SF "SF") (V4SF "SF")
(DI "DI") (V2DI "DI")])
@@ -383,6 +397,7 @@
;; size for structure lane/dup loads and stores.
(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI")
(V4HI "SI") (V8HI "SI")
+ (V4HF "SF") (V8HF "SF")
(V2SI "V2SI") (V4SI "V2SI")
(V2SF "V2SF") (V4SF "V2SF")
(DI "V2DI") (V2DI "V2DI")])
@@ -390,6 +405,7 @@
;; Similar, for three elements.
(define_mode_attr V_three_elem [(V8QI "BLK") (V16QI "BLK")
(V4HI "BLK") (V8HI "BLK")
+ (V4HF "BLK") (V8HF "BLK")
(V2SI "BLK") (V4SI "BLK")
(V2SF "BLK") (V4SF "BLK")
(DI "EI") (V2DI "EI")])
@@ -397,6 +413,7 @@
;; Similar, for four elements.
(define_mode_attr V_four_elem [(V8QI "SI") (V16QI "SI")
(V4HI "V4HI") (V8HI "V4HI")
+ (V4HF "V4HF") (V8HF "V4HF")
(V2SI "V4SI") (V4SI "V4SI")
(V2SF "V4SF") (V4SF "V4SF")
(DI "OI") (V2DI "OI")])
@@ -421,7 +438,8 @@
;; Modes with half the number of equal-sized elements.
(define_mode_attr V_HALF [(V16QI "V8QI") (V8HI "V4HI")
- (V4SI "V2SI") (V4SF "V2SF") (V2DF "DF")
+ (V8HF "V4HF") (V4SI "V2SI")
+ (V4SF "V2SF") (V2DF "DF")
(V2DI "DI")])
;; Same, but lower-case.
@@ -431,8 +449,9 @@
;; Modes with twice the number of equal-sized elements.
(define_mode_attr V_DOUBLE [(V8QI "V16QI") (V4HI "V8HI")
- (V2SI "V4SI") (V2SF "V4SF") (DF "V2DF")
- (DI "V2DI")])
+ (V2SI "V4SI") (V4HF "V8HF")
+ (V2SF "V4SF") (DF "V2DF")
+ (DI "V2DI")])
;; Same, but lower-case.
(define_mode_attr V_double [(V8QI "v16qi") (V4HI "v8hi")
@@ -454,8 +473,9 @@
;; Mode of result of comparison operations (and bit-select operand 1).
(define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI")
- (V4HI "V4HI") (V8HI "V8HI")
+ (V4HI "V4HI") (V8HI "V8HI")
(V2SI "V2SI") (V4SI "V4SI")
+ (V4HF "V4HI") (V8HF "V8HI")
(V2SF "V2SI") (V4SF "V4SI")
(DI "DI") (V2DI "V2DI")])
@@ -492,12 +512,14 @@
(define_mode_attr V_uf_sclr [(V8QI "u8") (V16QI "u8")
(V4HI "u16") (V8HI "u16")
(V2SI "32") (V4SI "32")
+ (V4HF "u16") (V8HF "u16")
(V2SF "32") (V4SF "32")])
(define_mode_attr V_sz_elem [(V8QI "8") (V16QI "8")
(V4HI "16") (V8HI "16")
(V2SI "32") (V4SI "32")
(DI "64") (V2DI "64")
+ (V4HF "16") (V8HF "16")
(V2SF "32") (V4SF "32")])
(define_mode_attr V_elem_ch [(V8QI "b") (V16QI "b")
@@ -564,6 +586,7 @@
(DI "true") (V2DI "false")])
(define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16")
+ (V4HF "4") (V8HF "8")
(V4HI "4") (V8HI "8")
(V2SI "2") (V4SI "4")
(V2SF "2") (V4SF "4")
@@ -607,6 +630,7 @@
(define_mode_attr q [(V8QI "") (V16QI "_q")
(V4HI "") (V8HI "_q")
(V2SI "") (V4SI "_q")
+ (V4HF "") (V8HF "_q")
(V2SF "") (V4SF "_q")
(DI "") (V2DI "_q")
(DF "") (V2DF "_q")])
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 4af74ce..f8d6e74 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -320,11 +320,11 @@
[(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
(define_insn "vec_set<mode>_internal"
- [(set (match_operand:VQ 0 "s_register_operand" "=w,w")
- (vec_merge:VQ
- (vec_duplicate:VQ
+ [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
+ (vec_merge:VQ2
+ (vec_duplicate:VQ2
(match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
- (match_operand:VQ 3 "s_register_operand" "0,0")
+ (match_operand:VQ2 3 "s_register_operand" "0,0")
(match_operand:SI 2 "immediate_operand" "i,i")))]
"TARGET_NEON"
{
@@ -407,7 +407,7 @@
(define_insn "vec_extract<mode>"
[(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
(vec_select:<V_elem>
- (match_operand:VQ 1 "s_register_operand" "w,w")
+ (match_operand:VQ2 1 "s_register_operand" "w,w")
(parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
"TARGET_NEON"
{
@@ -2607,7 +2607,7 @@
[(set (match_operand:SI 0 "s_register_operand" "=r")
(sign_extend:SI
(vec_select:<V_elem>
- (match_operand:VQ 1 "s_register_operand" "w")
+ (match_operand:VQ2 1 "s_register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
"TARGET_NEON"
{
@@ -2634,7 +2634,7 @@
[(set (match_operand:SI 0 "s_register_operand" "=r")
(zero_extend:SI
(vec_select:<V_elem>
- (match_operand:VQ 1 "s_register_operand" "w")
+ (match_operand:VQ2 1 "s_register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
"TARGET_NEON"
{
@@ -2774,7 +2774,7 @@
})
(define_expand "neon_vcreate<mode>"
- [(match_operand:VDX 0 "s_register_operand" "")
+ [(match_operand:VD_RE 0 "s_register_operand" "")
(match_operand:DI 1 "general_operand" "")]
"TARGET_NEON"
{
@@ -4125,7 +4125,7 @@
(define_expand "neon_vreinterpretv8qi<mode>"
[(match_operand:V8QI 0 "s_register_operand" "")
- (match_operand:VDX 1 "s_register_operand" "")]
+ (match_operand:VD_RE 1 "s_register_operand" "")]
"TARGET_NEON"
{
neon_reinterpret (operands[0], operands[1]);
@@ -4134,7 +4134,7 @@
(define_expand "neon_vreinterpretv4hi<mode>"
[(match_operand:V4HI 0 "s_register_operand" "")
- (match_operand:VDX 1 "s_register_operand" "")]
+ (match_operand:VD_RE 1 "s_register_operand" "")]
"TARGET_NEON"
{
neon_reinterpret (operands[0], operands[1]);
@@ -4143,7 +4143,7 @@
(define_expand "neon_vreinterpretv2si<mode>"
[(match_operand:V2SI 0 "s_register_operand" "")
- (match_operand:VDX 1 "s_register_operand" "")]
+ (match_operand:VD_RE 1 "s_register_operand" "")]
"TARGET_NEON"
{
neon_reinterpret (operands[0], operands[1]);
@@ -4152,7 +4152,7 @@
(define_expand "neon_vreinterpretv2sf<mode>"
[(match_operand:V2SF 0 "s_register_operand" "")
- (match_operand:VDX 1 "s_register_operand" "")]
+ (match_operand:VD_RE 1 "s_register_operand" "")]
"TARGET_NEON"
{
neon_reinterpret (operands[0], operands[1]);
@@ -4161,7 +4161,7 @@
(define_expand "neon_vreinterpretdi<mode>"
[(match_operand:DI 0 "s_register_operand" "")
- (match_operand:VDX 1 "s_register_operand" "")]
+ (match_operand:VD_RE 1 "s_register_operand" "")]
"TARGET_NEON"
{
neon_reinterpret (operands[0], operands[1]);
@@ -4420,14 +4420,14 @@
(define_expand "vec_load_lanesoi<mode>"
[(set (match_operand:OI 0 "s_register_operand")
(unspec:OI [(match_operand:OI 1 "neon_struct_operand")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD2))]
"TARGET_NEON")
(define_insn "neon_vld2<mode>"
[(set (match_operand:OI 0 "s_register_operand" "=w")
(unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD2))]
"TARGET_NEON"
"vld2.<V_sz_elem>\t%h0, %A1"
@@ -4438,7 +4438,7 @@
(unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
(match_operand:TI 2 "s_register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")
- (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD2_LANE))]
"TARGET_NEON"
{
@@ -4463,7 +4463,7 @@
(unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
(match_operand:OI 2 "s_register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")
- (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD2_LANE))]
"TARGET_NEON"
{
@@ -4534,14 +4534,14 @@
(define_expand "vec_store_lanesoi<mode>"
[(set (match_operand:OI 0 "neon_struct_operand")
(unspec:OI [(match_operand:OI 1 "s_register_operand")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST2))]
"TARGET_NEON")
(define_insn "neon_vst2<mode>"
[(set (match_operand:OI 0 "neon_struct_operand" "=Um")
(unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST2))]
"TARGET_NEON"
"vst2.<V_sz_elem>\t%h1, %A0"
@@ -4553,7 +4553,7 @@
(unspec:<V_two_elem>
[(match_operand:TI 1 "s_register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")
- (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST2_LANE))]
"TARGET_NEON"
{
@@ -4578,7 +4578,7 @@
(unspec:<V_two_elem>
[(match_operand:OI 1 "s_register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")
- (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST2_LANE))]
"TARGET_NEON"
{
@@ -4631,7 +4631,7 @@
(define_expand "vec_load_lanesci<mode>"
[(match_operand:CI 0 "s_register_operand")
(match_operand:CI 1 "neon_struct_operand")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
@@ -4641,7 +4641,7 @@
(define_expand "neon_vld3<mode>"
[(match_operand:CI 0 "s_register_operand")
(match_operand:CI 1 "neon_struct_operand")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
rtx mem;
@@ -4656,7 +4656,7 @@
(define_insn "neon_vld3qa<mode>"
[(set (match_operand:CI 0 "s_register_operand" "=w")
(unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD3A))]
"TARGET_NEON"
{
@@ -4676,7 +4676,7 @@
[(set (match_operand:CI 0 "s_register_operand" "=w")
(unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
(match_operand:CI 2 "s_register_operand" "0")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD3B))]
"TARGET_NEON"
{
@@ -4697,7 +4697,7 @@
(unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
(match_operand:EI 2 "s_register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")
- (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD3_LANE))]
"TARGET_NEON"
{
@@ -4724,7 +4724,7 @@
(unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
(match_operand:CI 2 "s_register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")
- (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD3_LANE))]
"TARGET_NEON"
{
@@ -4804,7 +4804,7 @@
(define_expand "vec_store_lanesci<mode>"
[(match_operand:CI 0 "neon_struct_operand")
(match_operand:CI 1 "s_register_operand")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
@@ -4814,7 +4814,7 @@
(define_expand "neon_vst3<mode>"
[(match_operand:CI 0 "neon_struct_operand")
(match_operand:CI 1 "s_register_operand")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
rtx mem;
@@ -4829,7 +4829,7 @@
(define_insn "neon_vst3qa<mode>"
[(set (match_operand:EI 0 "neon_struct_operand" "=Um")
(unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST3A))]
"TARGET_NEON"
{
@@ -4848,7 +4848,7 @@
(define_insn "neon_vst3qb<mode>"
[(set (match_operand:EI 0 "neon_struct_operand" "=Um")
(unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST3B))]
"TARGET_NEON"
{
@@ -4869,7 +4869,7 @@
(unspec:<V_three_elem>
[(match_operand:EI 1 "s_register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")
- (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST3_LANE))]
"TARGET_NEON"
{
@@ -4896,7 +4896,7 @@
(unspec:<V_three_elem>
[(match_operand:CI 1 "s_register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")
- (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST3_LANE))]
"TARGET_NEON"
{
@@ -4951,7 +4951,7 @@
(define_expand "vec_load_lanesxi<mode>"
[(match_operand:XI 0 "s_register_operand")
(match_operand:XI 1 "neon_struct_operand")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
@@ -4961,7 +4961,7 @@
(define_expand "neon_vld4<mode>"
[(match_operand:XI 0 "s_register_operand")
(match_operand:XI 1 "neon_struct_operand")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
rtx mem;
@@ -4976,7 +4976,7 @@
(define_insn "neon_vld4qa<mode>"
[(set (match_operand:XI 0 "s_register_operand" "=w")
(unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD4A))]
"TARGET_NEON"
{
@@ -4997,7 +4997,7 @@
[(set (match_operand:XI 0 "s_register_operand" "=w")
(unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
(match_operand:XI 2 "s_register_operand" "0")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD4B))]
"TARGET_NEON"
{
@@ -5019,7 +5019,7 @@
(unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
(match_operand:OI 2 "s_register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")
- (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD4_LANE))]
"TARGET_NEON"
{
@@ -5047,7 +5047,7 @@
(unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
(match_operand:XI 2 "s_register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")
- (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD4_LANE))]
"TARGET_NEON"
{
@@ -5132,7 +5132,7 @@
(define_expand "vec_store_lanesxi<mode>"
[(match_operand:XI 0 "neon_struct_operand")
(match_operand:XI 1 "s_register_operand")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
@@ -5142,7 +5142,7 @@
(define_expand "neon_vst4<mode>"
[(match_operand:XI 0 "neon_struct_operand")
(match_operand:XI 1 "s_register_operand")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
rtx mem;
@@ -5157,7 +5157,7 @@
(define_insn "neon_vst4qa<mode>"
[(set (match_operand:OI 0 "neon_struct_operand" "=Um")
(unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST4A))]
"TARGET_NEON"
{
@@ -5177,7 +5177,7 @@
(define_insn "neon_vst4qb<mode>"
[(set (match_operand:OI 0 "neon_struct_operand" "=Um")
(unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
- (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST4B))]
"TARGET_NEON"
{
@@ -5199,7 +5199,7 @@
(unspec:<V_four_elem>
[(match_operand:OI 1 "s_register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")
- (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST4_LANE))]
"TARGET_NEON"
{
@@ -5227,7 +5227,7 @@
(unspec:<V_four_elem>
[(match_operand:XI 1 "s_register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")
- (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST4_LANE))]
"TARGET_NEON"
{
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH 10/16][AArch64] vld{2,3,4}{,_lane,_dup},vcombine,vcreate
2015-07-07 12:32 [PATCH 0/16][ARM/AArch64] Float16_t support, v2 Alan Lawrence
` (9 preceding siblings ...)
2015-07-07 12:36 ` [PATCH 9/16][AArch64] Add support for float16x{4,8}_t vectors/builtins Alan Lawrence
@ 2015-07-07 12:36 ` Alan Lawrence
2015-07-07 12:37 ` [PATCH 13/16][AArch64] Add vcvt(_high)?_f32_f16 intrinsics, with BE RTL fix Alan Lawrence
` (4 subsequent siblings)
15 siblings, 0 replies; 35+ messages in thread
From: Alan Lawrence @ 2015-07-07 12:36 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 65 bytes --]
As per https://gcc.gnu.org/ml/gcc-patches/2015-04/msg01342.html
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 10_aarch64_vcreate_et_al.patch --]
[-- Type: text/x-patch; name=10_aarch64_vcreate_et_al.patch, Size: 30201 bytes --]
commit ef719e5d3d6eccc5cf621851283b7c0ba1a9ee6c
Author: Alan Lawrence <alan.lawrence@arm.com>
Date: Tue Aug 5 17:52:28 2014 +0100
AArch64 3/N: v(create|combine|v(ld|st|ld...dup/lane|st...lane)[234](q?))_f16; tests vldN{,_lane,_dup} inc bigendian. Add __builtin_aarch64_simd_hf.
Fix some casts, to ..._hf not ..._sf !
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index a6c3377..5367ba6 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -300,6 +300,12 @@ aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
#define VAR12(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
VAR1 (T, N, MAP, L)
+#define VAR13(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \
+ VAR12 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
+ VAR1 (T, N, MAP, M)
+#define VAR14(T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
+ VAR13 (T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \
+ VAR1 (T, X, MAP, N)
#include "aarch64-builtin-iterators.h"
@@ -377,6 +383,7 @@ const char *aarch64_scalar_builtin_types[] = {
"__builtin_aarch64_simd_qi",
"__builtin_aarch64_simd_hi",
"__builtin_aarch64_simd_si",
+ "__builtin_aarch64_simd_hf",
"__builtin_aarch64_simd_sf",
"__builtin_aarch64_simd_di",
"__builtin_aarch64_simd_df",
@@ -664,6 +671,8 @@ aarch64_init_simd_builtin_scalar_types (void)
"__builtin_aarch64_simd_qi");
(*lang_hooks.types.register_builtin_type) (intHI_type_node,
"__builtin_aarch64_simd_hi");
+ (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node,
+ "__builtin_aarch64_simd_hf");
(*lang_hooks.types.register_builtin_type) (intSI_type_node,
"__builtin_aarch64_simd_si");
(*lang_hooks.types.register_builtin_type) (float_type_node,
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index ccf063a..bbf5230 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1063,6 +1063,9 @@ aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
case V2SImode:
gen = gen_aarch64_simd_combinev2si;
break;
+ case V4HFmode:
+ gen = gen_aarch64_simd_combinev4hf;
+ break;
case V2SFmode:
gen = gen_aarch64_simd_combinev2sf;
break;
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 7425485..d61e619 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -153,6 +153,16 @@ typedef struct uint64x2x2_t
uint64x2_t val[2];
} uint64x2x2_t;
+typedef struct float16x4x2_t
+{
+ float16x4_t val[2];
+} float16x4x2_t;
+
+typedef struct float16x8x2_t
+{
+ float16x8_t val[2];
+} float16x8x2_t;
+
typedef struct float32x2x2_t
{
float32x2_t val[2];
@@ -273,6 +283,16 @@ typedef struct uint64x2x3_t
uint64x2_t val[3];
} uint64x2x3_t;
+typedef struct float16x4x3_t
+{
+ float16x4_t val[3];
+} float16x4x3_t;
+
+typedef struct float16x8x3_t
+{
+ float16x8_t val[3];
+} float16x8x3_t;
+
typedef struct float32x2x3_t
{
float32x2_t val[3];
@@ -393,6 +413,16 @@ typedef struct uint64x2x4_t
uint64x2_t val[4];
} uint64x2x4_t;
+typedef struct float16x4x4_t
+{
+ float16x4_t val[4];
+} float16x4x4_t;
+
+typedef struct float16x8x4_t
+{
+ float16x8_t val[4];
+} float16x8x4_t;
+
typedef struct float32x2x4_t
{
float32x2_t val[4];
@@ -2644,6 +2674,12 @@ vcreate_s64 (uint64_t __a)
return (int64x1_t) {__a};
}
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vcreate_f16 (uint64_t __a)
+{
+ return (float16x4_t) __a;
+}
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcreate_f32 (uint64_t __a)
{
@@ -4780,6 +4816,12 @@ vcombine_s64 (int64x1_t __a, int64x1_t __b)
return __builtin_aarch64_combinedi (__a[0], __b[0]);
}
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vcombine_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_combinev4hf (__a, __b);
+}
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcombine_f32 (float32x2_t __a, float32x2_t __b)
{
@@ -9908,7 +9950,7 @@ vtstq_p16 (poly16x8_t a, poly16x8_t b)
+------+----+----+----+----+
|uint | Y | Y | N | N |
+------+----+----+----+----+
- |float | - | - | N | N |
+ |float | - | Y | N | N |
+------+----+----+----+----+
|poly | Y | Y | - | - |
+------+----+----+----+----+
@@ -9922,7 +9964,7 @@ vtstq_p16 (poly16x8_t a, poly16x8_t b)
+------+----+----+----+----+
|uint | Y | Y | Y | Y |
+------+----+----+----+----+
- |float | - | - | Y | Y |
+ |float | - | Y | Y | Y |
+------+----+----+----+----+
|poly | Y | Y | - | - |
+------+----+----+----+----+
@@ -9936,7 +9978,7 @@ vtstq_p16 (poly16x8_t a, poly16x8_t b)
+------+----+----+----+----+
|uint | Y | N | N | Y |
+------+----+----+----+----+
- |float | - | - | N | Y |
+ |float | - | N | N | Y |
+------+----+----+----+----+
|poly | Y | N | - | - |
+------+----+----+----+----+
@@ -9952,6 +9994,7 @@ __STRUCTN (int, 8, 2)
__STRUCTN (int, 16, 2)
__STRUCTN (uint, 8, 2)
__STRUCTN (uint, 16, 2)
+__STRUCTN (float, 16, 2)
__STRUCTN (poly, 8, 2)
__STRUCTN (poly, 16, 2)
/* 3-element structs. */
@@ -9963,6 +10006,7 @@ __STRUCTN (uint, 8, 3)
__STRUCTN (uint, 16, 3)
__STRUCTN (uint, 32, 3)
__STRUCTN (uint, 64, 3)
+__STRUCTN (float, 16, 3)
__STRUCTN (float, 32, 3)
__STRUCTN (float, 64, 3)
__STRUCTN (poly, 8, 3)
@@ -10000,6 +10044,8 @@ vst2_lane_ ## funcsuffix (ptrtype *__ptr, \
__ptr, __o, __c); \
}
+__ST2_LANE_FUNC (float16x4x2_t, float16x8x2_t, float16_t, v8hf, hf, f16,
+ float16x8_t)
__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32,
float32x4_t)
__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64,
@@ -10032,6 +10078,7 @@ vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \
__ptr, __temp.__o, __c); \
}
+__ST2_LANE_FUNC (float16x8x2_t, float16_t, v8hf, hf, f16)
__ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32)
__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
@@ -10073,6 +10120,8 @@ vst3_lane_ ## funcsuffix (ptrtype *__ptr, \
__ptr, __o, __c); \
}
+__ST3_LANE_FUNC (float16x4x3_t, float16x8x3_t, float16_t, v8hf, hf, f16,
+ float16x8_t)
__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v4sf, sf, f32,
float32x4_t)
__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, v2df, df, f64,
@@ -10105,6 +10154,7 @@ vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \
__ptr, __temp.__o, __c); \
}
+__ST3_LANE_FUNC (float16x8x3_t, float16_t, v8hf, hf, f16)
__ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32)
__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
@@ -10151,6 +10201,8 @@ vst4_lane_ ## funcsuffix (ptrtype *__ptr, \
__ptr, __o, __c); \
}
+__ST4_LANE_FUNC (float16x4x4_t, float16x8x4_t, float16_t, v8hf, hf, f16,
+ float16x8_t)
__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v4sf, sf, f32,
float32x4_t)
__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, v2df, df, f64,
@@ -10183,6 +10235,7 @@ vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \
__ptr, __temp.__o, __c); \
}
+__ST4_LANE_FUNC (float16x8x4_t, float16_t, v8hf, hf, f16)
__ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32)
__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
@@ -15239,6 +15292,17 @@ vld2_u32 (const uint32_t * __a)
return ret;
}
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
+vld2_f16 (const float16_t * __a)
+{
+ float16x4x2_t ret;
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_ld2v4hf (__a);
+ ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0);
+ ret.val[1] = __builtin_aarch64_get_dregoiv4hf (__o, 1);
+ return ret;
+}
+
__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_f32 (const float32_t * __a)
{
@@ -15360,6 +15424,17 @@ vld2q_u64 (const uint64_t * __a)
return ret;
}
+__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
+vld2q_f16 (const float16_t * __a)
+{
+ float16x8x2_t ret;
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_ld2v8hf (__a);
+ ret.val[0] = __builtin_aarch64_get_qregoiv8hf (__o, 0);
+ ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1);
+ return ret;
+}
+
__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_f32 (const float32_t * __a)
{
@@ -15514,6 +15589,18 @@ vld3_u32 (const uint32_t * __a)
return ret;
}
+__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
+vld3_f16 (const float16_t * __a)
+{
+ float16x4x3_t ret;
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_ld3v4hf (__a);
+ ret.val[0] = __builtin_aarch64_get_dregciv4hf (__o, 0);
+ ret.val[1] = __builtin_aarch64_get_dregciv4hf (__o, 1);
+ ret.val[2] = __builtin_aarch64_get_dregciv4hf (__o, 2);
+ return ret;
+}
+
__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_f32 (const float32_t * __a)
{
@@ -15646,6 +15733,18 @@ vld3q_u64 (const uint64_t * __a)
return ret;
}
+__extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
+vld3q_f16 (const float16_t * __a)
+{
+ float16x8x3_t ret;
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_ld3v8hf (__a);
+ ret.val[0] = __builtin_aarch64_get_qregciv8hf (__o, 0);
+ ret.val[1] = __builtin_aarch64_get_qregciv8hf (__o, 1);
+ ret.val[2] = __builtin_aarch64_get_qregciv8hf (__o, 2);
+ return ret;
+}
+
__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_f32 (const float32_t * __a)
{
@@ -15813,6 +15912,19 @@ vld4_u32 (const uint32_t * __a)
return ret;
}
+__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
+vld4_f16 (const float16_t * __a)
+{
+ float16x4x4_t ret;
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_ld4v4hf (__a);
+ ret.val[0] = __builtin_aarch64_get_dregxiv4hf (__o, 0);
+ ret.val[1] = __builtin_aarch64_get_dregxiv4hf (__o, 1);
+ ret.val[2] = __builtin_aarch64_get_dregxiv4hf (__o, 2);
+ ret.val[3] = __builtin_aarch64_get_dregxiv4hf (__o, 3);
+ return ret;
+}
+
__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_f32 (const float32_t * __a)
{
@@ -15956,6 +16068,19 @@ vld4q_u64 (const uint64_t * __a)
return ret;
}
+__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
+vld4q_f16 (const float16_t * __a)
+{
+ float16x8x4_t ret;
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_ld4v8hf (__a);
+ ret.val[0] = __builtin_aarch64_get_qregxiv8hf (__o, 0);
+ ret.val[1] = __builtin_aarch64_get_qregxiv8hf (__o, 1);
+ ret.val[2] = __builtin_aarch64_get_qregxiv8hf (__o, 2);
+ ret.val[3] = __builtin_aarch64_get_qregxiv8hf (__o, 3);
+ return ret;
+}
+
__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
vld4q_f32 (const float32_t * __a)
{
@@ -16017,6 +16142,18 @@ vld2_dup_s32 (const int32_t * __a)
return ret;
}
+
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
+vld2_dup_f16 (const float16_t * __a)
+{
+ float16x4x2_t ret;
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_ld2rv4hf ((const __builtin_aarch64_simd_hf *) __a);
+ ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0);
+ ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 1);
+ return ret;
+}
+
__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_dup_f32 (const float32_t * __a)
{
@@ -16226,6 +16363,17 @@ vld2q_dup_u64 (const uint64_t * __a)
return ret;
}
+__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
+vld2q_dup_f16 (const float16_t * __a)
+{
+ float16x8x2_t ret;
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_ld2rv8hf ((const __builtin_aarch64_simd_hf *) __a);
+ ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 0);
+ ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1);
+ return ret;
+}
+
__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_dup_f32 (const float32_t * __a)
{
@@ -16380,6 +16528,18 @@ vld3_dup_u32 (const uint32_t * __a)
return ret;
}
+__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
+vld3_dup_f16 (const float16_t * __a)
+{
+ float16x4x3_t ret;
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_ld3rv4hf ((const __builtin_aarch64_simd_hf *) __a);
+ ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 0);
+ ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 1);
+ ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 2);
+ return ret;
+}
+
__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_dup_f32 (const float32_t * __a)
{
@@ -16512,6 +16672,18 @@ vld3q_dup_u64 (const uint64_t * __a)
return ret;
}
+__extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
+vld3q_dup_f16 (const float16_t * __a)
+{
+ float16x8x3_t ret;
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_ld3rv8hf ((const __builtin_aarch64_simd_hf *) __a);
+ ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 0);
+ ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 1);
+ ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 2);
+ return ret;
+}
+
__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_dup_f32 (const float32_t * __a)
{
@@ -16679,6 +16851,19 @@ vld4_dup_u32 (const uint32_t * __a)
return ret;
}
+__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
+vld4_dup_f16 (const float16_t * __a)
+{
+ float16x4x4_t ret;
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_ld4rv4hf ((const __builtin_aarch64_simd_hf *) __a);
+ ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 0);
+ ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 1);
+ ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 2);
+ ret.val[3] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 3);
+ return ret;
+}
+
__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_dup_f32 (const float32_t * __a)
{
@@ -16822,6 +17007,19 @@ vld4q_dup_u64 (const uint64_t * __a)
return ret;
}
+__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
+vld4q_dup_f16 (const float16_t * __a)
+{
+ float16x8x4_t ret;
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_ld4rv8hf ((const __builtin_aarch64_simd_hf *) __a);
+ ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 0);
+ ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 1);
+ ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 2);
+ ret.val[3] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 3);
+ return ret;
+}
+
__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
vld4q_dup_f32 (const float32_t * __a)
{
@@ -16874,6 +17072,8 @@ vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return __b; \
}
+__LD2_LANE_FUNC (float16x4x2_t, float16x4_t, float16x8x2_t, float16_t, v8hf,
+ hf, f16, float16x8_t)
__LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v4sf,
sf, f32, float32x4_t)
__LD2_LANE_FUNC (float64x1x2_t, float64x1_t, float64x2x2_t, float64_t, v2df,
@@ -16918,6 +17118,7 @@ vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return ret; \
}
+__LD2_LANE_FUNC (float16x8x2_t, float16x8_t, float16_t, v8hf, hf, f16)
__LD2_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD2_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64)
__LD2_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8)
@@ -16965,6 +17166,8 @@ vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return __b; \
}
+__LD3_LANE_FUNC (float16x4x3_t, float16x4_t, float16x8x3_t, float16_t, v8hf,
+ hf, f16, float16x8_t)
__LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v4sf,
sf, f32, float32x4_t)
__LD3_LANE_FUNC (float64x1x3_t, float64x1_t, float64x2x3_t, float64_t, v2df,
@@ -17011,6 +17214,7 @@ vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return ret; \
}
+__LD3_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16)
__LD3_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD3_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64)
__LD3_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8)
@@ -17066,6 +17270,8 @@ vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
/* vld4q_lane */
+__LD4_LANE_FUNC (float16x4x4_t, float16x4_t, float16x8x4_t, float16_t, v8hf,
+ hf, f16, float16x8_t)
__LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v4sf,
sf, f32, float32x4_t)
__LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, v2df,
@@ -17114,6 +17320,7 @@ vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return ret; \
}
+__LD4_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16)
__LD4_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD4_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64)
__LD4_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8)
@@ -22474,6 +22681,18 @@ vst2_u32 (uint32_t * __a, uint32x2x2_t val)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_f16 (float16_t * __a, float16x4x2_t val)
+{
+ __builtin_aarch64_simd_oi __o;
+ float16x8x2_t temp;
+ temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[1], 1);
+ __builtin_aarch64_st2v4hf (__a, __o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_f32 (float32_t * __a, float32x2x2_t val)
{
__builtin_aarch64_simd_oi __o;
@@ -22576,6 +22795,15 @@ vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_f16 (float16_t * __a, float16x8x2_t val)
+{
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[1], 1);
+ __builtin_aarch64_st2v8hf (__a, __o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_f32 (float32_t * __a, float32x4x2_t val)
{
__builtin_aarch64_simd_oi __o;
@@ -22748,6 +22976,20 @@ vst3_u32 (uint32_t * __a, uint32x2x3_t val)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_f16 (float16_t * __a, float16x4x3_t val)
+{
+ __builtin_aarch64_simd_ci __o;
+ float16x8x3_t temp;
+ temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[2], 2);
+ __builtin_aarch64_st3v4hf ((__builtin_aarch64_simd_hf *) __a, __o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_f32 (float32_t * __a, float32x2x3_t val)
{
__builtin_aarch64_simd_ci __o;
@@ -22862,6 +23104,16 @@ vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_f16 (float16_t * __a, float16x8x3_t val)
+{
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[2], 2);
+ __builtin_aarch64_st3v8hf ((__builtin_aarch64_simd_hf *) __a, __o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_f32 (float32_t * __a, float32x4x3_t val)
{
__builtin_aarch64_simd_ci __o;
@@ -23058,6 +23310,22 @@ vst4_u32 (uint32_t * __a, uint32x2x4_t val)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_f16 (float16_t * __a, float16x4x4_t val)
+{
+ __builtin_aarch64_simd_xi __o;
+ float16x8x4_t temp;
+ temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0)));
+ temp.val[3] = vcombine_f16 (val.val[3], vcreate_f16 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[3], 3);
+ __builtin_aarch64_st4v4hf ((__builtin_aarch64_simd_hf *) __a, __o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_f32 (float32_t * __a, float32x2x4_t val)
{
__builtin_aarch64_simd_xi __o;
@@ -23184,6 +23452,17 @@ vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_f16 (float16_t * __a, float16x8x4_t val)
+{
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[3], 3);
+ __builtin_aarch64_st4v8hf ((__builtin_aarch64_simd_hf *) __a, __o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f32 (float32_t * __a, float32x4x4_t val)
{
__builtin_aarch64_simd_xi __o;
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index a7aaa52..96920cf 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -113,7 +113,7 @@
;; All vector modes and DI and DF.
(define_mode_iterator VALLDIF [V8QI V16QI V4HI V8HI V2SI V4SI
- V2DI V2SF V4SF V2DF DI DF])
+ V2DI V4HF V8HF V2SF V4SF V2DF DI DF])
;; Vector modes for Integer reduction across lanes.
(define_mode_iterator VDQV [V8QI V16QI V4HI V8HI V4SI V2DI])
@@ -134,7 +134,7 @@
(define_mode_iterator VQW [V16QI V8HI V4SI])
;; Double vector modes for combines.
-(define_mode_iterator VDC [V8QI V4HI V2SI V2SF DI DF])
+(define_mode_iterator VDC [V8QI V4HI V4HF V2SI V2SF DI DF])
;; Vector modes except double int.
(define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF])
@@ -364,7 +364,8 @@
(V2SI "2s") (V4SI "4s")
(DI "1d") (DF "1d")
(V2DI "2d") (V2SF "2s")
- (V4SF "4s") (V2DF "2d")])
+ (V4SF "4s") (V2DF "2d")
+ (V4HF "4h") (V8HF "8h")])
(define_mode_attr Vrevsuff [(V4HI "16") (V8HI "16") (V2SI "32")
(V4SI "32") (V2DI "64")])
@@ -390,7 +391,8 @@
(define_mode_attr Vetype [(V8QI "b") (V16QI "b")
(V4HI "h") (V8HI "h")
(V2SI "s") (V4SI "s")
- (V2DI "d") (V2SF "s")
+ (V2DI "d") (V4HF "h")
+ (V8HF "h") (V2SF "s")
(V4SF "s") (V2DF "d")
(SF "s") (DF "d")
(QI "b") (HI "h")
@@ -400,7 +402,8 @@
(define_mode_attr Vbtype [(V8QI "8b") (V16QI "16b")
(V4HI "8b") (V8HI "16b")
(V2SI "8b") (V4SI "16b")
- (V2DI "16b") (V2SF "8b")
+ (V2DI "16b") (V4HF "8b")
+ (V8HF "16b") (V2SF "8b")
(V4SF "16b") (V2DF "16b")
(DI "8b") (DF "8b")
(SI "8b")])
@@ -451,6 +454,7 @@
;; Double modes of vector modes (lower case).
(define_mode_attr Vdbl [(V8QI "v16qi") (V4HI "v8hi")
+ (V4HF "v8hf")
(V2SI "v4si") (V2SF "v4sf")
(SI "v2si") (DI "v2di")
(DF "v2df")])
@@ -525,6 +529,7 @@
(V4HI "V4HI") (V8HI "V8HI")
(V2SI "V2SI") (V4SI "V4SI")
(DI "DI") (V2DI "V2DI")
+ (V4HF "V4HI") (V8HF "V8HI")
(V2SF "V2SI") (V4SF "V4SI")
(V2DF "V2DI") (DF "DI")
(SF "SI")])
@@ -534,6 +539,7 @@
(V4HI "v4hi") (V8HI "v8hi")
(V2SI "v2si") (V4SI "v4si")
(DI "di") (V2DI "v2di")
+ (V4HF "v4hi") (V8HF "v8hi")
(V2SF "v2si") (V4SF "v4si")
(V2DF "v2di") (DF "di")
(SF "si")])
diff --git a/gcc/testsuite/gcc.target/aarch64/vldN_1.c b/gcc/testsuite/gcc.target/aarch64/vldN_1.c
index b64de16..caac94f 100644
--- a/gcc/testsuite/gcc.target/aarch64/vldN_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/vldN_1.c
@@ -39,6 +39,7 @@ VARIANT (int32, 2, STRUCT, _s32) \
VARIANT (int64, 1, STRUCT, _s64) \
VARIANT (poly8, 8, STRUCT, _p8) \
VARIANT (poly16, 4, STRUCT, _p16) \
+VARIANT (float16, 4, STRUCT, _f16) \
VARIANT (float32, 2, STRUCT, _f32) \
VARIANT (float64, 1, STRUCT, _f64) \
VARIANT (uint8, 16, STRUCT, q_u8) \
@@ -51,6 +52,7 @@ VARIANT (int32, 4, STRUCT, q_s32) \
VARIANT (int64, 2, STRUCT, q_s64) \
VARIANT (poly8, 16, STRUCT, q_p8) \
VARIANT (poly16, 8, STRUCT, q_p16) \
+VARIANT (float16, 8, STRUCT, q_f16) \
VARIANT (float32, 4, STRUCT, q_f32) \
VARIANT (float64, 2, STRUCT, q_f64)
diff --git a/gcc/testsuite/gcc.target/aarch64/vldN_dup_1.c b/gcc/testsuite/gcc.target/aarch64/vldN_dup_1.c
index 9af0565..68c3fc3 100644
--- a/gcc/testsuite/gcc.target/aarch64/vldN_dup_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/vldN_dup_1.c
@@ -16,6 +16,7 @@ VARIANT (int32, , 2, _s32, STRUCT) \
VARIANT (int64, , 1, _s64, STRUCT) \
VARIANT (poly8, , 8, _p8, STRUCT) \
VARIANT (poly16, , 4, _p16, STRUCT) \
+VARIANT (float16, , 4, _f16, STRUCT) \
VARIANT (float32, , 2, _f32, STRUCT) \
VARIANT (float64, , 1, _f64, STRUCT) \
VARIANT (uint8, q, 16, _u8, STRUCT) \
@@ -28,6 +29,7 @@ VARIANT (int32, q, 4, _s32, STRUCT) \
VARIANT (int64, q, 2, _s64, STRUCT) \
VARIANT (poly8, q, 16, _p8, STRUCT) \
VARIANT (poly16, q, 8, _p16, STRUCT) \
+VARIANT (float16, q, 8, _f16, STRUCT) \
VARIANT (float32, q, 4, _f32, STRUCT) \
VARIANT (float64, q, 2, _f64, STRUCT)
@@ -74,6 +76,7 @@ main (int argc, char **argv)
int64_t *int64_data = (int64_t *)uint64_data;
poly8_t poly8_data[4] = { 0, 7, 13, 18, };
poly16_t poly16_data[4] = { 11111, 2222, 333, 44 };
+ float16_t float16_data[4] = { 1.0625, 3.125, 0.03125, 7.75 };
float32_t float32_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
float64_t float64_data[4] = { 1.010010001, 12345.6789, -9876.54321, 1.618 };
diff --git a/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c b/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c
index 13ab454..6837a11 100644
--- a/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c
@@ -16,6 +16,7 @@ VARIANT (int32, , 2, _s32, 0, STRUCT) \
VARIANT (int64, , 1, _s64, 0, STRUCT) \
VARIANT (poly8, , 8, _p8, 7, STRUCT) \
VARIANT (poly16, , 4, _p16, 1, STRUCT) \
+VARIANT (float16, , 4, _f16, 3, STRUCT) \
VARIANT (float32, , 2, _f32, 1, STRUCT) \
VARIANT (float64, , 1, _f64, 0, STRUCT) \
VARIANT (uint8, q, 16, _u8, 14, STRUCT) \
@@ -28,6 +29,7 @@ VARIANT (int32, q, 4, _s32, 2, STRUCT) \
VARIANT (int64, q, 2, _s64, 1, STRUCT) \
VARIANT (poly8, q, 16, _p8, 12, STRUCT) \
VARIANT (poly16, q, 8, _p16, 5, STRUCT) \
+VARIANT (float16, q, 8, _f16, 7, STRUCT)\
VARIANT (float32, q, 4, _f32, 1, STRUCT)\
VARIANT (float64, q, 2, _f64, 0, STRUCT)
@@ -71,7 +73,7 @@ main (int argc, char **argv)
{
/* Original data for all vector formats. */
uint64_t orig_data[8] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL,
- 0x012389ab4567cdefULL, 0xfeeddadacafe0431ULL,
+ 0x012389ab4567cdefULL, 0xdeeddadacafe0431ULL,
0x1032547698badcfeULL, 0xbadbadbadbad0badULL,
0x0102030405060708ULL, 0x0f0e0d0c0b0a0908ULL};
@@ -87,6 +89,7 @@ main (int argc, char **argv)
int64_t *int64_data = (int64_t *)uint64_data;
poly8_t poly8_data[4] = { 0, 7, 13, 18, };
poly16_t poly16_data[4] = { 11111, 2222, 333, 44 };
+ float16_t float16_data[4] = { 0.8125, 7.5, 19, 0.046875 };
float32_t float32_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
float64_t float64_data[4] = { 1.010010001, 12345.6789, -9876.54321, 1.618 };
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH 14/16][ARM/AArch64 testsuite] Update advsimd-intrinsics tests to add float16 vectors
2015-07-07 12:32 [PATCH 0/16][ARM/AArch64] Float16_t support, v2 Alan Lawrence
` (13 preceding siblings ...)
2015-07-07 12:37 ` [PATCH 12/16][AArch64] vreinterpret(q?), vget_(low|high), vld1(q?)_dup Alan Lawrence
@ 2015-07-07 12:38 ` Alan Lawrence
2015-07-07 12:39 ` [PATCH 16/16][ARM/AArch64 Testsuite] Add test of vcvt{,_high}_{f16_f32,f32_f16} Alan Lawrence
15 siblings, 0 replies; 35+ messages in thread
From: Alan Lawrence @ 2015-07-07 12:38 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 2032 bytes --]
This is a respin of https://gcc.gnu.org/ml/gcc-patches/2015-04/msg01347.html,
removing many default values of 0x333, to complete that I introduced new macros
CHECK_RESULTS{,_NAMED}_NO_FP16 as writing the same list of vector types in four
places seemed too many.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h (hfloat16_t,
vdup_n_f16, CHECK_RESULTS_NO_FP16, CHECK_RESULTS_NAMED_NO_FP16): New.
(result, expected, clean_results): Add float16x4 and float16x8 cases.
(CHECK_RESULTS_NAMED): Likewise, using CHECK_RESULTS_NAMED_NO_FP16.
(CHECK_RESULTS): Redefine using CHECK_RESULTS_NAMED
DECL_VARIABLE_64BITS_VARIANTS: Add float16x4 case.
DECL_VARIABLE_128BITS_VARIANTS: Add float16x8 case.
* gcc.target/aarch64/advsimd-intrinsics/compute-data-ref.h (buffer,
buffer_pad, buffer_dup, buffer_dup_pad): Add float16x4 and float16x8.
* gcc.target/aarch64/advsimd-intrinsics/vbsl.c (exec_vbsl): Change
CHECK_RESULTS to CHECK_RESULTS_NO_FP16.
* gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c (exec_vdup_lane):
Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vext.c (exec_vext): Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c (exec_vdup_vmov):
Change CHECK_RESULTS_NAMED to CHECK_RESULTS_NAMED_NO_FP16.
* gcc.target/aarch64/advsimd-intrinsics/vcombine.c: Add expected
results for float16x4 and float16x8.
(exec_vcombine): add test of float16x4 -> float16x8 case.
* gcc.target/aarch64/advsimd-intrinsics/vcreate.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vget_high.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vget_low.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vld1.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vldX.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c: Likewise.
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 14_advsimd_intrinsics.patch --]
[-- Type: text/x-patch; name=14_advsimd_intrinsics.patch, Size: 62781 bytes --]
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
index 4e728d5..cf9c358 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
@@ -7,6 +7,7 @@
#include <inttypes.h>
/* helper type, to help write floating point results in integer form. */
+typedef uint16_t hfloat16_t;
typedef uint32_t hfloat32_t;
typedef uint64_t hfloat64_t;
@@ -132,6 +133,7 @@ static ARRAY(result, uint, 32, 2);
static ARRAY(result, uint, 64, 1);
static ARRAY(result, poly, 8, 8);
static ARRAY(result, poly, 16, 4);
+static ARRAY(result, float, 16, 4);
static ARRAY(result, float, 32, 2);
static ARRAY(result, int, 8, 16);
static ARRAY(result, int, 16, 8);
@@ -143,6 +145,7 @@ static ARRAY(result, uint, 32, 4);
static ARRAY(result, uint, 64, 2);
static ARRAY(result, poly, 8, 16);
static ARRAY(result, poly, 16, 8);
+static ARRAY(result, float, 16, 8);
static ARRAY(result, float, 32, 4);
#ifdef __aarch64__
static ARRAY(result, float, 64, 2);
@@ -160,6 +163,7 @@ extern ARRAY(expected, uint, 32, 2);
extern ARRAY(expected, uint, 64, 1);
extern ARRAY(expected, poly, 8, 8);
extern ARRAY(expected, poly, 16, 4);
+extern ARRAY(expected, hfloat, 16, 4);
extern ARRAY(expected, hfloat, 32, 2);
extern ARRAY(expected, int, 8, 16);
extern ARRAY(expected, int, 16, 8);
@@ -171,38 +175,11 @@ extern ARRAY(expected, uint, 32, 4);
extern ARRAY(expected, uint, 64, 2);
extern ARRAY(expected, poly, 8, 16);
extern ARRAY(expected, poly, 16, 8);
+extern ARRAY(expected, hfloat, 16, 8);
extern ARRAY(expected, hfloat, 32, 4);
extern ARRAY(expected, hfloat, 64, 2);
-/* Check results. Operates on all possible vector types. */
-#define CHECK_RESULTS(test_name,comment) \
- { \
- CHECK(test_name, int, 8, 8, PRIx8, expected, comment); \
- CHECK(test_name, int, 16, 4, PRIx16, expected, comment); \
- CHECK(test_name, int, 32, 2, PRIx32, expected, comment); \
- CHECK(test_name, int, 64, 1, PRIx64, expected, comment); \
- CHECK(test_name, uint, 8, 8, PRIx8, expected, comment); \
- CHECK(test_name, uint, 16, 4, PRIx16, expected, comment); \
- CHECK(test_name, uint, 32, 2, PRIx32, expected, comment); \
- CHECK(test_name, uint, 64, 1, PRIx64, expected, comment); \
- CHECK(test_name, poly, 8, 8, PRIx8, expected, comment); \
- CHECK(test_name, poly, 16, 4, PRIx16, expected, comment); \
- CHECK_FP(test_name, float, 32, 2, PRIx32, expected, comment); \
- \
- CHECK(test_name, int, 8, 16, PRIx8, expected, comment); \
- CHECK(test_name, int, 16, 8, PRIx16, expected, comment); \
- CHECK(test_name, int, 32, 4, PRIx32, expected, comment); \
- CHECK(test_name, int, 64, 2, PRIx64, expected, comment); \
- CHECK(test_name, uint, 8, 16, PRIx8, expected, comment); \
- CHECK(test_name, uint, 16, 8, PRIx16, expected, comment); \
- CHECK(test_name, uint, 32, 4, PRIx32, expected, comment); \
- CHECK(test_name, uint, 64, 2, PRIx64, expected, comment); \
- CHECK(test_name, poly, 8, 16, PRIx8, expected, comment); \
- CHECK(test_name, poly, 16, 8, PRIx16, expected, comment); \
- CHECK_FP(test_name, float, 32, 4, PRIx32, expected, comment); \
- } \
-
-#define CHECK_RESULTS_NAMED(test_name,EXPECTED,comment) \
+#define CHECK_RESULTS_NAMED_NO_FP16(test_name,EXPECTED,comment) \
{ \
CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \
CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \
@@ -229,6 +206,19 @@ extern ARRAY(expected, hfloat, 64, 2);
CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \
} \
+/* Check results against EXPECTED. Operates on all possible vector types. */
+#define CHECK_RESULTS_NAMED(test_name,EXPECTED,comment) \
+ { \
+ CHECK_RESULTS_NAMED_NO_FP16(test_name, EXPECTED, comment) \
+ CHECK_FP(test_name, float, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK_FP(test_name, float, 16, 8, PRIx16, EXPECTED, comment); \
+ } \
+
+#define CHECK_RESULTS_NO_FP16(test_name,comment) \
+ CHECK_RESULTS_NAMED_NO_FP16(test_name, expected, comment)
+
+#define CHECK_RESULTS(test_name,comment) \
+ CHECK_RESULTS_NAMED(test_name, expected, comment)
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
@@ -380,6 +370,7 @@ static void clean_results (void)
CLEAN(result, uint, 64, 1);
CLEAN(result, poly, 8, 8);
CLEAN(result, poly, 16, 4);
+ CLEAN(result, float, 16, 4);
CLEAN(result, float, 32, 2);
CLEAN(result, int, 8, 16);
@@ -392,6 +383,7 @@ static void clean_results (void)
CLEAN(result, uint, 64, 2);
CLEAN(result, poly, 8, 16);
CLEAN(result, poly, 16, 8);
+ CLEAN(result, float, 16, 8);
CLEAN(result, float, 32, 4);
#if defined(__aarch64__)
@@ -448,6 +440,7 @@ static void clean_results (void)
DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 8); \
DECL_VARIABLE(VAR, poly, 16, 4); \
+ DECL_VARIABLE(VAR, float, 16, 4); \
DECL_VARIABLE(VAR, float, 32, 2)
/* Declare all 128 bits variants. */
@@ -456,6 +449,7 @@ static void clean_results (void)
DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 16); \
DECL_VARIABLE(VAR, poly, 16, 8); \
+ DECL_VARIABLE(VAR, float, 16, 8); \
DECL_VARIABLE(VAR, float, 32, 4)
/* Declare all variants. */
@@ -476,6 +470,13 @@ static void clean_results (void)
/* Helpers to initialize vectors. */
#define VDUP(VAR, Q, T1, T2, W, N, V) \
VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)
+/* Work around that there is no vdup_n_f16 intrinsic. */
+#define vdup_n_f16(VAL) \
+ __extension__ \
+ ({ \
+ float16_t f = VAL; \
+ vld1_dup_f16(&f); \
+ })
#define VSET_LANE(VAR, Q, T1, T2, W, N, L, V) \
VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V, \
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h
index 26203cc..6365579 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h
@@ -118,6 +118,8 @@ VECT_VAR_DECL_INIT(buffer, uint, 32, 2);
PAD(buffer_pad, uint, 32, 2);
VECT_VAR_DECL_INIT(buffer, uint, 64, 1);
PAD(buffer_pad, uint, 64, 1);
+VECT_VAR_DECL_INIT(buffer, float, 16, 4);
+PAD(buffer_pad, float, 16, 4);
VECT_VAR_DECL_INIT(buffer, float, 32, 2);
PAD(buffer_pad, float, 32, 2);
VECT_VAR_DECL_INIT(buffer, int, 8, 16);
@@ -140,6 +142,8 @@ VECT_VAR_DECL_INIT(buffer, poly, 8, 16);
PAD(buffer_pad, poly, 8, 16);
VECT_VAR_DECL_INIT(buffer, poly, 16, 8);
PAD(buffer_pad, poly, 16, 8);
+VECT_VAR_DECL_INIT(buffer, float, 16, 8);
+PAD(buffer_pad, float, 16, 8);
VECT_VAR_DECL_INIT(buffer, float, 32, 4);
PAD(buffer_pad, float, 32, 4);
#ifdef __aarch64__
@@ -170,6 +174,8 @@ VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 8);
VECT_VAR_DECL(buffer_dup_pad, poly, 8, 8);
VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 4);
VECT_VAR_DECL(buffer_dup_pad, poly, 16, 4);
+VECT_VAR_DECL_INIT4(buffer_dup, float, 16, 4);
+VECT_VAR_DECL(buffer_dup_pad, float, 16, 4);
VECT_VAR_DECL_INIT4(buffer_dup, float, 32, 2);
VECT_VAR_DECL(buffer_dup_pad, float, 32, 2);
@@ -193,5 +199,7 @@ VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 16);
VECT_VAR_DECL(buffer_dup_pad, poly, 8, 16);
VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 8);
VECT_VAR_DECL(buffer_dup_pad, poly, 16, 8);
+VECT_VAR_DECL_INIT(buffer_dup, float, 16, 8);
+VECT_VAR_DECL(buffer_dup_pad, float, 16, 8);
VECT_VAR_DECL_INIT(buffer_dup, float, 32, 4);
VECT_VAR_DECL(buffer_dup_pad, float, 32, 4);
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c
index bb17f0a..c4fdbb4 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c
@@ -114,7 +114,7 @@ void exec_vbsl (void)
TEST_VBSL(uint, , float, f, 32, 2);
TEST_VBSL(uint, q, float, f, 32, 4);
- CHECK_RESULTS (TEST_MSG, "");
+ CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
}
int main (void)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c
index 295768a..aee406d 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c
@@ -27,6 +27,8 @@ VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0x66, 0x66, 0x66, 0x66 };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0x40533333, 0x40533333 };
+VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
+ 0x4080, 0x4080, 0x4080, 0x4080 };
#define TEST_MSG "VCOMBINE"
void exec_vcombine (void)
@@ -44,6 +46,7 @@ void exec_vcombine (void)
/* Initialize input "vector64_a" from "buffer". */
TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector64_a, buffer);
+ VLOAD(vector64_a, buffer, , float, f, 16, 4);
VLOAD(vector64_a, buffer, , float, f, 32, 2);
/* Choose init value arbitrarily. */
@@ -57,6 +60,7 @@ void exec_vcombine (void)
VDUP(vector64_b, , uint, u, 64, 1, 0x88);
VDUP(vector64_b, , poly, p, 8, 8, 0x55);
VDUP(vector64_b, , poly, p, 16, 4, 0x66);
+ VDUP(vector64_b, , float, f, 16, 4, 2.25);
VDUP(vector64_b, , float, f, 32, 2, 3.3f);
clean_results ();
@@ -72,6 +76,7 @@ void exec_vcombine (void)
TEST_VCOMBINE(uint, u, 64, 1, 2);
TEST_VCOMBINE(poly, p, 8, 8, 16);
TEST_VCOMBINE(poly, p, 16, 4, 8);
+ TEST_VCOMBINE(float, f, 16, 4, 8);
TEST_VCOMBINE(float, f, 32, 2, 4);
CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, "");
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c
index b2289d3..9aafbc3 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c
@@ -16,6 +16,7 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0x123456789abcdef0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a,
0x78, 0x56, 0x34, 0x12 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 };
+VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x9abcdef0, 0x12345678 };
#define INSN_NAME vcreate
@@ -38,6 +39,7 @@ FNNAME (INSN_NAME)
DECL_VAL(val, int, 16, 4);
DECL_VAL(val, int, 32, 2);
DECL_VAL(val, int, 64, 1);
+ DECL_VAL(val, float, 16, 4);
DECL_VAL(val, float, 32, 2);
DECL_VAL(val, uint, 8, 8);
DECL_VAL(val, uint, 16, 4);
@@ -50,6 +52,7 @@ FNNAME (INSN_NAME)
DECL_VARIABLE(vector_res, int, 16, 4);
DECL_VARIABLE(vector_res, int, 32, 2);
DECL_VARIABLE(vector_res, int, 64, 1);
+ DECL_VARIABLE(vector_res, float, 16, 4);
DECL_VARIABLE(vector_res, float, 32, 2);
DECL_VARIABLE(vector_res, uint, 8, 8);
DECL_VARIABLE(vector_res, uint, 16, 4);
@@ -65,6 +68,7 @@ FNNAME (INSN_NAME)
VECT_VAR(val, int, 16, 4) = 0x123456789abcdef0LL;
VECT_VAR(val, int, 32, 2) = 0x123456789abcdef0LL;
VECT_VAR(val, int, 64, 1) = 0x123456789abcdef0LL;
+ VECT_VAR(val, float, 16, 4) = 0x123456789abcdef0LL;
VECT_VAR(val, float, 32, 2) = 0x123456789abcdef0LL;
VECT_VAR(val, uint, 8, 8) = 0x123456789abcdef0ULL;
VECT_VAR(val, uint, 16, 4) = 0x123456789abcdef0ULL;
@@ -76,6 +80,7 @@ FNNAME (INSN_NAME)
TEST_VCREATE(int, s, 8, 8);
TEST_VCREATE(int, s, 16, 4);
TEST_VCREATE(int, s, 32, 2);
+ TEST_VCREATE(float, f, 16, 4);
TEST_VCREATE(float, f, 32, 2);
TEST_VCREATE(int, s, 64, 1);
TEST_VCREATE(uint, u, 8, 8);
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c
index b5132f4..22d45d5 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c
@@ -187,13 +187,13 @@ void exec_vdup_vmov (void)
switch (i) {
case 0:
- CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
+ CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
break;
case 1:
- CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
+ CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected1, "");
break;
case 2:
- CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
+ CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected2, "");
break;
default:
abort();
@@ -232,13 +232,13 @@ void exec_vdup_vmov (void)
switch (i) {
case 0:
- CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
+ CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
break;
case 1:
- CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
+ CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected1, "");
break;
case 2:
- CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
+ CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected2, "");
break;
default:
abort();
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c
index c1ff6dd..ef708dc 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c
@@ -90,7 +90,7 @@ void exec_vdup_lane (void)
TEST_VDUP_LANE(q, poly, p, 16, 8, 4, 1);
TEST_VDUP_LANE(q, float, f, 32, 4, 2, 1);
- CHECK_RESULTS (TEST_MSG, "");
+ CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
}
int main (void)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c
index 0b014eb..98f88a6 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c
@@ -113,7 +113,7 @@ void exec_vext (void)
TEST_VEXT(q, poly, p, 16, 8, 6);
TEST_VEXT(q, float, f, 32, 4, 3);
- CHECK_RESULTS (TEST_MSG, "");
+ CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
}
int main (void)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c
index d758112..eeef870 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c
@@ -16,6 +16,7 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
#define TEST_MSG "VGET_HIGH"
@@ -31,6 +32,7 @@ void exec_vget_high (void)
DECL_VARIABLE_128BITS_VARIANTS(vector128);
TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vector128, buffer);
+ VLOAD(vector128, buffer, q, float, f, 16, 8);
VLOAD(vector128, buffer, q, float, f, 32, 4);
clean_results ();
@@ -46,6 +48,7 @@ void exec_vget_high (void)
TEST_VGET_HIGH(uint, u, 64, 1, 2);
TEST_VGET_HIGH(poly, p, 8, 8, 16);
TEST_VGET_HIGH(poly, p, 16, 4, 8);
+ TEST_VGET_HIGH(float, f, 16, 4, 8);
TEST_VGET_HIGH(float, f, 32, 2, 4);
CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, "");
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c
index 12ecfc2..0a81c5b 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c
@@ -16,6 +16,7 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
#define TEST_MSG "VGET_LOW"
@@ -31,6 +32,7 @@ void exec_vget_low (void)
DECL_VARIABLE_128BITS_VARIANTS(vector128);
TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vector128, buffer);
+ VLOAD(vector128, buffer, q, float, f, 16, 8);
VLOAD(vector128, buffer, q, float, f, 32, 4);
clean_results ();
@@ -46,6 +48,7 @@ void exec_vget_low (void)
TEST_VGET_LOW(uint, u, 64, 1, 2);
TEST_VGET_LOW(poly, p, 8, 8, 16);
TEST_VGET_LOW(poly, p, 16, 4, 8);
+ TEST_VGET_LOW(float, f, 16, 4, 8);
TEST_VGET_LOW(float, f, 32, 2, 4);
CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, "");
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c
index ced9d73..68641b0 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c
@@ -16,6 +16,7 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7,
@@ -44,6 +45,8 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
+ 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0xc1600000, 0xc1500000 };
@@ -62,7 +65,9 @@ void exec_vld1 (void)
TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1, vector, buffer);
+ TEST_VLD1(vector, buffer, , float, f, 16, 4);
TEST_VLD1(vector, buffer, , float, f, 32, 2);
+ TEST_VLD1(vector, buffer, q, float, f, 16, 8);
TEST_VLD1(vector, buffer, q, float, f, 32, 4);
CHECK_RESULTS (TEST_MSG, "");
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c
index 0e05274..bf878cc 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c
@@ -17,6 +17,7 @@ VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0 };
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,hfloat,16,4) [] = { 0xcc00, 0xcc00, 0xcc00, 0xcc00 };
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0,
@@ -44,6 +45,8 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0 };
VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,hfloat,16,8) [] = { 0xcc00, 0xcc00, 0xcc00, 0xcc00,
+ 0xcc00, 0xcc00, 0xcc00, 0xcc00 };
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000,
0xc1800000, 0xc1800000 };
@@ -61,6 +64,7 @@ VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1 };
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,hfloat,16,4) [] = { 0xcb80, 0xcb80, 0xcb80, 0xcb80 };
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1,
@@ -88,6 +92,8 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1 };
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,hfloat,16,8) [] = { 0xcb80, 0xcb80, 0xcb80, 0xcb80,
+ 0xcb80, 0xcb80, 0xcb80, 0xcb80 };
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
0xc1700000, 0xc1700000 };
@@ -105,6 +111,7 @@ VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2 };
VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,hfloat,16,4) [] = { 0xcb00, 0xcb00, 0xcb00, 0xcb00 };
VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 };
VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2,
@@ -132,6 +139,8 @@ VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2 };
VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,hfloat,16,8) [] = { 0xcb00, 0xcb00, 0xcb00, 0xcb00,
+ 0xcb00, 0xcb00, 0xcb00, 0xcb00 };
VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000,
0xc1600000, 0xc1600000 };
@@ -154,7 +163,9 @@ void exec_vld1_dup (void)
TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1_DUP, vector, buffer_dup);
+ TEST_VLD1_DUP(vector, buffer_dup, , float, f, 16, 4);
TEST_VLD1_DUP(vector, buffer_dup, , float, f, 32, 2);
+ TEST_VLD1_DUP(vector, buffer_dup, q, float, f, 16, 8);
TEST_VLD1_DUP(vector, buffer_dup, q, float, f, 32, 4);
switch (i) {
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c
index d5c5d22..3b521f7 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c
@@ -16,6 +16,7 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xf0 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 };
+VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xcc00, 0xaaaa };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xaaaaaaaa, 0xc1800000 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa,
@@ -43,6 +44,8 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xf0, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xfff0, 0xaaaa };
+VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+ 0xaaaa, 0xcc00, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xc1800000, 0xaaaaaaaa };
@@ -72,6 +75,7 @@ void exec_vld1_lane (void)
ARRAY(buffer_src, uint, 64, 1);
ARRAY(buffer_src, poly, 8, 8);
ARRAY(buffer_src, poly, 16, 4);
+ ARRAY(buffer_src, float, 16, 4);
ARRAY(buffer_src, float, 32, 2);
ARRAY(buffer_src, int, 8, 16);
@@ -84,6 +88,7 @@ void exec_vld1_lane (void)
ARRAY(buffer_src, uint, 64, 2);
ARRAY(buffer_src, poly, 8, 16);
ARRAY(buffer_src, poly, 16, 8);
+ ARRAY(buffer_src, float, 16, 8);
ARRAY(buffer_src, float, 32, 4);
clean_results ();
@@ -99,6 +104,7 @@ void exec_vld1_lane (void)
TEST_VLD1_LANE(, uint, u, 64, 1, 0);
TEST_VLD1_LANE(, poly, p, 8, 8, 7);
TEST_VLD1_LANE(, poly, p, 16, 4, 3);
+ TEST_VLD1_LANE(, float, f, 16, 4, 2);
TEST_VLD1_LANE(, float, f, 32, 2, 1);
TEST_VLD1_LANE(q, int, s, 8, 16, 15);
@@ -111,6 +117,7 @@ void exec_vld1_lane (void)
TEST_VLD1_LANE(q, uint, u, 64, 2, 0);
TEST_VLD1_LANE(q, poly, p, 8, 16, 12);
TEST_VLD1_LANE(q, poly, p, 16, 8, 6);
+ TEST_VLD1_LANE(q, float, f, 16, 8, 5);
TEST_VLD1_LANE(q, float, f, 32, 4, 2);
CHECK_RESULTS (TEST_MSG, "");
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c
index f20aa03..937b51d 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c
@@ -18,6 +18,7 @@ VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld2_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7,
@@ -41,6 +42,8 @@ VECT_VAR_DECL(expected_vld2_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld2_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld2_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
+ 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld2_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0xc1600000, 0xc1500000 };
@@ -58,6 +61,7 @@ VECT_VAR_DECL(expected_vld2_1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_vld2_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7,
@@ -81,6 +85,8 @@ VECT_VAR_DECL(expected_vld2_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
0xc, 0xd, 0xe, 0xf };
VECT_VAR_DECL(expected_vld2_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld2_1,hfloat,16,8) [] = { 0xc800, 0xc700, 0xc600, 0xc500,
+ 0xc400, 0xc200, 0xc000, 0xbc00 };
VECT_VAR_DECL(expected_vld2_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000,
0xc1200000, 0xc1100000 };
@@ -98,6 +104,7 @@ VECT_VAR_DECL(expected_vld3_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld3_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7,
@@ -121,6 +128,8 @@ VECT_VAR_DECL(expected_vld3_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld3_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld3_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
+ 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld3_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0xc1600000, 0xc1500000 };
@@ -138,6 +147,7 @@ VECT_VAR_DECL(expected_vld3_1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_vld3_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7,
@@ -161,6 +171,8 @@ VECT_VAR_DECL(expected_vld3_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
0xc, 0xd, 0xe, 0xf };
VECT_VAR_DECL(expected_vld3_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld3_1,hfloat,16,8) [] = { 0xc800, 0xc700, 0xc600, 0xc500,
+ 0xc400, 0xc200, 0xc000, 0xbc00 };
VECT_VAR_DECL(expected_vld3_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000,
0xc1200000, 0xc1100000 };
@@ -181,6 +193,7 @@ VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7 };
VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff8, 0xfff9,
0xfffa, 0xfffb };
+VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xc800, 0xc700, 0xc600, 0xc500 };
VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 };
VECT_VAR_DECL(expected_vld3_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
0x14, 0x15, 0x16, 0x17,
@@ -204,6 +217,8 @@ VECT_VAR_DECL(expected_vld3_2,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
0x1c, 0x1d, 0x1e, 0x1f };
VECT_VAR_DECL(expected_vld3_2,poly,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld3_2,hfloat,16,8) [] = { 0x0000, 0x3c00, 0x4000, 0x4200,
+ 0x4400, 0x4500, 0x4600, 0x4700 };
VECT_VAR_DECL(expected_vld3_2,hfloat,32,4) [] = { 0xc1000000, 0xc0e00000,
0xc0c00000, 0xc0a00000 };
@@ -223,6 +238,7 @@ VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld4_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7,
@@ -246,6 +262,8 @@ VECT_VAR_DECL(expected_vld4_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld4_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld4_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
+ 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld4_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0xc1600000, 0xc1500000 };
@@ -263,6 +281,7 @@ VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_vld4_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7,
@@ -286,6 +305,8 @@ VECT_VAR_DECL(expected_vld4_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
0xc, 0xd, 0xe, 0xf };
VECT_VAR_DECL(expected_vld4_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld4_1,hfloat,16,8) [] = { 0xc800, 0xc700, 0xc600, 0xc500,
+ 0xc400, 0xc200, 0xc000, 0xbc00 };
VECT_VAR_DECL(expected_vld4_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000,
0xc1200000, 0xc1100000 };
@@ -303,6 +324,7 @@ VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7 };
VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb };
+VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xc800, 0xc700, 0xc600, 0xc500 };
VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 };
VECT_VAR_DECL(expected_vld4_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
0x14, 0x15, 0x16, 0x17,
@@ -326,6 +348,8 @@ VECT_VAR_DECL(expected_vld4_2,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
0x1c, 0x1d, 0x1e, 0x1f };
VECT_VAR_DECL(expected_vld4_2,poly,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld4_2,hfloat,16,8) [] = { 0x0000, 0x3c00, 0x4000, 0x4200,
+ 0x4400, 0x4500, 0x4600, 0x4700 };
VECT_VAR_DECL(expected_vld4_2,hfloat,32,4) [] = { 0xc1000000, 0xc0e00000,
0xc0c00000, 0xc0a00000 };
@@ -343,6 +367,7 @@ VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0xfffffffffffffff3 };
VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0x8, 0x9, 0xa, 0xb,
0xc, 0xd, 0xe, 0xf };
VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xc400, 0xc200, 0xc000, 0xbc00 };
VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1200000, 0xc1100000 };
VECT_VAR_DECL(expected_vld4_3,int,8,16) [] = { 0x20, 0x21, 0x22, 0x23,
0x24, 0x25, 0x26, 0x27,
@@ -366,6 +391,8 @@ VECT_VAR_DECL(expected_vld4_3,poly,8,16) [] = { 0x20, 0x21, 0x22, 0x23,
0x2c, 0x2d, 0x2e, 0x2f };
VECT_VAR_DECL(expected_vld4_3,poly,16,8) [] = { 0x8, 0x9, 0xa, 0xb,
0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld4_3,hfloat,16,8) [] = { 0x4800, 0x4880, 0x4900, 0x4980,
+ 0x4a00, 0x4a80, 0x4b00, 0x4b80 };
VECT_VAR_DECL(expected_vld4_3,hfloat,32,4) [] = { 0xc0800000, 0xc0400000,
0xc0000000, 0xbf800000 };
@@ -409,6 +436,7 @@ void exec_vldX (void)
DECL_VLDX(uint, 64, 1, X); \
DECL_VLDX(poly, 8, 8, X); \
DECL_VLDX(poly, 16, 4, X); \
+ DECL_VLDX(float, 16, 4, X); \
DECL_VLDX(float, 32, 2, X); \
DECL_VLDX(int, 8, 16, X); \
DECL_VLDX(int, 16, 8, X); \
@@ -418,6 +446,7 @@ void exec_vldX (void)
DECL_VLDX(uint, 32, 4, X); \
DECL_VLDX(poly, 8, 16, X); \
DECL_VLDX(poly, 16, 8, X); \
+ DECL_VLDX(float, 16, 8, X); \
DECL_VLDX(float, 32, 4, X)
#define TEST_ALL_VLDX(X) \
@@ -431,6 +460,7 @@ void exec_vldX (void)
TEST_VLDX(, uint, u, 64, 1, X); \
TEST_VLDX(, poly, p, 8, 8, X); \
TEST_VLDX(, poly, p, 16, 4, X); \
+ TEST_VLDX(, float, f, 16, 4, X); \
TEST_VLDX(, float, f, 32, 2, X); \
TEST_VLDX(q, int, s, 8, 16, X); \
TEST_VLDX(q, int, s, 16, 8, X); \
@@ -440,6 +470,7 @@ void exec_vldX (void)
TEST_VLDX(q, uint, u, 32, 4, X); \
TEST_VLDX(q, poly, p, 8, 16, X); \
TEST_VLDX(q, poly, p, 16, 8, X); \
+ TEST_VLDX(q, float, f, 16, 8, X); \
TEST_VLDX(q, float, f, 32, 4, X)
#define TEST_ALL_EXTRA_CHUNKS(X, Y) \
@@ -453,6 +484,7 @@ void exec_vldX (void)
TEST_EXTRA_CHUNK(uint, 64, 1, X, Y); \
TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \
TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \
+ TEST_EXTRA_CHUNK(float, 16, 4, X, Y); \
TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \
TEST_EXTRA_CHUNK(int, 8, 16, X, Y); \
TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \
@@ -462,6 +494,7 @@ void exec_vldX (void)
TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \
TEST_EXTRA_CHUNK(poly, 8, 16, X, Y); \
TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \
+ TEST_EXTRA_CHUNK(float, 16, 8, X, Y); \
TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
/* vldX supports all vector types except [u]int64x2. */
@@ -516,6 +549,8 @@ void exec_vldX (void)
PAD(buffer_vld2_pad, poly, 8, 8);
VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4);
PAD(buffer_vld2_pad, poly, 16, 4);
+ VECT_ARRAY_INIT2(buffer_vld2, float, 16, 4);
+ PAD(buffer_vld2_pad, float, 16, 4);
VECT_ARRAY_INIT2(buffer_vld2, float, 32, 2);
PAD(buffer_vld2_pad, float, 32, 2);
@@ -539,6 +574,8 @@ void exec_vldX (void)
PAD(buffer_vld2_pad, poly, 8, 16);
VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8);
PAD(buffer_vld2_pad, poly, 16, 8);
+ VECT_ARRAY_INIT2(buffer_vld2, float, 16, 8);
+ PAD(buffer_vld2_pad, float, 16, 8);
VECT_ARRAY_INIT2(buffer_vld2, float, 32, 4);
PAD(buffer_vld2_pad, float, 32, 4);
@@ -563,6 +600,8 @@ void exec_vldX (void)
PAD(buffer_vld3_pad, poly, 8, 8);
VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4);
PAD(buffer_vld3_pad, poly, 16, 4);
+ VECT_ARRAY_INIT3(buffer_vld3, float, 16, 4);
+ PAD(buffer_vld3_pad, float, 16, 4);
VECT_ARRAY_INIT3(buffer_vld3, float, 32, 2);
PAD(buffer_vld3_pad, float, 32, 2);
@@ -586,6 +625,8 @@ void exec_vldX (void)
PAD(buffer_vld3_pad, poly, 8, 16);
VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8);
PAD(buffer_vld3_pad, poly, 16, 8);
+ VECT_ARRAY_INIT3(buffer_vld3, float, 16, 8);
+ PAD(buffer_vld3_pad, float, 16, 8);
VECT_ARRAY_INIT3(buffer_vld3, float, 32, 4);
PAD(buffer_vld3_pad, float, 32, 4);
@@ -610,6 +651,8 @@ void exec_vldX (void)
PAD(buffer_vld4_pad, poly, 8, 8);
VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4);
PAD(buffer_vld4_pad, poly, 16, 4);
+ VECT_ARRAY_INIT4(buffer_vld4, float, 16, 4);
+ PAD(buffer_vld4_pad, float, 16, 4);
VECT_ARRAY_INIT4(buffer_vld4, float, 32, 2);
PAD(buffer_vld4_pad, float, 32, 2);
@@ -633,6 +676,8 @@ void exec_vldX (void)
PAD(buffer_vld4_pad, poly, 8, 16);
VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8);
PAD(buffer_vld4_pad, poly, 16, 8);
+ VECT_ARRAY_INIT4(buffer_vld4, float, 16, 8);
+ PAD(buffer_vld4_pad, float, 16, 8);
VECT_ARRAY_INIT4(buffer_vld4, float, 32, 4);
PAD(buffer_vld4_pad, float, 32, 4);
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c
index c66dade..1496717 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c
@@ -18,6 +18,7 @@ VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1,
0xf0, 0xf1, 0xf0, 0xf1 };
VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = {0xcc00, 0xcb80, 0xcc00, 0xcb80 };
VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
/* vld2_dup/chunk 1. */
@@ -35,6 +36,7 @@ VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1,
0xf0, 0xf1, 0xf0, 0xf1 };
VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff0, 0xfff1,
0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcc00, 0xcb80 };
VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
/* vld3_dup/chunk 0. */
@@ -54,6 +56,7 @@ VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0,
0xf1, 0xf2, 0xf0, 0xf1 };
VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1,
0xfff2, 0xfff0 };
+VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xcc00 };
VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
/* vld3_dup/chunk 1. */
@@ -73,6 +76,7 @@ VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2,
0xf0, 0xf1, 0xf2, 0xf0 };
VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff1, 0xfff2,
0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xcb80, 0xcb00, 0xcc00, 0xcb80 };
VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1800000 };
/* vld3_dup/chunk 2. */
@@ -92,6 +96,7 @@ VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1,
0xf2, 0xf0, 0xf1, 0xf2 };
VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff2, 0xfff0,
0xfff1, 0xfff2 };
+VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xcb00, 0xcc00, 0xcb80, 0xcb00 };
VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1700000, 0xc1600000 };
/* vld4_dup/chunk 0. */
@@ -109,6 +114,7 @@ VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf0, 0xf1, 0xf2, 0xf3 };
VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
/* vld4_dup/chunk 1. */
@@ -125,6 +131,7 @@ VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf0, 0xf1, 0xf2, 0xf3 };
VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
/* vld4_dup/chunk 2. */
@@ -141,6 +148,7 @@ VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf0, 0xf1, 0xf2, 0xf3 };
VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
/* vld4_dup/chunk3. */
@@ -157,6 +165,7 @@ VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0xfffffffffffffff3 };
VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf0, 0xf1, 0xf2, 0xf3 };
VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
void exec_vldX_dup (void)
@@ -199,6 +208,7 @@ void exec_vldX_dup (void)
DECL_VLDX_DUP(uint, 64, 1, X); \
DECL_VLDX_DUP(poly, 8, 8, X); \
DECL_VLDX_DUP(poly, 16, 4, X); \
+ DECL_VLDX_DUP(float, 16, 4, X); \
DECL_VLDX_DUP(float, 32, 2, X)
#define TEST_ALL_VLDX_DUP(X) \
@@ -212,6 +222,7 @@ void exec_vldX_dup (void)
TEST_VLDX_DUP(, uint, u, 64, 1, X); \
TEST_VLDX_DUP(, poly, p, 8, 8, X); \
TEST_VLDX_DUP(, poly, p, 16, 4, X); \
+ TEST_VLDX_DUP(, float, f, 16, 4, X); \
TEST_VLDX_DUP(, float, f, 32, 2, X)
#define TEST_ALL_EXTRA_CHUNKS(X, Y) \
@@ -225,6 +236,7 @@ void exec_vldX_dup (void)
TEST_EXTRA_CHUNK(uint, 64, 1, X, Y); \
TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \
TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \
+ TEST_EXTRA_CHUNK(float, 16, 4, X, Y); \
TEST_EXTRA_CHUNK(float, 32, 2, X, Y)
/* vldX_dup supports only 64-bit inputs. */
@@ -269,6 +281,8 @@ void exec_vldX_dup (void)
PAD(buffer_vld2_pad, poly, 8, 8);
VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4);
PAD(buffer_vld2_pad, poly, 16, 4);
+ VECT_ARRAY_INIT2(buffer_vld2, float, 16, 4);
+ PAD(buffer_vld2_pad, float, 16, 4);
VECT_ARRAY_INIT2(buffer_vld2, float, 32, 2);
PAD(buffer_vld2_pad, float, 32, 2);
@@ -292,6 +306,8 @@ void exec_vldX_dup (void)
PAD(buffer_vld2_pad, poly, 8, 16);
VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8);
PAD(buffer_vld2_pad, poly, 16, 8);
+ VECT_ARRAY_INIT2(buffer_vld2, float, 16, 8);
+ PAD(buffer_vld2_pad, float, 16, 8);
VECT_ARRAY_INIT2(buffer_vld2, float, 32, 4);
PAD(buffer_vld2_pad, float, 32, 4);
@@ -316,6 +332,8 @@ void exec_vldX_dup (void)
PAD(buffer_vld3_pad, poly, 8, 8);
VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4);
PAD(buffer_vld3_pad, poly, 16, 4);
+ VECT_ARRAY_INIT3(buffer_vld3, float, 16, 4);
+ PAD(buffer_vld3_pad, float, 16, 4);
VECT_ARRAY_INIT3(buffer_vld3, float, 32, 2);
PAD(buffer_vld3_pad, float, 32, 2);
@@ -339,6 +357,8 @@ void exec_vldX_dup (void)
PAD(buffer_vld3_pad, poly, 8, 16);
VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8);
PAD(buffer_vld3_pad, poly, 16, 8);
+ VECT_ARRAY_INIT3(buffer_vld3, float, 16, 8);
+ PAD(buffer_vld3_pad, float, 16, 8);
VECT_ARRAY_INIT3(buffer_vld3, float, 32, 4);
PAD(buffer_vld3_pad, float, 32, 4);
@@ -363,6 +383,8 @@ void exec_vldX_dup (void)
PAD(buffer_vld4_pad, poly, 8, 8);
VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4);
PAD(buffer_vld4_pad, poly, 16, 4);
+ VECT_ARRAY_INIT4(buffer_vld4, float, 16, 4);
+ PAD(buffer_vld4_pad, float, 16, 4);
VECT_ARRAY_INIT4(buffer_vld4, float, 32, 2);
PAD(buffer_vld4_pad, float, 32, 2);
@@ -386,6 +408,8 @@ void exec_vldX_dup (void)
PAD(buffer_vld4_pad, poly, 8, 16);
VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8);
PAD(buffer_vld4_pad, poly, 16, 8);
+ VECT_ARRAY_INIT4(buffer_vld4, float, 16, 8);
+ PAD(buffer_vld4_pad, float, 16, 8);
VECT_ARRAY_INIT4(buffer_vld4, float, 32, 4);
PAD(buffer_vld4_pad, float, 32, 4);
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c
index 2f2e62f..5e8eb44 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c
@@ -18,6 +18,7 @@ VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
@@ -29,6 +30,8 @@ VECT_VAR_DECL(expected_vld2_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld2_0,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_0,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+ 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa } ;
VECT_VAR_DECL(expected_vld2_0,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
@@ -44,6 +47,7 @@ VECT_VAR_DECL(expected_vld2_1,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xfff0, 0xfff1, 0xaaaa, 0xaaaa };
@@ -55,6 +59,8 @@ VECT_VAR_DECL(expected_vld2_1,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld2_1,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_1,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+ 0xcc00, 0xcb80, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld2_1,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0xaaaaaaaa, 0xaaaaaaaa };
@@ -70,6 +76,7 @@ VECT_VAR_DECL(expected_vld3_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld3_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
@@ -81,6 +88,8 @@ VECT_VAR_DECL(expected_vld3_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
0xfffffff2, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_0,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_0,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+ 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld3_0,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
@@ -96,6 +105,7 @@ VECT_VAR_DECL(expected_vld3_1,uint,32,2) [] = { 0xaaaaaaaa, 0xfffffff0 };
VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xf0, 0xf1, 0xf2, 0xaa };
VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xcc00, 0xcb80 };
VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
@@ -107,6 +117,8 @@ VECT_VAR_DECL(expected_vld3_1,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_1,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 };
+VECT_VAR_DECL(expected_vld3_1,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+ 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld3_1,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xc1800000, 0xc1700000 };
@@ -122,6 +134,7 @@ VECT_VAR_DECL(expected_vld3_2,uint,32,2) [] = { 0xfffffff1, 0xfffffff2 };
VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 };
+VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xcb00, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1,
0xfff2, 0xaaaa, 0xaaaa, 0xaaaa };
@@ -133,6 +146,8 @@ VECT_VAR_DECL(expected_vld3_2,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_2,poly,16,8) [] = { 0xfff1, 0xfff2, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_2,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xcc00, 0xcb80,
+ 0xcb00, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld3_2,hfloat,32,4) [] = { 0xc1600000, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
@@ -148,6 +163,7 @@ VECT_VAR_DECL(expected_vld4_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld4_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
@@ -159,6 +175,8 @@ VECT_VAR_DECL(expected_vld4_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
0xfffffff2, 0xfffffff3 };
VECT_VAR_DECL(expected_vld4_0,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_0,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+ 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_0,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
@@ -174,6 +192,7 @@ VECT_VAR_DECL(expected_vld4_1,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_vld4_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
@@ -185,6 +204,8 @@ VECT_VAR_DECL(expected_vld4_1,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_1,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_1,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+ 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_1,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
@@ -200,6 +221,7 @@ VECT_VAR_DECL(expected_vld4_2,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
@@ -211,6 +233,8 @@ VECT_VAR_DECL(expected_vld4_2,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_2,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_2,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+ 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_2,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0xc1600000, 0xc1500000 };
@@ -226,6 +250,7 @@ VECT_VAR_DECL(expected_vld4_3,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_3,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
@@ -237,6 +262,8 @@ VECT_VAR_DECL(expected_vld4_3,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_3,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_3,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
+ 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_3,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa };
@@ -252,6 +279,7 @@ VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 16, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 32, 2);
/* Input buffers for vld3_lane */
@@ -265,6 +293,7 @@ VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 16, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 32, 3);
/* Input buffers for vld4_lane */
@@ -278,6 +307,7 @@ VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 16, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 32, 4);
void exec_vldX_lane (void)
@@ -335,7 +365,9 @@ void exec_vldX_lane (void)
DECL_VLDX_LANE(uint, 16, 8, X); \
DECL_VLDX_LANE(uint, 32, 4, X); \
DECL_VLDX_LANE(poly, 16, 8, X); \
+ DECL_VLDX_LANE(float, 16, 4, X); \
DECL_VLDX_LANE(float, 32, 2, X); \
+ DECL_VLDX_LANE(float, 16, 8, X); \
DECL_VLDX_LANE(float, 32, 4, X)
/* Add some padding to try to catch out of bound accesses. */
@@ -360,7 +392,9 @@ void exec_vldX_lane (void)
TEST_VLDX_LANE(q, uint, u, 16, 8, X, 5); \
TEST_VLDX_LANE(q, uint, u, 32, 4, X, 0); \
TEST_VLDX_LANE(q, poly, p, 16, 8, X, 5); \
+ TEST_VLDX_LANE(, float, f, 16, 4, X, 2); \
TEST_VLDX_LANE(, float, f, 32, 2, X, 0); \
+ TEST_VLDX_LANE(q, float, f, 16, 8, X, 6); \
TEST_VLDX_LANE(q, float, f, 32, 4, X, 2)
#define TEST_ALL_EXTRA_CHUNKS(X, Y) \
@@ -377,7 +411,9 @@ void exec_vldX_lane (void)
TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \
TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \
TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \
+ TEST_EXTRA_CHUNK(float, 16, 4, X, Y); \
TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \
+ TEST_EXTRA_CHUNK(float, 16, 8, X, Y); \
TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
/* vldX_lane supports only a subset of all variants. */
@@ -419,7 +455,9 @@ void exec_vldX_lane (void)
DUMMY_ARRAY(buffer_src, uint, 16, 8, 4);
DUMMY_ARRAY(buffer_src, uint, 32, 4, 4);
DUMMY_ARRAY(buffer_src, poly, 16, 8, 4);
+ DUMMY_ARRAY(buffer_src, float, 16, 4, 4);
DUMMY_ARRAY(buffer_src, float, 32, 2, 4);
+ DUMMY_ARRAY(buffer_src, float, 16, 8, 4);
DUMMY_ARRAY(buffer_src, float, 32, 4, 4);
/* Check vld2_lane/vld2q_lane. */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vset_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vset_lane.c
index 5159406..99cdf6d 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vset_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vset_lane.c
@@ -16,6 +16,7 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0x88 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0x55, 0xf7 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0xfff3 };
+VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0x4840, 0xca80 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0x4204cccd };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7,
@@ -41,6 +42,8 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xfc, 0xfd, 0xdd, 0xff };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0xfff4, 0xfff5, 0xee, 0xfff7 };
+VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
+ 0xca00, 0x4480, 0xc900, 0xc880 };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0xc1600000, 0x41333333 };
@@ -61,7 +64,9 @@ void exec_vset_lane (void)
/* Initialize input "vector" from "buffer". */
TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+ VLOAD(vector, buffer, , float, f, 16, 4);
VLOAD(vector, buffer, , float, f, 32, 2);
+ VLOAD(vector, buffer, q, float, f, 16, 8);
VLOAD(vector, buffer, q, float, f, 32, 4);
/* Choose value and lane arbitrarily. */
@@ -75,6 +80,7 @@ void exec_vset_lane (void)
TEST_VSET_LANE(, uint, u, 64, 1, 0x88, 0);
TEST_VSET_LANE(, poly, p, 8, 8, 0x55, 6);
TEST_VSET_LANE(, poly, p, 16, 4, 0x66, 2);
+ TEST_VSET_LANE(, float, f, 16, 4, 8.5f, 2);
TEST_VSET_LANE(, float, f, 32, 2, 33.2f, 1);
TEST_VSET_LANE(q, int, s, 8, 16, 0x99, 15);
@@ -87,6 +93,7 @@ void exec_vset_lane (void)
TEST_VSET_LANE(q, uint, u, 64, 2, 0x11, 1);
TEST_VSET_LANE(q, poly, p, 8, 16, 0xDD, 14);
TEST_VSET_LANE(q, poly, p, 16, 8, 0xEE, 6);
+ TEST_VSET_LANE(q, float, f, 16, 8, 4.5f, 5);
TEST_VSET_LANE(q, float, f, 32, 4, 11.2f, 3);
CHECK_RESULTS(TEST_MSG, "");
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c
index 08583b8..51d3fbe 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c
@@ -16,6 +16,7 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcb80, 0x3333, 0x3333, 0x3333 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x33333333 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xff, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33,
@@ -42,6 +43,8 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfa, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33 };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff4, 0x3333, 0x3333, 0x3333,
0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xc900, 0x3333, 0x3333, 0x3333,
+ 0x3333, 0x3333, 0x3333, 0x3333 };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0x33333333,
0x33333333, 0x33333333 };
@@ -69,6 +72,7 @@ void exec_vst1_lane (void)
TEST_VST1_LANE(, uint, u, 64, 1, 0);
TEST_VST1_LANE(, poly, p, 8, 8, 6);
TEST_VST1_LANE(, poly, p, 16, 4, 2);
+ TEST_VST1_LANE(, float, f, 16, 4, 1);
TEST_VST1_LANE(, float, f, 32, 2, 1);
TEST_VST1_LANE(q, int, s, 8, 16, 15);
@@ -81,6 +85,7 @@ void exec_vst1_lane (void)
TEST_VST1_LANE(q, uint, u, 64, 2, 0);
TEST_VST1_LANE(q, poly, p, 8, 16, 10);
TEST_VST1_LANE(q, poly, p, 16, 8, 4);
+ TEST_VST1_LANE(q, float, f, 16, 8, 6);
TEST_VST1_LANE(q, float, f, 32, 4, 1);
CHECK_RESULTS(TEST_MSG, "");
^ permalink raw reply [flat|nested] 35+ messages in thread