public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 1/2][ARM] PR/63870: Add qualifier to check lane bounds in expand
@ 2015-01-16 17:03 Alan Lawrence
  2015-01-16 17:33 ` [PATCH 2/2][ARM] PR/63870: Add a __builtin_lane_check Alan Lawrence
  2015-04-22 16:57 ` [PATCH 1/2][ARM] PR/63870: Add qualifier to check lane bounds in expand Alan Lawrence
  0 siblings, 2 replies; 5+ messages in thread
From: Alan Lawrence @ 2015-01-16 17:03 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 3633 bytes --]

This is based loosely upon svn r217440, "[AArch64] Add bounds checking to 
vqdm_lane intrinsics...", but applies to more intrinsics (including e.g. 
vget_lane), and does not do the endianness-flipping present on AArch64: the 
objective is to exactly preserve behaviour on all valid code. (Yes, the new 
qualifier may perhaps give us a location for flipping lanes according to 
endianness in the future, but I'm not doing that here.) Checks for lanes being 
in range for many insns are thus moved from assembly to expand time, with 
inlining history. For example, previous error message:

vqrdmulh_lane_s16_indices_1.c: In function 'test1':
vqrdmulh_lane_s16_indices_1.c:9:1: error: lane out of range
}
^

becomes:

In file included vqrdmulh_lane_s16_indices_1.c:3:0:
In function 'vqrdmulh_lane_s16',
inlined from 'test1' at 
gcc/testsuite/gcc.target/aarch64/simd/vqrdmulh_lane_s16_indices_1.c:8:10:
.../install/lib/gcc/arm-none-eabi/5.0.0/include/arm_neon.h:6882:10: error: lane 
-1 out of range 0 - 3
return (int16x4_t)builtin_neon_vqrdmulh_lanev4hi (a, b, c);

Note the question of how to common up tests with those in 
gcc.target/aarch64/simd/*_indices_1.c is not resolved by this patch.

Cross-tested check-gcc on arm-none-eabi
Bootstrapped on arm-none-linux-gnueabihf cortex-a15

gcc/ChangeLog:

     * config/arm/arm-builtins.c (enum arm_type_qualifiers):
     Add qualifier_lane_index.
     (arm_binop_imm_qualifiers, BINOP_IMM_QUALIFIERS): New.
     (arm_getlane_qualifiers): Use qualifier_lane_index.
     (arm_lanemac_qualifiers): Rename to...
     (arm_mac_n_qualifiers): ...this.
     (LANEMAC_QUALIFIERS): Rename to...
     (MAC_N_QUALIFIERS): ...this.
     (arm_mac_lane_qualifiers, MAC_LANE_QUALIFIERS): New.
     (arm_setlane_qualifiers): Use qualifier_lane_index.
     (arm_ternop_imm_qualifiers, TERNOP_IMM_QUALIFIERS): New.
     (enum builtin_arg): Add NEON_ARG_LANE_INDEX.
     (arm_expand_neon_args): Handle NEON_ARG_LANE_INDEX.
     (arm_expand_neon_builtin): Handle qualifier_lane_index.

     * config/arm/arm-protos.h (neon_lane_bounds): Add const_tree parameter.
     * config/arm/arm.c (bounds_check): Likewise, improve error message.
     (neon_lane_bounds, neon_const_bounds): Add arguments to bounds_check.
     * config/arm/arm_neon_builtins.def (vshrs_n, vshru_n, vrshrs_n,
     vrshru_n, vshrn_n, vrshrn_n, vqshrns_n, vqshrnu_n, vqrshrns_n,
     vqrshrnu_n, vqshrun_n, vqrshrun_n, vshl_n, vqshl_s_n, vqshl_u_n,
     vqshlu_n, vshlls_n, vshllu_n): Change qualifiers to BINOP_IMM.
     (vsras_n, vsrau_n, vrsras_n, vrsrau_n, vsri_n, vsli_n): Change
     qualifiers to TERNOP_IMM.
     (vdup_lane): Change qualifiers to GETLANE.
     (vmla_lane, vmlals_lane, vmlalu_lane, vqdmlal_lane, vmls_lane,
     vmlsls_lane, vmlslu_lane, vqdmlsl_lane): Change qualifiers to MAC_LANE.
     (vmla_n, vmlals_n, vmlalu_n, vqdmlal_n, vmls_n, vmlsls_n, vmlslu_n,
     vqdmlsl_n): Change qualifiers to MAC_N.

     * config/arm/neon.md (neon_vget_lane<mode>, neon_vget_laneu<mode>,
     neon_vget_lanedi, neon_vget_lanev2di, neon_vset_lane<mode>,
     neon_vset_lanedi, neon_vdup_lane<mode>, neon_vdup_lanedi,
     neon_vdup_lanev2di, neon_vmul_lane<mode>, neon_vmul_lane<mode>,
     neon_vmull<sup>_lane<mode>, neon_vqdmull_lane<mode>,
     neon_vq<r>dmulh_lane<mode>, neon_vq<r>dmulh_lane<mode>,
     neon_vmla_lane<mode>, neon_vmla_lane<mode>, neon_vmlal<sup>_lane<mode>,
     neon_vqdmlal_lane<mode>, neon_vmls_lane<mode>, neon_vmls_lane<mode>,
     neon_vmlsl<sup>_lane<mode>, neon_vqdmlsl_lane<mode>):
     Remove call to neon_lane_bounds.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: qual_lane_index.patch --]
[-- Type: text/x-patch; name=qual_lane_index.patch, Size: 19329 bytes --]

diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 2d2cafe56373fd9fb8cdba9c142c7ac9b188aed1..e7e16c21f619449d395efbfbe4efbda421ffb3a2 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -64,7 +64,9 @@ enum arm_type_qualifiers
   /* qualifier_const_pointer | qualifier_map_mode  */
   qualifier_const_pointer_map_mode = 0x86,
   /* Polynomial types.  */
-  qualifier_poly = 0x100
+  qualifier_poly = 0x100,
+  /* Lane indices - must be within range of previous argument = a vector.  */
+  qualifier_lane_index = 0x200
 };
 
 /*  The qualifier_internal allows generation of a unary builtin from
@@ -95,21 +97,40 @@ arm_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
 
 /* T (T, immediate).  */
 static enum arm_type_qualifiers
-arm_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+arm_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_none, qualifier_immediate };
+#define BINOP_IMM_QUALIFIERS (arm_binop_imm_qualifiers)
+
+/* T (T, lane index).  */
+static enum arm_type_qualifiers
+arm_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_lane_index };
 #define GETLANE_QUALIFIERS (arm_getlane_qualifiers)
 
 /* T (T, T, T, immediate).  */
 static enum arm_type_qualifiers
-arm_lanemac_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+arm_mac_n_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_none, qualifier_none,
       qualifier_none, qualifier_immediate };
-#define LANEMAC_QUALIFIERS (arm_lanemac_qualifiers)
+#define MAC_N_QUALIFIERS (arm_mac_n_qualifiers)
+
+/* T (T, T, T, lane index).  */
+static enum arm_type_qualifiers
+arm_mac_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_none,
+      qualifier_none, qualifier_lane_index };
+#define MAC_LANE_QUALIFIERS (arm_mac_lane_qualifiers)
 
 /* T (T, T, immediate).  */
 static enum arm_type_qualifiers
-arm_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+arm_ternop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate };
+#define TERNOP_IMM_QUALIFIERS (arm_ternop_imm_qualifiers)
+
+/* T (T, T, lane index).  */
+static enum arm_type_qualifiers
+arm_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_none, qualifier_lane_index };
 #define SETLANE_QUALIFIERS (arm_setlane_qualifiers)
 
 /* T (T, T).  */
@@ -1914,6 +1935,7 @@ arm_expand_unop_builtin (enum insn_code icode,
 typedef enum {
   NEON_ARG_COPY_TO_REG,
   NEON_ARG_CONSTANT,
+  NEON_ARG_LANE_INDEX,
   NEON_ARG_MEMORY,
   NEON_ARG_STOP
 } builtin_arg;
@@ -2030,6 +2052,16 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
 		op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
 	      break;
 
+	    case NEON_ARG_LANE_INDEX:
+	      /* Previous argument must be a vector, which this indexes.  */
+	      gcc_assert (argc > 0);
+	      if (CONST_INT_P (op[argc]))
+		{
+		  enum machine_mode vmode = mode[argc - 1];
+		  neon_lane_bounds (op[argc], 0, GET_MODE_NUNITS (vmode), exp);
+		}
+	      /* Fall through - if the lane index isn't a constant then
+		 the next case will error.  */
 	    case NEON_ARG_CONSTANT:
 	      if (!(*insn_data[icode].operand[opno].predicate)
 		  (op[argc], mode[argc]))
@@ -2154,7 +2186,9 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target)
       int operands_k = k - is_void;
       int expr_args_k = k - 1;
 
-      if (d->qualifiers[qualifiers_k] & qualifier_immediate)
+      if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
+	args[k] = NEON_ARG_LANE_INDEX;
+      else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
 	args[k] = NEON_ARG_CONSTANT;
       else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate)
 	{
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index fc453480c42f333778a6abe6c406025e8af430a4..46fdd93f214c17d2aab19fce4357af8e3e2d493b 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -89,7 +89,7 @@ extern void neon_pairwise_reduce (rtx, rtx, machine_mode,
 extern rtx neon_make_constant (rtx);
 extern tree arm_builtin_vectorized_function (tree, tree, tree);
 extern void neon_expand_vector_init (rtx, rtx);
-extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
+extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, const_tree);
 extern void neon_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
 extern HOST_WIDE_INT neon_element_bits (machine_mode);
 extern void neon_reinterpret (rtx, rtx);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 8193bf1ce63c47de9e66a383f6a6e843d7ba22d6..5b9f091d0ccb5e8ec1194f3c79848709d6f08242 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -12778,12 +12778,12 @@ neon_expand_vector_init (rtx target, rtx vals)
 }
 
 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
-   ERR if it doesn't.  FIXME: NEON bounds checks occur late in compilation, so
-   reported source locations are bogus.  */
+   ERR if it doesn't.  EXP indicates the source location, which includes the
+   inlining history for intrinsics.  */
 
 static void
 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
-	      const char *err)
+	      const_tree exp, const char *desc)
 {
   HOST_WIDE_INT lane;
 
@@ -12792,15 +12792,22 @@ bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
   lane = INTVAL (operand);
 
   if (lane < low || lane >= high)
-    error (err);
+    {
+      if (exp)
+	error ("%K%s %lld out of range %lld - %lld",
+	       exp, desc, lane, low, high - 1);
+      else
+	error ("%s %lld out of range %lld - %lld", desc, lane, low, high - 1);
+    }
 }
 
 /* Bounds-check lanes.  */
 
 void
-neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
+neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
+		  const_tree exp)
 {
-  bounds_check (operand, low, high, "lane out of range");
+  bounds_check (operand, low, high, exp, "lane");
 }
 
 /* Bounds-check constants.  */
@@ -12808,7 +12815,7 @@ neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
 void
 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
 {
-  bounds_check (operand, low, high, "constant out of range");
+  bounds_check (operand, low, high, NULL_TREE, "constant");
 }
 
 HOST_WIDE_INT
diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def
index f55591da84505447a11f4c2731b2e14e4fe2ca6d..f150b98b8096e94c6b39bbe477e5052b15f0313f 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -67,28 +67,28 @@ VAR8 (BINOP, vqshls, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
 VAR8 (BINOP, vqshlu, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
 VAR8 (BINOP, vqrshls, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
 VAR8 (BINOP, vqrshlu, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vshrs_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vshru_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vrshrs_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vrshru_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR3 (GETLANE, vshrn_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vrshrn_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vqshrns_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vqshrnu_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vqrshrns_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vqrshrnu_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vqshrun_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vqrshrun_n, v8hi, v4si, v2di)
-VAR8 (GETLANE, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vqshl_s_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vqshl_u_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR3 (GETLANE, vshlls_n, v8qi, v4hi, v2si)
-VAR3 (GETLANE, vshllu_n, v8qi, v4hi, v2si)
-VAR8 (SETLANE, vsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (SETLANE, vsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (SETLANE, vrsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (SETLANE, vrsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vshrs_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vshru_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vrshrs_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vrshru_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vshrn_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vrshrn_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vqshrns_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vqshrnu_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vqrshrns_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vqrshrnu_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vqshrun_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vqrshrun_n, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vqshl_s_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vqshl_u_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vshlls_n, v8qi, v4hi, v2si)
+VAR3 (BINOP_IMM, vshllu_n, v8qi, v4hi, v2si)
+VAR8 (TERNOP_IMM, vsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (TERNOP_IMM, vsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (TERNOP_IMM, vrsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (TERNOP_IMM, vrsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
 VAR2 (BINOP, vsub, v2sf, v4sf)
 VAR3 (BINOP, vsubls, v8qi, v4hi, v2si)
 VAR3 (BINOP, vsublu, v8qi, v4hi, v2si)
@@ -140,8 +140,8 @@ VAR6 (BINOP, vpadals, v8qi, v4hi, v2si, v16qi, v8hi, v4si)
 VAR6 (BINOP, vpadalu, v8qi, v4hi, v2si, v16qi, v8hi, v4si)
 VAR2 (BINOP, vrecps, v2sf, v4sf)
 VAR2 (BINOP, vrsqrts, v2sf, v4sf)
-VAR8 (SETLANE, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (SETLANE, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (TERNOP_IMM, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (TERNOP_IMM, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf)
 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si)
 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf)
@@ -162,7 +162,7 @@ VAR10 (SETLANE, vset_lane,
 VAR5 (UNOP, vcreate, v8qi, v4hi, v2si, v2sf, di)
 VAR10 (UNOP, vdup_n,
 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
-VAR10 (BINOP, vdup_lane,
+VAR10 (GETLANE, vdup_lane,
 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di)
 VAR5 (UNOP, vget_high, v16qi, v8hi, v4si, v4sf, v2di)
@@ -174,23 +174,23 @@ VAR3 (UNOP, vqmovun, v8hi, v4si, v2di)
 VAR3 (UNOP, vmovls, v8qi, v4hi, v2si)
 VAR3 (UNOP, vmovlu, v8qi, v4hi, v2si)
 VAR6 (SETLANE, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR2 (LANEMAC, vmlals_lane, v4hi, v2si)
-VAR2 (LANEMAC, vmlalu_lane, v4hi, v2si)
-VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si)
-VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR2 (LANEMAC, vmlsls_lane, v4hi, v2si)
-VAR2 (LANEMAC, vmlslu_lane, v4hi, v2si)
-VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si)
+VAR6 (MAC_LANE, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
+VAR2 (MAC_LANE, vmlals_lane, v4hi, v2si)
+VAR2 (MAC_LANE, vmlalu_lane, v4hi, v2si)
+VAR2 (MAC_LANE, vqdmlal_lane, v4hi, v2si)
+VAR6 (MAC_LANE, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
+VAR2 (MAC_LANE, vmlsls_lane, v4hi, v2si)
+VAR2 (MAC_LANE, vmlslu_lane, v4hi, v2si)
+VAR2 (MAC_LANE, vqdmlsl_lane, v4hi, v2si)
 VAR6 (BINOP, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR6 (LANEMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR2 (LANEMAC, vmlals_n, v4hi, v2si)
-VAR2 (LANEMAC, vmlalu_n, v4hi, v2si)
-VAR2 (LANEMAC, vqdmlal_n, v4hi, v2si)
-VAR6 (LANEMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR2 (LANEMAC, vmlsls_n, v4hi, v2si)
-VAR2 (LANEMAC, vmlslu_n, v4hi, v2si)
-VAR2 (LANEMAC, vqdmlsl_n, v4hi, v2si)
+VAR6 (MAC_N, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
+VAR2 (MAC_N, vmlals_n, v4hi, v2si)
+VAR2 (MAC_N, vmlalu_n, v4hi, v2si)
+VAR2 (MAC_N, vqdmlal_n, v4hi, v2si)
+VAR6 (MAC_N, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
+VAR2 (MAC_N, vmlsls_n, v4hi, v2si)
+VAR2 (MAC_N, vmlslu_n, v4hi, v2si)
+VAR2 (MAC_N, vqdmlsl_n, v4hi, v2si)
 VAR10 (SETLANE, vext,
 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf)
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 63c327ec68cacc85446c2c4aa3bf99eee9ebe074..bf620c4173a771f927b74fed216c0bd0b700370d 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -2722,8 +2722,6 @@
    (match_operand:SI 2 "immediate_operand" "")]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
-
   if (BYTES_BIG_ENDIAN)
     {
       /* The intrinsics are defined in terms of a model where the
@@ -2753,8 +2751,6 @@
    (match_operand:SI 2 "immediate_operand" "")]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
-
   if (BYTES_BIG_ENDIAN)
     {
       /* The intrinsics are defined in terms of a model where the
@@ -2784,7 +2780,6 @@
    (match_operand:SI 2 "immediate_operand" "")]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[2], 0, 1);
   emit_move_insn (operands[0], operands[1]);
   DONE;
 })
@@ -2795,18 +2790,11 @@
    (match_operand:SI 2 "immediate_operand" "")]
   "TARGET_NEON"
 {
-  switch (INTVAL (operands[2]))
-    {
-    case 0:
-      emit_move_insn (operands[0], gen_lowpart (DImode, operands[1]));
-      break;
-    case 1:
-      emit_move_insn (operands[0], gen_highpart (DImode, operands[1]));
-      break;
-    default:
-      neon_lane_bounds (operands[2], 0, 1);
-      FAIL;
-    }
+  int lane = INTVAL (operands[2]);
+  gcc_assert ((lane ==0) || (lane == 1));
+  emit_move_insn (operands[0], lane == 0
+				? gen_lowpart (DImode, operands[1])
+				: gen_highpart (DImode, operands[1]));
   DONE;
 })
 
@@ -2818,7 +2806,6 @@
   "TARGET_NEON"
 {
   unsigned int elt = INTVAL (operands[3]);
-  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
 
   if (BYTES_BIG_ENDIAN)
     {
@@ -2841,7 +2828,6 @@
    (match_operand:SI 3 "immediate_operand" "i")]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[3], 0, 1);
   emit_move_insn (operands[0], operands[1]);
   DONE;
 })
@@ -2923,7 +2909,6 @@
    (match_operand:SI 2 "immediate_operand" "i")]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<V_double_vector_mode>mode));
   if (BYTES_BIG_ENDIAN)
     {
       unsigned int elt = INTVAL (operands[2]);
@@ -2944,7 +2929,6 @@
    (match_operand:SI 2 "immediate_operand" "i")]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[2], 0, 1);
   emit_move_insn (operands[0], operands[1]);
   DONE;
 })
@@ -2956,7 +2940,6 @@
    (match_operand:SI 2 "immediate_operand" "i")]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[2], 0, 1);
   emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
   DONE;
 })
@@ -3156,7 +3139,6 @@
                     UNSPEC_VMUL_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
 }
   [(set (attr "type")
@@ -3174,7 +3156,6 @@
                     UNSPEC_VMUL_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
   return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
 }
   [(set (attr "type")
@@ -3192,7 +3173,6 @@
                           VMULL_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
 }
   [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
@@ -3207,7 +3187,6 @@
                           UNSPEC_VQDMULL_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
 }
   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
@@ -3222,7 +3201,6 @@
                       VQDMULH_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
 }
   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
@@ -3237,7 +3215,6 @@
                       VQDMULH_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
 }
   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
@@ -3253,7 +3230,6 @@
                      UNSPEC_VMLA_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
 }
   [(set (attr "type")
@@ -3272,7 +3248,6 @@
                      UNSPEC_VMLA_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
 }
   [(set (attr "type")
@@ -3291,7 +3266,6 @@
                           VMLAL_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
 }
   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
@@ -3307,7 +3281,6 @@
                           UNSPEC_VQDMLAL_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
 }
   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
@@ -3323,7 +3296,6 @@
                     UNSPEC_VMLS_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
 }
   [(set (attr "type")
@@ -3342,7 +3314,6 @@
                     UNSPEC_VMLS_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
 }
   [(set (attr "type")
@@ -3361,7 +3332,6 @@
                           VMLSL_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
 }
   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
@@ -3377,7 +3347,6 @@
                           UNSPEC_VQDMLSL_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
 }
   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 2/2][ARM] PR/63870: Add a __builtin_lane_check
  2015-01-16 17:03 [PATCH 1/2][ARM] PR/63870: Add qualifier to check lane bounds in expand Alan Lawrence
@ 2015-01-16 17:33 ` Alan Lawrence
  2015-04-22 16:58   ` Alan Lawrence
  2015-04-22 16:57 ` [PATCH 1/2][ARM] PR/63870: Add qualifier to check lane bounds in expand Alan Lawrence
  1 sibling, 1 reply; 5+ messages in thread
From: Alan Lawrence @ 2015-01-16 17:33 UTC (permalink / raw)
  To: gcc-patches; +Cc: Charles Baylis

[-- Attachment #1: Type: text/plain, Size: 978 bytes --]

This parallels the present form of __builtin_aarch64_im_lane_boundsi, and allows 
to check lane indices for intrinsics that can otherwise be written in terms of 
GCC vector extensions.

The new builtin is not used in this patch but is used in my series of float16_t 
intrinsics (https://gcc.gnu.org/ml/gcc-patches/2015-01/msg01434.html), and at 
some point in the future we should rewrite existing intrinsics (for other types) 
to this form too, but I'm leaving that for a later patch series :).

Cross-tested check-gcc on arm-none-eabi
Bootstrapped on arm-none-linux-gnueabihf cortex-a15

gcc/ChangeLog:

     * config/arm/arm-builtins.c (enum arm_builtins):
     Add ARM_BUILTIN_NEON_BASE and ARM_BUILTIN_NEON_LANE_CHECK.
     (ARM_BUILTIN_NEON_BASE): Rename macro to....
     (ARM_BUILTIN_NEON_PATTERN_START): ...this.
     (arm_init_neon_builtins): Register __builtin_arm_lane_check.
     (arm_expand_neon_builtin): Handle ARM_BUILTIN_NEON_LANE_CHECK.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: builtin_lane_check.patch --]
[-- Type: text/x-patch; name=builtin_lane_check.patch, Size: 2856 bytes --]

diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 2ca7ac5ad3cf82941a5d3b6707a0a41f3157190b..baa83490fcd9bf68d9e9bdbd57cdf6f2d3d0e056 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -521,12 +521,16 @@ enum arm_builtins
 #undef CRYPTO2
 #undef CRYPTO3
 
+  ARM_BUILTIN_NEON_BASE,
+  ARM_BUILTIN_NEON_LANE_CHECK = ARM_BUILTIN_NEON_BASE,
+
 #include "arm_neon_builtins.def"
 
   ARM_BUILTIN_MAX
 };
 
-#define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
+#define ARM_BUILTIN_NEON_PATTERN_START \
+    (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
 
 #undef CF
 #undef VAR1
@@ -885,7 +889,7 @@ arm_init_simd_builtin_scalar_types (void)
 static void
 arm_init_neon_builtins (void)
 {
-  unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
+  unsigned int i, fcode = ARM_BUILTIN_NEON_PATTERN_START;
 
   arm_init_simd_builtin_types ();
 
@@ -895,6 +899,15 @@ arm_init_neon_builtins (void)
      system.  */
   arm_init_simd_builtin_scalar_types ();
 
+  tree lane_check_fpr = build_function_type_list (void_type_node,
+						  intSI_type_node,
+						  intSI_type_node,
+						  NULL);
+  arm_builtin_decls[ARM_BUILTIN_NEON_LANE_CHECK] =
+      add_builtin_function ("__builtin_arm_lane_check", lane_check_fpr,
+			    ARM_BUILTIN_NEON_LANE_CHECK, BUILT_IN_MD,
+			    NULL, NULL_TREE);
+
   for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++, fcode++)
     {
       bool print_type_signature_p = false;
@@ -2155,14 +2168,28 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
   return target;
 }
 
-/* Expand a Neon builtin. These are "special" because they don't have symbolic
+/* Expand a Neon builtin, i.e. those registered only if TARGET_NEON holds.
+   Most of these are "special" because they don't have symbolic
    constants defined per-instruction or per instruction-variant. Instead, the
    required info is looked up in the table neon_builtin_data.  */
 static rtx
 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
 {
+  if (fcode == ARM_BUILTIN_NEON_LANE_CHECK)
+    {
+      tree nlanes = CALL_EXPR_ARG (exp, 0);
+      gcc_assert (TREE_CODE (nlanes) == INTEGER_CST);
+      rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 1));
+      if (CONST_INT_P (lane_idx))
+	neon_lane_bounds (lane_idx, 0, TREE_INT_CST_LOW (nlanes), exp);
+      else
+	error ("%Klane index must be a constant immediate", exp);
+      /* Don't generate any RTL.  */
+      return const0_rtx;
+    }
+
   neon_builtin_datum *d =
-		&neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
+		&neon_builtin_data[fcode - ARM_BUILTIN_NEON_PATTERN_START];
   enum insn_code icode = d->code;
   builtin_arg args[SIMD_MAX_BUILTIN_ARGS];
   int num_args = insn_data[d->code].n_operands;

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/2][ARM] PR/63870: Add qualifier to check lane bounds in expand
  2015-01-16 17:03 [PATCH 1/2][ARM] PR/63870: Add qualifier to check lane bounds in expand Alan Lawrence
  2015-01-16 17:33 ` [PATCH 2/2][ARM] PR/63870: Add a __builtin_lane_check Alan Lawrence
@ 2015-04-22 16:57 ` Alan Lawrence
  2015-05-08 12:57   ` Alan Lawrence
  1 sibling, 1 reply; 5+ messages in thread
From: Alan Lawrence @ 2015-04-22 16:57 UTC (permalink / raw)
  To: gcc-patches; +Cc: Kyrylo Tkachov, Richard Earnshaw

[-- Attachment #1: Type: text/plain, Size: 4037 bytes --]

Ping (https://gcc.gnu.org/ml/gcc-patches/2015-01/msg01422.html).

These are required for float16 patches posted at 
https://gcc.gnu.org/ml/gcc-patches/2015-04/msg01332.html .

Bootstrapped + check-gcc on arm-none-linux-gnueabihf.

Alan Lawrence wrote:
> This is based loosely upon svn r217440, "[AArch64] Add bounds checking to 
> vqdm_lane intrinsics...", but applies to more intrinsics (including e.g. 
> vget_lane), and does not do the endianness-flipping present on AArch64: the 
> objective is to exactly preserve behaviour on all valid code. (Yes, the new 
> qualifier may perhaps give us a location for flipping lanes according to 
> endianness in the future, but I'm not doing that here.) Checks for lanes being 
> in range for many insns are thus moved from assembly to expand time, with 
> inlining history. For example, previous error message:
> 
> vqrdmulh_lane_s16_indices_1.c: In function 'test1':
> vqrdmulh_lane_s16_indices_1.c:9:1: error: lane out of range
> }
> ^
> 
> becomes:
> 
> In file included vqrdmulh_lane_s16_indices_1.c:3:0:
> In function 'vqrdmulh_lane_s16',
> inlined from 'test1' at 
> gcc/testsuite/gcc.target/aarch64/simd/vqrdmulh_lane_s16_indices_1.c:8:10:
> .../install/lib/gcc/arm-none-eabi/5.0.0/include/arm_neon.h:6882:10: error: lane 
> -1 out of range 0 - 3
> return (int16x4_t)builtin_neon_vqrdmulh_lanev4hi (a, b, c);
> 
> Note the question of how to common up tests with those in 
> gcc.target/aarch64/simd/*_indices_1.c is not resolved by this patch.
> 
> Cross-tested check-gcc on arm-none-eabi
> Bootstrapped on arm-none-linux-gnueabihf cortex-a15
> 
> gcc/ChangeLog:
> 
>      * config/arm/arm-builtins.c (enum arm_type_qualifiers):
>      Add qualifier_lane_index.
>      (arm_binop_imm_qualifiers, BINOP_IMM_QUALIFIERS): New.
>      (arm_getlane_qualifiers): Use qualifier_lane_index.
>      (arm_lanemac_qualifiers): Rename to...
>      (arm_mac_n_qualifiers): ...this.
>      (LANEMAC_QUALIFIERS): Rename to...
>      (MAC_N_QUALIFIERS): ...this.
>      (arm_mac_lane_qualifiers, MAC_LANE_QUALIFIERS): New.
>      (arm_setlane_qualifiers): Use qualifier_lane_index.
>      (arm_ternop_imm_qualifiers, TERNOP_IMM_QUALIFIERS): New.
>      (enum builtin_arg): Add NEON_ARG_LANE_INDEX.
>      (arm_expand_neon_args): Handle NEON_ARG_LANE_INDEX.
>      (arm_expand_neon_builtin): Handle qualifier_lane_index.
> 
>      * config/arm/arm-protos.h (neon_lane_bounds): Add const_tree parameter.
>      * config/arm/arm.c (bounds_check): Likewise, improve error message.
>      (neon_lane_bounds, neon_const_bounds): Add arguments to bounds_check.
>      * config/arm/arm_neon_builtins.def (vshrs_n, vshru_n, vrshrs_n,
>      vrshru_n, vshrn_n, vrshrn_n, vqshrns_n, vqshrnu_n, vqrshrns_n,
>      vqrshrnu_n, vqshrun_n, vqrshrun_n, vshl_n, vqshl_s_n, vqshl_u_n,
>      vqshlu_n, vshlls_n, vshllu_n): Change qualifiers to BINOP_IMM.
>      (vsras_n, vsrau_n, vrsras_n, vrsrau_n, vsri_n, vsli_n): Change
>      qualifiers to TERNOP_IMM.
>      (vdup_lane): Change qualifiers to GETLANE.
>      (vmla_lane, vmlals_lane, vmlalu_lane, vqdmlal_lane, vmls_lane,
>      vmlsls_lane, vmlslu_lane, vqdmlsl_lane): Change qualifiers to MAC_LANE.
>      (vmla_n, vmlals_n, vmlalu_n, vqdmlal_n, vmls_n, vmlsls_n, vmlslu_n,
>      vqdmlsl_n): Change qualifiers to MAC_N.
> 
>      * config/arm/neon.md (neon_vget_lane<mode>, neon_vget_laneu<mode>,
>      neon_vget_lanedi, neon_vget_lanev2di, neon_vset_lane<mode>,
>      neon_vset_lanedi, neon_vdup_lane<mode>, neon_vdup_lanedi,
>      neon_vdup_lanev2di, neon_vmul_lane<mode>, neon_vmul_lane<mode>,
>      neon_vmull<sup>_lane<mode>, neon_vqdmull_lane<mode>,
>      neon_vq<r>dmulh_lane<mode>, neon_vq<r>dmulh_lane<mode>,
>      neon_vmla_lane<mode>, neon_vmla_lane<mode>, neon_vmlal<sup>_lane<mode>,
>      neon_vqdmlal_lane<mode>, neon_vmls_lane<mode>, neon_vmls_lane<mode>,
>      neon_vmlsl<sup>_lane<mode>, neon_vqdmlsl_lane<mode>):
>      Remove call to neon_lane_bounds.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: arm_qualifier_lane_index.patch --]
[-- Type: text/x-patch; name=arm_qualifier_lane_index.patch, Size: 19003 bytes --]

diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 7a45113..20d2198 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -89,7 +89,9 @@ enum arm_type_qualifiers
   /* qualifier_const_pointer | qualifier_map_mode  */
   qualifier_const_pointer_map_mode = 0x86,
   /* Polynomial types.  */
-  qualifier_poly = 0x100
+  qualifier_poly = 0x100,
+  /* Lane indices - must be within range of previous argument = a vector.  */
+  qualifier_lane_index = 0x200
 };
 
 /*  The qualifier_internal allows generation of a unary builtin from
@@ -120,21 +122,40 @@ arm_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
 
 /* T (T, immediate).  */
 static enum arm_type_qualifiers
-arm_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+arm_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_none, qualifier_immediate };
+#define BINOP_IMM_QUALIFIERS (arm_binop_imm_qualifiers)
+
+/* T (T, lane index).  */
+static enum arm_type_qualifiers
+arm_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_lane_index };
 #define GETLANE_QUALIFIERS (arm_getlane_qualifiers)
 
 /* T (T, T, T, immediate).  */
 static enum arm_type_qualifiers
-arm_lanemac_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+arm_mac_n_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_none, qualifier_none,
       qualifier_none, qualifier_immediate };
-#define LANEMAC_QUALIFIERS (arm_lanemac_qualifiers)
+#define MAC_N_QUALIFIERS (arm_mac_n_qualifiers)
+
+/* T (T, T, T, lane index).  */
+static enum arm_type_qualifiers
+arm_mac_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_none,
+      qualifier_none, qualifier_lane_index };
+#define MAC_LANE_QUALIFIERS (arm_mac_lane_qualifiers)
 
 /* T (T, T, immediate).  */
 static enum arm_type_qualifiers
-arm_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+arm_ternop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate };
+#define TERNOP_IMM_QUALIFIERS (arm_ternop_imm_qualifiers)
+
+/* T (T, T, lane index).  */
+static enum arm_type_qualifiers
+arm_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_none, qualifier_lane_index };
 #define SETLANE_QUALIFIERS (arm_setlane_qualifiers)
 
 /* T (T, T).  */
@@ -1939,6 +1960,7 @@ arm_expand_unop_builtin (enum insn_code icode,
 typedef enum {
   NEON_ARG_COPY_TO_REG,
   NEON_ARG_CONSTANT,
+  NEON_ARG_LANE_INDEX,
   NEON_ARG_MEMORY,
   NEON_ARG_STOP
 } builtin_arg;
@@ -2055,6 +2077,16 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
 		op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
 	      break;
 
+	    case NEON_ARG_LANE_INDEX:
+	      /* Previous argument must be a vector, which this indexes.  */
+	      gcc_assert (argc > 0);
+	      if (CONST_INT_P (op[argc]))
+		{
+		  enum machine_mode vmode = mode[argc - 1];
+		  neon_lane_bounds (op[argc], 0, GET_MODE_NUNITS (vmode), exp);
+		}
+	      /* Fall through - if the lane index isn't a constant then
+		 the next case will error.  */
 	    case NEON_ARG_CONSTANT:
 	      if (!(*insn_data[icode].operand[opno].predicate)
 		  (op[argc], mode[argc]))
@@ -2182,7 +2214,9 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target)
       int operands_k = k - is_void;
       int expr_args_k = k - 1;
 
-      if (d->qualifiers[qualifiers_k] & qualifier_immediate)
+      if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
+	args[k] = NEON_ARG_LANE_INDEX;
+      else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
 	args[k] = NEON_ARG_CONSTANT;
       else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate)
 	{
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 16eb854..d99fd91 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -89,7 +89,7 @@ extern void neon_pairwise_reduce (rtx, rtx, machine_mode,
 extern rtx neon_make_constant (rtx);
 extern tree arm_builtin_vectorized_function (tree, tree, tree);
 extern void neon_expand_vector_init (rtx, rtx);
-extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
+extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, const_tree);
 extern void neon_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
 extern HOST_WIDE_INT neon_element_bits (machine_mode);
 extern void neon_reinterpret (rtx, rtx);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 50bd3eb..4181f12 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -12892,12 +12892,12 @@ neon_expand_vector_init (rtx target, rtx vals)
 }
 
 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
-   ERR if it doesn't.  FIXME: NEON bounds checks occur late in compilation, so
-   reported source locations are bogus.  */
+   ERR if it doesn't.  EXP indicates the source location, which includes the
+   inlining history for intrinsics.  */
 
 static void
 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
-	      const char *err)
+	      const_tree exp, const char *desc)
 {
   HOST_WIDE_INT lane;
 
@@ -12906,15 +12906,22 @@ bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
   lane = INTVAL (operand);
 
   if (lane < low || lane >= high)
-    error (err);
+    {
+      if (exp)
+	error ("%K%s %lld out of range %lld - %lld",
+	       exp, desc, lane, low, high - 1);
+      else
+	error ("%s %lld out of range %lld - %lld", desc, lane, low, high - 1);
+    }
 }
 
 /* Bounds-check lanes.  */
 
 void
-neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
+neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
+		  const_tree exp)
 {
-  bounds_check (operand, low, high, "lane out of range");
+  bounds_check (operand, low, high, exp, "lane");
 }
 
 /* Bounds-check constants.  */
@@ -12922,7 +12929,7 @@ neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
 void
 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
 {
-  bounds_check (operand, low, high, "constant out of range");
+  bounds_check (operand, low, high, NULL_TREE, "constant");
 }
 
 HOST_WIDE_INT
diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def
index f55591d..f150b98 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -67,28 +67,28 @@ VAR8 (BINOP, vqshls, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
 VAR8 (BINOP, vqshlu, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
 VAR8 (BINOP, vqrshls, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
 VAR8 (BINOP, vqrshlu, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vshrs_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vshru_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vrshrs_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vrshru_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR3 (GETLANE, vshrn_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vrshrn_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vqshrns_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vqshrnu_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vqrshrns_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vqrshrnu_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vqshrun_n, v8hi, v4si, v2di)
-VAR3 (GETLANE, vqrshrun_n, v8hi, v4si, v2di)
-VAR8 (GETLANE, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vqshl_s_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vqshl_u_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (GETLANE, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR3 (GETLANE, vshlls_n, v8qi, v4hi, v2si)
-VAR3 (GETLANE, vshllu_n, v8qi, v4hi, v2si)
-VAR8 (SETLANE, vsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (SETLANE, vsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (SETLANE, vrsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (SETLANE, vrsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vshrs_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vshru_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vrshrs_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vrshru_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vshrn_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vrshrn_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vqshrns_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vqshrnu_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vqrshrns_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vqrshrnu_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vqshrun_n, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vqrshrun_n, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vqshl_s_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vqshl_u_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (BINOP_IMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR3 (BINOP_IMM, vshlls_n, v8qi, v4hi, v2si)
+VAR3 (BINOP_IMM, vshllu_n, v8qi, v4hi, v2si)
+VAR8 (TERNOP_IMM, vsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (TERNOP_IMM, vsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (TERNOP_IMM, vrsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (TERNOP_IMM, vrsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
 VAR2 (BINOP, vsub, v2sf, v4sf)
 VAR3 (BINOP, vsubls, v8qi, v4hi, v2si)
 VAR3 (BINOP, vsublu, v8qi, v4hi, v2si)
@@ -140,8 +140,8 @@ VAR6 (BINOP, vpadals, v8qi, v4hi, v2si, v16qi, v8hi, v4si)
 VAR6 (BINOP, vpadalu, v8qi, v4hi, v2si, v16qi, v8hi, v4si)
 VAR2 (BINOP, vrecps, v2sf, v4sf)
 VAR2 (BINOP, vrsqrts, v2sf, v4sf)
-VAR8 (SETLANE, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-VAR8 (SETLANE, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (TERNOP_IMM, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
+VAR8 (TERNOP_IMM, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf)
 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si)
 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf)
@@ -162,7 +162,7 @@ VAR10 (SETLANE, vset_lane,
 VAR5 (UNOP, vcreate, v8qi, v4hi, v2si, v2sf, di)
 VAR10 (UNOP, vdup_n,
 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
-VAR10 (BINOP, vdup_lane,
+VAR10 (GETLANE, vdup_lane,
 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di)
 VAR5 (UNOP, vget_high, v16qi, v8hi, v4si, v4sf, v2di)
@@ -174,23 +174,23 @@ VAR3 (UNOP, vqmovun, v8hi, v4si, v2di)
 VAR3 (UNOP, vmovls, v8qi, v4hi, v2si)
 VAR3 (UNOP, vmovlu, v8qi, v4hi, v2si)
 VAR6 (SETLANE, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR2 (LANEMAC, vmlals_lane, v4hi, v2si)
-VAR2 (LANEMAC, vmlalu_lane, v4hi, v2si)
-VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si)
-VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR2 (LANEMAC, vmlsls_lane, v4hi, v2si)
-VAR2 (LANEMAC, vmlslu_lane, v4hi, v2si)
-VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si)
+VAR6 (MAC_LANE, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
+VAR2 (MAC_LANE, vmlals_lane, v4hi, v2si)
+VAR2 (MAC_LANE, vmlalu_lane, v4hi, v2si)
+VAR2 (MAC_LANE, vqdmlal_lane, v4hi, v2si)
+VAR6 (MAC_LANE, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
+VAR2 (MAC_LANE, vmlsls_lane, v4hi, v2si)
+VAR2 (MAC_LANE, vmlslu_lane, v4hi, v2si)
+VAR2 (MAC_LANE, vqdmlsl_lane, v4hi, v2si)
 VAR6 (BINOP, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR6 (LANEMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR2 (LANEMAC, vmlals_n, v4hi, v2si)
-VAR2 (LANEMAC, vmlalu_n, v4hi, v2si)
-VAR2 (LANEMAC, vqdmlal_n, v4hi, v2si)
-VAR6 (LANEMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR2 (LANEMAC, vmlsls_n, v4hi, v2si)
-VAR2 (LANEMAC, vmlslu_n, v4hi, v2si)
-VAR2 (LANEMAC, vqdmlsl_n, v4hi, v2si)
+VAR6 (MAC_N, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
+VAR2 (MAC_N, vmlals_n, v4hi, v2si)
+VAR2 (MAC_N, vmlalu_n, v4hi, v2si)
+VAR2 (MAC_N, vqdmlal_n, v4hi, v2si)
+VAR6 (MAC_N, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
+VAR2 (MAC_N, vmlsls_n, v4hi, v2si)
+VAR2 (MAC_N, vmlslu_n, v4hi, v2si)
+VAR2 (MAC_N, vqdmlsl_n, v4hi, v2si)
 VAR10 (SETLANE, vext,
 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf)
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 63c327e..bf620c4 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -2722,8 +2722,6 @@
    (match_operand:SI 2 "immediate_operand" "")]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
-
   if (BYTES_BIG_ENDIAN)
     {
       /* The intrinsics are defined in terms of a model where the
@@ -2753,8 +2751,6 @@
    (match_operand:SI 2 "immediate_operand" "")]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
-
   if (BYTES_BIG_ENDIAN)
     {
       /* The intrinsics are defined in terms of a model where the
@@ -2784,7 +2780,6 @@
    (match_operand:SI 2 "immediate_operand" "")]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[2], 0, 1);
   emit_move_insn (operands[0], operands[1]);
   DONE;
 })
@@ -2795,18 +2790,11 @@
    (match_operand:SI 2 "immediate_operand" "")]
   "TARGET_NEON"
 {
-  switch (INTVAL (operands[2]))
-    {
-    case 0:
-      emit_move_insn (operands[0], gen_lowpart (DImode, operands[1]));
-      break;
-    case 1:
-      emit_move_insn (operands[0], gen_highpart (DImode, operands[1]));
-      break;
-    default:
-      neon_lane_bounds (operands[2], 0, 1);
-      FAIL;
-    }
+  int lane = INTVAL (operands[2]);
+  gcc_assert ((lane ==0) || (lane == 1));
+  emit_move_insn (operands[0], lane == 0
+				? gen_lowpart (DImode, operands[1])
+				: gen_highpart (DImode, operands[1]));
   DONE;
 })
 
@@ -2818,7 +2806,6 @@
   "TARGET_NEON"
 {
   unsigned int elt = INTVAL (operands[3]);
-  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
 
   if (BYTES_BIG_ENDIAN)
     {
@@ -2841,7 +2828,6 @@
    (match_operand:SI 3 "immediate_operand" "i")]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[3], 0, 1);
   emit_move_insn (operands[0], operands[1]);
   DONE;
 })
@@ -2923,7 +2909,6 @@
    (match_operand:SI 2 "immediate_operand" "i")]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<V_double_vector_mode>mode));
   if (BYTES_BIG_ENDIAN)
     {
       unsigned int elt = INTVAL (operands[2]);
@@ -2944,7 +2929,6 @@
    (match_operand:SI 2 "immediate_operand" "i")]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[2], 0, 1);
   emit_move_insn (operands[0], operands[1]);
   DONE;
 })
@@ -2956,7 +2940,6 @@
    (match_operand:SI 2 "immediate_operand" "i")]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[2], 0, 1);
   emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
   DONE;
 })
@@ -3156,7 +3139,6 @@
                     UNSPEC_VMUL_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
 }
   [(set (attr "type")
@@ -3174,7 +3156,6 @@
                     UNSPEC_VMUL_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
   return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
 }
   [(set (attr "type")
@@ -3192,7 +3173,6 @@
                           VMULL_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
 }
   [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
@@ -3207,7 +3187,6 @@
                           UNSPEC_VQDMULL_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
 }
   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
@@ -3222,7 +3201,6 @@
                       VQDMULH_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
 }
   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
@@ -3237,7 +3215,6 @@
                       VQDMULH_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
 }
   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
@@ -3253,7 +3230,6 @@
                      UNSPEC_VMLA_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
 }
   [(set (attr "type")
@@ -3272,7 +3248,6 @@
                      UNSPEC_VMLA_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
 }
   [(set (attr "type")
@@ -3291,7 +3266,6 @@
                           VMLAL_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
 }
   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
@@ -3307,7 +3281,6 @@
                           UNSPEC_VQDMLAL_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
 }
   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
@@ -3323,7 +3296,6 @@
                     UNSPEC_VMLS_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
 }
   [(set (attr "type")
@@ -3342,7 +3314,6 @@
                     UNSPEC_VMLS_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
 }
   [(set (attr "type")
@@ -3361,7 +3332,6 @@
                           VMLSL_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
 }
   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
@@ -3377,7 +3347,6 @@
                           UNSPEC_VQDMLSL_LANE))]
   "TARGET_NEON"
 {
-  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
   return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
 }
   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2][ARM] PR/63870: Add a __builtin_lane_check
  2015-01-16 17:33 ` [PATCH 2/2][ARM] PR/63870: Add a __builtin_lane_check Alan Lawrence
@ 2015-04-22 16:58   ` Alan Lawrence
  0 siblings, 0 replies; 5+ messages in thread
From: Alan Lawrence @ 2015-04-22 16:58 UTC (permalink / raw)
  To: gcc-patches; +Cc: Kyrylo Tkachov, Richard Earnshaw

[-- Attachment #1: Type: text/plain, Size: 1276 bytes --]

Ping (https://gcc.gnu.org/ml/gcc-patches/2015-01/msg01436.html).

These are required for float16 patches posted at 
https://gcc.gnu.org/ml/gcc-patches/2015-04/msg01332.html

Bootstrapped + check-gcc on arm-none-linux-gnueabihf.

Alan Lawrence wrote:
> This parallels the present form of __builtin_aarch64_im_lane_boundsi, and allows 
> to check lane indices for intrinsics that can otherwise be written in terms of 
> GCC vector extensions.
> 
> The new builtin is not used in this patch but is used in my series of float16_t 
> intrinsics (https://gcc.gnu.org/ml/gcc-patches/2015-01/msg01434.html), and at 
> some point in the future we should rewrite existing intrinsics (for other types) 
> to this form too, but I'm leaving that for a later patch series :).
> 
> Cross-tested check-gcc on arm-none-eabi
> Bootstrapped on arm-none-linux-gnueabihf cortex-a15
> 
> gcc/ChangeLog:
> 
>      * config/arm/arm-builtins.c (enum arm_builtins):
>      Add ARM_BUILTIN_NEON_BASE and ARM_BUILTIN_NEON_LANE_CHECK.
>      (ARM_BUILTIN_NEON_BASE): Rename macro to....
>      (ARM_BUILTIN_NEON_PATTERN_START): ...this.
>      (arm_init_neon_builtins): Register __builtin_arm_lane_check.
>      (arm_expand_neon_builtin): Handle ARM_BUILTIN_NEON_LANE_CHECK.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: arm_builtin_lane_check.patch --]
[-- Type: text/x-patch; name=arm_builtin_lane_check.patch, Size: 2965 bytes --]

commit 3d5f2b80dc4527b4874bff458bb047946322028f
Author: Alan Lawrence <alan.lawrence@arm.com>
Date:   Mon Dec 8 18:36:30 2014 +0000

    Add __builtin_arm_lane_check

diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 20d2198..3de2be7 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -546,12 +546,16 @@ enum arm_builtins
 #undef CRYPTO2
 #undef CRYPTO3
 
+  ARM_BUILTIN_NEON_BASE,
+  ARM_BUILTIN_NEON_LANE_CHECK = ARM_BUILTIN_NEON_BASE,
+
 #include "arm_neon_builtins.def"
 
   ARM_BUILTIN_MAX
 };
 
-#define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
+#define ARM_BUILTIN_NEON_PATTERN_START \
+    (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
 
 #undef CF
 #undef VAR1
@@ -910,7 +914,7 @@ arm_init_simd_builtin_scalar_types (void)
 static void
 arm_init_neon_builtins (void)
 {
-  unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
+  unsigned int i, fcode = ARM_BUILTIN_NEON_PATTERN_START;
 
   arm_init_simd_builtin_types ();
 
@@ -920,6 +924,15 @@ arm_init_neon_builtins (void)
      system.  */
   arm_init_simd_builtin_scalar_types ();
 
+  tree lane_check_fpr = build_function_type_list (void_type_node,
+						  intSI_type_node,
+						  intSI_type_node,
+						  NULL);
+  arm_builtin_decls[ARM_BUILTIN_NEON_LANE_CHECK] =
+      add_builtin_function ("__builtin_arm_lane_check", lane_check_fpr,
+			    ARM_BUILTIN_NEON_LANE_CHECK, BUILT_IN_MD,
+			    NULL, NULL_TREE);
+
   for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++, fcode++)
     {
       bool print_type_signature_p = false;
@@ -2183,14 +2196,28 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
   return target;
 }
 
-/* Expand a Neon builtin. These are "special" because they don't have symbolic
+/* Expand a Neon builtin, i.e. those registered only if TARGET_NEON holds.
+   Most of these are "special" because they don't have symbolic
    constants defined per-instruction or per instruction-variant. Instead, the
    required info is looked up in the table neon_builtin_data.  */
 static rtx
 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
 {
+  if (fcode == ARM_BUILTIN_NEON_LANE_CHECK)
+    {
+      tree nlanes = CALL_EXPR_ARG (exp, 0);
+      gcc_assert (TREE_CODE (nlanes) == INTEGER_CST);
+      rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 1));
+      if (CONST_INT_P (lane_idx))
+	neon_lane_bounds (lane_idx, 0, TREE_INT_CST_LOW (nlanes), exp);
+      else
+	error ("%Klane index must be a constant immediate", exp);
+      /* Don't generate any RTL.  */
+      return const0_rtx;
+    }
+
   neon_builtin_datum *d =
-		&neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
+		&neon_builtin_data[fcode - ARM_BUILTIN_NEON_PATTERN_START];
   enum insn_code icode = d->code;
   builtin_arg args[SIMD_MAX_BUILTIN_ARGS];
   int num_args = insn_data[d->code].n_operands;

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/2][ARM] PR/63870: Add qualifier to check lane bounds in expand
  2015-04-22 16:57 ` [PATCH 1/2][ARM] PR/63870: Add qualifier to check lane bounds in expand Alan Lawrence
@ 2015-05-08 12:57   ` Alan Lawrence
  0 siblings, 0 replies; 5+ messages in thread
From: Alan Lawrence @ 2015-05-08 12:57 UTC (permalink / raw)
  To: gcc-patches; +Cc: Kyrylo Tkachov, Richard Earnshaw

Alan Lawrence wrote:
> Ping (https://gcc.gnu.org/ml/gcc-patches/2015-01/msg01422.html).
> 
> These are required for float16 patches posted at 
> https://gcc.gnu.org/ml/gcc-patches/2015-04/msg01332.html .
> 
> Bootstrapped + check-gcc on arm-none-linux-gnueabihf.
> 
> Alan Lawrence wrote:
>> This is based loosely upon svn r217440, "[AArch64] Add bounds checking to 
>> vqdm_lane intrinsics...", but applies to more intrinsics (including e.g. 
>> vget_lane), and does not do the endianness-flipping present on AArch64: the 
>> objective is to exactly preserve behaviour on all valid code. (Yes, the new 
>> qualifier may perhaps give us a location for flipping lanes according to 
>> endianness in the future, but I'm not doing that here.) Checks for lanes being 
>> in range for many insns are thus moved from assembly to expand time, with 
>> inlining history. For example, previous error message:
>>
>> vqrdmulh_lane_s16_indices_1.c: In function 'test1':
>> vqrdmulh_lane_s16_indices_1.c:9:1: error: lane out of range
>> }
>> ^
>>
>> becomes:
>>
>> In file included vqrdmulh_lane_s16_indices_1.c:3:0:
>> In function 'vqrdmulh_lane_s16',
>> inlined from 'test1' at 
>> gcc/testsuite/gcc.target/aarch64/simd/vqrdmulh_lane_s16_indices_1.c:8:10:
>> .../install/lib/gcc/arm-none-eabi/5.0.0/include/arm_neon.h:6882:10: error: lane 
>> -1 out of range 0 - 3
>> return (int16x4_t)builtin_neon_vqrdmulh_lanev4hi (a, b, c);
>>
>> Note the question of how to common up tests with those in 
>> gcc.target/aarch64/simd/*_indices_1.c is not resolved by this patch.
>>
>> Cross-tested check-gcc on arm-none-eabi
>> Bootstrapped on arm-none-linux-gnueabihf cortex-a15
>>
>> gcc/ChangeLog:
>>
>>      * config/arm/arm-builtins.c (enum arm_type_qualifiers):
>>      Add qualifier_lane_index.
>>      (arm_binop_imm_qualifiers, BINOP_IMM_QUALIFIERS): New.
>>      (arm_getlane_qualifiers): Use qualifier_lane_index.
>>      (arm_lanemac_qualifiers): Rename to...
>>      (arm_mac_n_qualifiers): ...this.
>>      (LANEMAC_QUALIFIERS): Rename to...
>>      (MAC_N_QUALIFIERS): ...this.
>>      (arm_mac_lane_qualifiers, MAC_LANE_QUALIFIERS): New.
>>      (arm_setlane_qualifiers): Use qualifier_lane_index.
>>      (arm_ternop_imm_qualifiers, TERNOP_IMM_QUALIFIERS): New.
>>      (enum builtin_arg): Add NEON_ARG_LANE_INDEX.
>>      (arm_expand_neon_args): Handle NEON_ARG_LANE_INDEX.
>>      (arm_expand_neon_builtin): Handle qualifier_lane_index.
>>
>>      * config/arm/arm-protos.h (neon_lane_bounds): Add const_tree parameter.
>>      * config/arm/arm.c (bounds_check): Likewise, improve error message.
>>      (neon_lane_bounds, neon_const_bounds): Add arguments to bounds_check.
>>      * config/arm/arm_neon_builtins.def (vshrs_n, vshru_n, vrshrs_n,
>>      vrshru_n, vshrn_n, vrshrn_n, vqshrns_n, vqshrnu_n, vqrshrns_n,
>>      vqrshrnu_n, vqshrun_n, vqrshrun_n, vshl_n, vqshl_s_n, vqshl_u_n,
>>      vqshlu_n, vshlls_n, vshllu_n): Change qualifiers to BINOP_IMM.
>>      (vsras_n, vsrau_n, vrsras_n, vrsrau_n, vsri_n, vsli_n): Change
>>      qualifiers to TERNOP_IMM.
>>      (vdup_lane): Change qualifiers to GETLANE.
>>      (vmla_lane, vmlals_lane, vmlalu_lane, vqdmlal_lane, vmls_lane,
>>      vmlsls_lane, vmlslu_lane, vqdmlsl_lane): Change qualifiers to MAC_LANE.
>>      (vmla_n, vmlals_n, vmlalu_n, vqdmlal_n, vmls_n, vmlsls_n, vmlslu_n,
>>      vqdmlsl_n): Change qualifiers to MAC_N.
>>
>>      * config/arm/neon.md (neon_vget_lane<mode>, neon_vget_laneu<mode>,
>>      neon_vget_lanedi, neon_vget_lanev2di, neon_vset_lane<mode>,
>>      neon_vset_lanedi, neon_vdup_lane<mode>, neon_vdup_lanedi,
>>      neon_vdup_lanev2di, neon_vmul_lane<mode>, neon_vmul_lane<mode>,
>>      neon_vmull<sup>_lane<mode>, neon_vqdmull_lane<mode>,
>>      neon_vq<r>dmulh_lane<mode>, neon_vq<r>dmulh_lane<mode>,
>>      neon_vmla_lane<mode>, neon_vmla_lane<mode>, neon_vmlal<sup>_lane<mode>,
>>      neon_vqdmlal_lane<mode>, neon_vmls_lane<mode>, neon_vmls_lane<mode>,
>>      neon_vmlsl<sup>_lane<mode>, neon_vqdmlsl_lane<mode>):
>>      Remove call to neon_lane_bounds.
> 

Ping^2.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-05-08 12:57 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-16 17:03 [PATCH 1/2][ARM] PR/63870: Add qualifier to check lane bounds in expand Alan Lawrence
2015-01-16 17:33 ` [PATCH 2/2][ARM] PR/63870: Add a __builtin_lane_check Alan Lawrence
2015-04-22 16:58   ` Alan Lawrence
2015-04-22 16:57 ` [PATCH 1/2][ARM] PR/63870: Add qualifier to check lane bounds in expand Alan Lawrence
2015-05-08 12:57   ` Alan Lawrence

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).