* [PATCH 0/2] arm: Add support for MVE Tail-Predicated Low Overhead Loops
@ 2023-12-18 11:53 Andre Vieira
2023-12-18 11:53 ` [PATCH 1/2] arm: Add define_attr to to create a mapping between MVE predicated and unpredicated insns Andre Vieira
2023-12-18 11:53 ` [PATCH 2/2] arm: Add support for MVE Tail-Predicated Low Overhead Loops Andre Vieira
0 siblings, 2 replies; 5+ messages in thread
From: Andre Vieira @ 2023-12-18 11:53 UTC (permalink / raw)
To: gcc-patches; +Cc: Richard.Earnshaw, Andre Vieira
[-- Attachment #1: Type: text/plain, Size: 261 bytes --]
Resending series to make use of the Linaro pre-commit CI in patchworks.
Andre Vieira (2):
arm: Add define_attr to to create a mapping between MVE predicated and
unpredicated insns
arm: Add support for MVE Tail-Predicated Low Overhead Loops
--
2.17.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 1/2] arm: Add define_attr to to create a mapping between MVE predicated and unpredicated insns
2023-12-18 11:53 [PATCH 0/2] arm: Add support for MVE Tail-Predicated Low Overhead Loops Andre Vieira
@ 2023-12-18 11:53 ` Andre Vieira
2023-12-20 16:54 ` Andre Vieira (lists)
2023-12-18 11:53 ` [PATCH 2/2] arm: Add support for MVE Tail-Predicated Low Overhead Loops Andre Vieira
1 sibling, 1 reply; 5+ messages in thread
From: Andre Vieira @ 2023-12-18 11:53 UTC (permalink / raw)
To: gcc-patches; +Cc: Richard.Earnshaw, Stam Markianos-Wright
[-- Attachment #1: Type: text/plain, Size: 44 bytes --]
This is a multi-part message in MIME format.
[-- Attachment #2: Type: text/plain, Size: 152 bytes --]
Re-sending Stam's first patch, same as:
https://gcc.gnu.org/pipermail/gcc-patches/2023-November/635301.html
Hopefully patchworks can pick this up :)
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3: 0001-arm-Add-define_attr-to-to-create-a-mapping-between-M.patch --]
[-- Type: text/x-patch; name="0001-arm-Add-define_attr-to-to-create-a-mapping-between-M.patch", Size: 104533 bytes --]
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index a9c2752c0ea..0b0e8620717 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -2375,6 +2375,21 @@ extern int making_const_table;
else if (TARGET_THUMB1) \
thumb1_final_prescan_insn (INSN)
+/* These defines are useful to refer to the value of the mve_unpredicated_insn
+ insn attribute. Note that, because these use the get_attr_* function, these
+ will change recog_data if (INSN) isn't current_insn. */
+#define MVE_VPT_PREDICABLE_INSN_P(INSN) \
+ (recog_memoized (INSN) >= 0 \
+ && get_attr_mve_unpredicated_insn (INSN) != 0) \
+
+#define MVE_VPT_PREDICATED_INSN_P(INSN) \
+ (MVE_VPT_PREDICABLE_INSN_P (INSN) \
+ && recog_memoized (INSN) != get_attr_mve_unpredicated_insn (INSN)) \
+
+#define MVE_VPT_UNPREDICATED_INSN_P(INSN) \
+ (MVE_VPT_PREDICABLE_INSN_P (INSN) \
+ && recog_memoized (INSN) == get_attr_mve_unpredicated_insn (INSN)) \
+
#define ARM_SIGN_EXTEND(x) ((HOST_WIDE_INT) \
(HOST_BITS_PER_WIDE_INT <= 32 ? (unsigned HOST_WIDE_INT) (x) \
: ((((unsigned HOST_WIDE_INT)(x)) & (unsigned HOST_WIDE_INT) 0xffffffff) |\
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 07eaf06cdea..8efdebecc3c 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -124,6 +124,8 @@ (define_attr "fpu" "none,vfp"
; and not all ARM insns do.
(define_attr "predicated" "yes,no" (const_string "no"))
+(define_attr "mve_unpredicated_insn" "" (const_int 0))
+
; LENGTH of an instruction (in bytes)
(define_attr "length" ""
(const_int 4))
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index a9803538101..5ea2d9e8668 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -2305,6 +2305,7 @@ (define_int_attr simd32_op [(UNSPEC_QADD8 "qadd8") (UNSPEC_QSUB8 "qsub8")
(define_int_attr mmla_sfx [(UNSPEC_MATMUL_S "s8") (UNSPEC_MATMUL_U "u8")
(UNSPEC_MATMUL_US "s8")])
+
;;MVE int attribute.
(define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
(VREV16Q_U "u") (VMVNQ_N_S "s") (VMVNQ_N_U "u")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index b0d3443da9c..62df022ef19 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -17,7 +17,7 @@
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
-(define_insn "*mve_mov<mode>"
+(define_insn "mve_mov<mode>"
[(set (match_operand:MVE_types 0 "nonimmediate_operand" "=w,w,r,w , w, r,Ux,w")
(match_operand:MVE_types 1 "general_operand" " w,r,w,DnDm,UxUi,r,w, Ul"))]
"TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT"
@@ -81,18 +81,27 @@ (define_insn "*mve_mov<mode>"
return "";
}
}
- [(set_attr "type" "mve_move,mve_move,mve_move,mve_move,mve_load,multiple,mve_store,mve_load")
+ [(set_attr_alternative "mve_unpredicated_insn" [(symbol_ref "CODE_FOR_mve_mov<mode>")
+ (symbol_ref "CODE_FOR_nothing")
+ (symbol_ref "CODE_FOR_nothing")
+ (symbol_ref "CODE_FOR_mve_mov<mode>")
+ (symbol_ref "CODE_FOR_mve_mov<mode>")
+ (symbol_ref "CODE_FOR_nothing")
+ (symbol_ref "CODE_FOR_mve_mov<mode>")
+ (symbol_ref "CODE_FOR_nothing")])
+ (set_attr "type" "mve_move,mve_move,mve_move,mve_move,mve_load,multiple,mve_store,mve_load")
(set_attr "length" "4,8,8,4,4,8,4,8")
(set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*")
(set_attr "neg_pool_range" "*,*,*,*,996,*,*,*")])
-(define_insn "*mve_vdup<mode>"
+(define_insn "mve_vdup<mode>"
[(set (match_operand:MVE_vecs 0 "s_register_operand" "=w")
(vec_duplicate:MVE_vecs
(match_operand:<V_elem> 1 "s_register_operand" "r")))]
"TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT"
"vdup.<V_sz_elem>\t%q0, %1"
- [(set_attr "length" "4")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vdup<mode>"))
+ (set_attr "length" "4")
(set_attr "type" "mve_move")])
;;
@@ -145,7 +154,8 @@ (define_insn "@mve_<mve_insn>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_mnemo>.f%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -159,7 +169,8 @@ (define_insn "@mve_<mve_insn>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -173,7 +184,8 @@ (define_insn "mve_v<absneg_str>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"v<absneg_str>.f%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_v<absneg_str>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -187,7 +199,8 @@ (define_insn "@mve_<mve_insn>q_n_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.%#<V_sz_elem>\t%q0, %1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -201,7 +214,8 @@ (define_insn "@mve_<mve_insn>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
;; [vcvttq_f32_f16])
@@ -214,7 +228,8 @@ (define_insn "mve_vcvttq_f32_f16v4sf"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvtt.f32.f16\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvttq_f32_f16v4sf"))
+ (set_attr "type" "mve_move")
])
;;
@@ -228,7 +243,8 @@ (define_insn "mve_vcvtbq_f32_f16v4sf"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvtb.f32.f16\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtbq_f32_f16v4sf"))
+ (set_attr "type" "mve_move")
])
;;
@@ -242,7 +258,8 @@ (define_insn "mve_vcvtq_to_f_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvt.f%#<V_sz_elem>.<supf>%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_to_f_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -256,7 +273,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -270,7 +288,8 @@ (define_insn "mve_vcvtq_from_f_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvt.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_from_f_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -284,7 +303,8 @@ (define_insn "mve_v<absneg_str>q_s<mode>"
]
"TARGET_HAVE_MVE"
"v<absneg_str>.s%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_v<absneg_str>q_s<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -297,7 +317,8 @@ (define_insn "mve_vmvnq_u<mode>"
]
"TARGET_HAVE_MVE"
"vmvn\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vmvnq_u<mode>"))
+ (set_attr "type" "mve_move")
])
(define_expand "mve_vmvnq_s<mode>"
[
@@ -318,7 +339,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.%#<V_sz_elem>\t%q0, %1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -331,7 +353,8 @@ (define_insn "@mve_vclzq_s<mode>"
]
"TARGET_HAVE_MVE"
"vclz.i%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vclzq_s<mode>"))
+ (set_attr "type" "mve_move")
])
(define_expand "mve_vclzq_u<mode>"
[
@@ -354,7 +377,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -368,7 +392,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -382,7 +407,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -397,7 +423,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -411,7 +438,8 @@ (define_insn "mve_vcvtpq_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvtp.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtpq_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -425,7 +453,8 @@ (define_insn "mve_vcvtnq_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvtn.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtnq_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -439,7 +468,8 @@ (define_insn "mve_vcvtmq_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvtm.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtmq_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -453,7 +483,8 @@ (define_insn "mve_vcvtaq_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvta.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtaq_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -467,7 +498,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.i%#<V_sz_elem>\t%q0, %1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -481,7 +513,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -495,7 +528,8 @@ (define_insn "@mve_<mve_insn>q_<supf>v4si"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>32\t%Q0, %R0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf>v4si"))
+ (set_attr "type" "mve_move")
])
;;
@@ -509,7 +543,8 @@ (define_insn "mve_vctp<MVE_vctp>q<MVE_vpred>"
]
"TARGET_HAVE_MVE"
"vctp.<MVE_vctp>\t%1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vctp<MVE_vctp>q<MVE_vpred>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -523,7 +558,8 @@ (define_insn "mve_vpnotv16bi"
]
"TARGET_HAVE_MVE"
"vpnot"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vpnotv16bi"))
+ (set_attr "type" "mve_move")
])
;;
@@ -538,7 +574,8 @@ (define_insn "@mve_<mve_insn>q_n_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.<V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -553,7 +590,8 @@ (define_insn "mve_vcvtq_n_to_f_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvt.f<V_sz_elem>.<supf><V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_n_to_f_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;; [vcreateq_f])
@@ -599,7 +637,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf><V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;; Versions that take constant vectors as operand 2 (with all elements
@@ -617,7 +656,8 @@ (define_insn "mve_vshrq_n_s<mode>_imm"
VALID_NEON_QREG_MODE (<MODE>mode),
true);
}
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vshrq_n_s<mode>_imm"))
+ (set_attr "type" "mve_move")
])
(define_insn "mve_vshrq_n_u<mode>_imm"
[
@@ -632,7 +672,8 @@ (define_insn "mve_vshrq_n_u<mode>_imm"
VALID_NEON_QREG_MODE (<MODE>mode),
true);
}
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vshrq_n_u<mode>_imm"))
+ (set_attr "type" "mve_move")
])
;;
@@ -647,7 +688,8 @@ (define_insn "mve_vcvtq_n_from_f_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvt.<supf><V_sz_elem>.f<V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_n_from_f_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -662,8 +704,9 @@ (define_insn "@mve_<mve_insn>q_p_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>32\t%Q0, %R0, %q1"
- [(set_attr "type" "mve_move")
- (set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf>v4si"))
+ (set_attr "type" "mve_move")
+ (set_attr "length""8")])
;;
;; [vcmpneq_, vcmpcsq_, vcmpeqq_, vcmpgeq_, vcmpgtq_, vcmphiq_, vcmpleq_, vcmpltq_])
@@ -676,7 +719,8 @@ (define_insn "@mve_vcmp<mve_cmp_op>q_<mode>"
]
"TARGET_HAVE_MVE"
"vcmp.<mve_cmp_type>%#<V_sz_elem>\t<mve_cmp_op>, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcmp<mve_cmp_op>q_<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -691,7 +735,8 @@ (define_insn "@mve_vcmp<mve_cmp_op>q_n_<mode>"
]
"TARGET_HAVE_MVE"
"vcmp.<mve_cmp_type>%#<V_sz_elem> <mve_cmp_op>, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcmp<mve_cmp_op>q_n_<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -722,7 +767,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -739,7 +785,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.i%#<V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -754,7 +801,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -769,7 +817,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -789,8 +838,11 @@ (define_insn "mve_vandq_u<mode>"
"@
vand\t%q0, %q1, %q2
* return neon_output_logic_immediate (\"vand\", &operands[2], <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));"
- [(set_attr "type" "mve_move")
+ [(set_attr_alternative "mve_unpredicated_insn" [(symbol_ref "CODE_FOR_mve_vandq_u<mode>")
+ (symbol_ref "CODE_FOR_nothing")])
+ (set_attr "type" "mve_move")
])
+
(define_expand "mve_vandq_s<mode>"
[
(set (match_operand:MVE_2 0 "s_register_operand")
@@ -811,7 +863,8 @@ (define_insn "mve_vbicq_u<mode>"
]
"TARGET_HAVE_MVE"
"vbic\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vbicq_u<mode>"))
+ (set_attr "type" "mve_move")
])
(define_expand "mve_vbicq_s<mode>"
@@ -835,7 +888,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.%#<V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -853,7 +907,8 @@ (define_insn "@mve_<mve_insn>q<mve_rot>_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<isu>%#<V_sz_elem>\t%q0, %q1, %q2, #<rot>"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q<mve_rot>_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;; Auto vectorizer pattern for int vcadd
@@ -876,7 +931,8 @@ (define_insn "mve_veorq_u<mode>"
]
"TARGET_HAVE_MVE"
"veor\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_veorq_u<mode>"))
+ (set_attr "type" "mve_move")
])
(define_expand "mve_veorq_s<mode>"
[
@@ -904,7 +960,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -920,7 +977,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.s%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -935,7 +993,8 @@ (define_insn "mve_<max_min_su_str>q_<max_min_supf><mode>"
]
"TARGET_HAVE_MVE"
"<max_min_su_str>.<max_min_supf>%#<V_sz_elem>\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<max_min_su_str>q_<max_min_supf><mode>"))
+ (set_attr "type" "mve_move")
])
@@ -954,7 +1013,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -972,7 +1032,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -988,7 +1049,8 @@ (define_insn "@mve_<mve_insn>q_int_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<isu>%#<V_sz_elem>\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_int_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1004,7 +1066,8 @@ (define_insn "mve_<mve_addsubmul>q<mode>"
]
"TARGET_HAVE_MVE"
"<mve_addsubmul>.i%#<V_sz_elem>\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_addsubmul>q<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1018,7 +1081,8 @@ (define_insn "mve_vornq_s<mode>"
]
"TARGET_HAVE_MVE"
"vorn\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vornq_s<mode>"))
+ (set_attr "type" "mve_move")
])
(define_expand "mve_vornq_u<mode>"
@@ -1047,7 +1111,8 @@ (define_insn "mve_vorrq_s<mode>"
"@
vorr\t%q0, %q1, %q2
* return neon_output_logic_immediate (\"vorr\", &operands[2], <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vorrq_s<mode>"))
+ (set_attr "type" "mve_move")
])
(define_expand "mve_vorrq_u<mode>"
[
@@ -1071,7 +1136,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1087,7 +1153,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1103,7 +1170,8 @@ (define_insn "@mve_<mve_insn>q_r_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_r_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1118,7 +1186,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1133,7 +1202,8 @@ (define_insn "@mve_<mve_insn>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.f%#<V_sz_elem>\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1148,7 +1218,8 @@ (define_insn "@mve_<mve_insn>q_<supf>v4si"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>32\t%Q0, %R0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf>v4si"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1165,7 +1236,8 @@ (define_insn "@mve_<mve_insn>q_n_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.f%#<V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1179,7 +1251,8 @@ (define_insn "mve_vandq_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vand\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vandq_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1193,7 +1266,8 @@ (define_insn "mve_vbicq_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vbic\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vbicq_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1209,7 +1283,8 @@ (define_insn "@mve_<mve_insn>q<mve_rot>_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.f%#<V_sz_elem>\t%q0, %q1, %q2, #<rot>"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q<mve_rot>_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1223,7 +1298,8 @@ (define_insn "@mve_vcmp<mve_cmp_op>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcmp.f%#<V_sz_elem> <mve_cmp_op>, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcmp<mve_cmp_op>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1238,7 +1314,8 @@ (define_insn "@mve_vcmp<mve_cmp_op>q_n_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcmp.f%#<V_sz_elem> <mve_cmp_op>, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcmp<mve_cmp_op>q_n_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1253,8 +1330,10 @@ (define_insn "mve_vctp<MVE_vctp>q_m<MVE_vpred>"
]
"TARGET_HAVE_MVE"
"vpst\;vctpt.<MVE_vctp>\t%1"
- [(set_attr "type" "mve_move")
- (set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vctp<MVE_vctp>q<MVE_vpred>"))
+ (set_attr "type" "mve_move")
+ (set_attr "length""8")
+])
;;
;; [vcvtbq_f16_f32])
@@ -1268,7 +1347,8 @@ (define_insn "mve_vcvtbq_f16_f32v8hf"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvtb.f16.f32\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtbq_f16_f32v8hf"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1283,7 +1363,8 @@ (define_insn "mve_vcvttq_f16_f32v8hf"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvtt.f16.f32\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvttq_f16_f32v8hf"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1297,7 +1378,8 @@ (define_insn "mve_veorq_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"veor\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_veorq_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1313,7 +1395,8 @@ (define_insn "@mve_<mve_insn>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.f%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1331,7 +1414,8 @@ (define_insn "@mve_<mve_insn>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.f%#<V_sz_elem>\t%0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1346,7 +1430,8 @@ (define_insn "@mve_<max_min_f_str>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<max_min_f_str>.f%#<V_sz_elem> %q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<max_min_f_str>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1364,7 +1449,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%Q0, %R0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1384,7 +1470,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<isu>%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1400,7 +1487,8 @@ (define_insn "mve_<mve_addsubmul>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_addsubmul>.f%#<V_sz_elem>\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_addsubmul>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1414,7 +1502,8 @@ (define_insn "mve_vornq_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vorn\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vornq_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1428,7 +1517,8 @@ (define_insn "mve_vorrq_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vorr\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vorrq_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1444,7 +1534,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.i%#<V_sz_elem> %q0, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1460,7 +1551,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.s%#<V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1476,7 +1568,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.s%#<V_sz_elem>\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1494,7 +1587,8 @@ (define_insn "@mve_<mve_insn>q_<supf>v4si"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>32\t%Q0, %R0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf>v4si"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1510,7 +1604,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1526,7 +1621,8 @@ (define_insn "@mve_<mve_insn>q_poly_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_poly_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1547,8 +1643,9 @@ (define_insn "@mve_vcmp<mve_cmp_op1>q_m_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcmpt.f%#<V_sz_elem>\t<mve_cmp_op1>, %q1, %q2"
- [(set_attr "type" "mve_move")
- (set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcmp<mve_cmp_op1>q_f<mode>"))
+ (set_attr "length""8")])
+
;;
;; [vcvtaq_m_u, vcvtaq_m_s])
;;
@@ -1562,8 +1659,10 @@ (define_insn "mve_vcvtaq_m_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvtat.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
- (set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtaq_<supf><mode>"))
+ (set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
;;
;; [vcvtq_m_to_f_s, vcvtq_m_to_f_u])
;;
@@ -1577,8 +1676,9 @@ (define_insn "mve_vcvtq_m_to_f_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvtt.f%#<V_sz_elem>.<supf>%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
- (set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_to_f_<supf><mode>"))
+ (set_attr "type" "mve_move")
+ (set_attr "length""8")])
;;
;; [vqrshrnbq_n_u, vqrshrnbq_n_s]
@@ -1604,7 +1704,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<isu>%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1623,7 +1724,8 @@ (define_insn "@mve_<mve_insn>q_<supf>v4si"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>32\t%Q0, %R0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf>v4si"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1639,7 +1741,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1685,7 +1788,10 @@ (define_insn "mve_vshlcq_<supf><mode>"
(match_dup 4)]
VSHLCQ))]
"TARGET_HAVE_MVE"
- "vshlc\t%q0, %1, %4")
+ "vshlc\t%q0, %1, %4"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vshlcq_<supf><mode>"))
+ (set_attr "type" "mve_move")
+])
;;
;; [vabsq_m_s]
@@ -1705,7 +1811,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<isu>%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1721,7 +1828,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1744,7 +1852,8 @@ (define_insn "@mve_vcmp<mve_cmp_op1>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;vcmpt.<isu>%#<V_sz_elem>\t<mve_cmp_op1>, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcmp<mve_cmp_op1>q_n_<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1767,7 +1876,8 @@ (define_insn "@mve_vcmp<mve_cmp_op1>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;vcmpt.<isu>%#<V_sz_elem>\t<mve_cmp_op1>, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcmp<mve_cmp_op1>q_<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1783,7 +1893,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1800,7 +1911,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.s%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1819,7 +1931,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1838,7 +1951,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1857,7 +1971,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1878,7 +1993,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1894,7 +2010,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1910,7 +2027,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1933,7 +2051,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.s%#<V_sz_elem>\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1950,7 +2069,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1967,7 +2087,8 @@ (define_insn "@mve_<mve_insn>q_m_r_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_r_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1983,7 +2104,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1999,7 +2121,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -2015,7 +2138,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -2038,7 +2162,8 @@ (define_insn "@mve_<mve_insn>q_m_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_mnemo>t.f%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2054,7 +2179,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>32\t%Q0, %R0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
;; [vcmlaq, vcmlaq_rot90, vcmlaq_rot180, vcmlaq_rot270])
@@ -2072,7 +2198,9 @@ (define_insn "@mve_<mve_insn>q<mve_rot>_f<mode>"
"@
vcmul.f%#<V_sz_elem> %q0, %q2, %q3, #<rot>
vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #<rot>"
- [(set_attr "type" "mve_move")
+ [(set_attr_alternative "mve_unpredicated_insn" [(symbol_ref "CODE_FOR_mve_<mve_insn>q<mve_rot>_f<mode>")
+ (symbol_ref "CODE_FOR_mve_<mve_insn>q<mve_rot>_f<mode>")])
+ (set_attr "type" "mve_move")
])
;;
@@ -2093,7 +2221,8 @@ (define_insn "@mve_vcmp<mve_cmp_op1>q_m_n_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcmpt.f%#<V_sz_elem>\t<mve_cmp_op1>, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcmp<mve_cmp_op1>q_n_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2109,7 +2238,8 @@ (define_insn "mve_vcvtbq_m_f16_f32v8hf"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvtbt.f16.f32\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtbq_f16_f32v8hf"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2125,7 +2255,8 @@ (define_insn "mve_vcvtbq_m_f32_f16v4sf"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvtbt.f32.f16\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtbq_f32_f16v4sf"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2141,7 +2272,8 @@ (define_insn "mve_vcvttq_m_f16_f32v8hf"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvttt.f16.f32\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvttq_f16_f32v8hf"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2157,8 +2289,9 @@ (define_insn "mve_vcvttq_m_f32_f16v4sf"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvttt.f32.f16\t%q0, %q2"
- [(set_attr "type" "mve_move")
- (set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvttq_f32_f16v4sf"))
+ (set_attr "type" "mve_move")
+ (set_attr "length""8")])
;;
;; [vdupq_m_n_f])
@@ -2173,7 +2306,8 @@ (define_insn "@mve_<mve_insn>q_m_n_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2190,7 +2324,8 @@ (define_insn "@mve_<mve_insn>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.f%#<V_sz_elem>\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -2207,7 +2342,8 @@ (define_insn "@mve_<mve_insn>q_n_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.f%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -2224,7 +2360,8 @@ (define_insn "@mve_<mve_insn>q_m_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.f%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2243,7 +2380,8 @@ (define_insn "@mve_<mve_insn>q_p_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.f%#<V_sz_elem>\t%0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2262,7 +2400,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%Q0, %R0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -2281,7 +2420,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%Q0, %R0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2298,7 +2438,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2319,7 +2460,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<isu>%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2335,7 +2477,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.i%#<V_sz_elem>\t%q0, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2352,7 +2495,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.i%#<V_sz_elem>\t%q0, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2368,7 +2512,8 @@ (define_insn "@mve_<mve_insn>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -2384,7 +2529,8 @@ (define_insn "@mve_<mve_insn>q_m_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2400,7 +2546,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2416,7 +2563,8 @@ (define_insn "@mve_<mve_insn>q_m_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2435,7 +2583,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>32\t%Q0, %R0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2451,7 +2600,8 @@ (define_insn "mve_vcvtmq_m_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvtmt.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtmq_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2467,7 +2617,8 @@ (define_insn "mve_vcvtpq_m_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvtpt.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtpq_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2483,7 +2634,8 @@ (define_insn "mve_vcvtnq_m_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvtnt.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtnq_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2500,7 +2652,8 @@ (define_insn "mve_vcvtq_m_n_from_f_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvtt.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_n_from_f_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2516,7 +2669,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2532,8 +2686,9 @@ (define_insn "mve_vcvtq_m_from_f_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvtt.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
- (set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_from_f_<supf><mode>"))
+ (set_attr "type" "mve_move")
+ (set_attr "length""8")])
;;
;; [vabavq_p_s, vabavq_p_u])
@@ -2549,7 +2704,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length" "8")])
;;
@@ -2566,8 +2722,9 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\n\t<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
- (set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
+ (set_attr "length" "8")])
;;
;; [vsriq_m_n_s, vsriq_m_n_u])
@@ -2583,8 +2740,9 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
- (set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
+ (set_attr "length" "8")])
;;
;; [vcvtq_m_n_to_f_u, vcvtq_m_n_to_f_s])
@@ -2600,7 +2758,8 @@ (define_insn "mve_vcvtq_m_n_to_f_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvtt.f%#<V_sz_elem>.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_n_to_f_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2640,7 +2799,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2659,8 +2819,9 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.i%#<V_sz_elem> %q0, %q2, %3"
- [(set_attr "type" "mve_move")
- (set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
+ (set_attr "length""8")])
;;
;; [vaddq_m_u, vaddq_m_s]
@@ -2678,7 +2839,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.i%#<V_sz_elem>\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2698,7 +2860,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2715,8 +2878,9 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
- (set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
+ (set_attr "length""8")])
;;
;; [vcaddq_rot90_m_u, vcaddq_rot90_m_s]
@@ -2735,7 +2899,8 @@ (define_insn "@mve_<mve_insn>q<mve_rot>_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<isu>%#<V_sz_elem>\t%q0, %q2, %q3, #<rot>"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q<mve_rot>_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2763,7 +2928,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2784,7 +2950,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2802,7 +2969,8 @@ (define_insn "@mve_<mve_insn>q_int_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_int_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2819,7 +2987,8 @@ (define_insn "mve_vornq_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;vornt\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vornq_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2837,7 +3006,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2855,7 +3025,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2872,7 +3043,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2892,7 +3064,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%Q0, %R0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2920,7 +3093,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<isu>%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2940,7 +3114,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>32\t%Q0, %R0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2958,7 +3133,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2976,7 +3152,8 @@ (define_insn "@mve_<mve_insn>q_poly_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_poly_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2994,7 +3171,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.s%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -3012,7 +3190,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.s%#<V_sz_elem>\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -3036,7 +3215,8 @@ (define_insn "@mve_<mve_insn>q_m_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.f%#<V_sz_elem> %q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -3057,7 +3237,8 @@ (define_insn "@mve_<mve_insn>q_m_n_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.f%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -3077,7 +3258,8 @@ (define_insn "@mve_<mve_insn>q_m_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -3094,7 +3276,8 @@ (define_insn "@mve_<mve_insn>q_m_n_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -3116,7 +3299,8 @@ (define_insn "@mve_<mve_insn>q<mve_rot>_m_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.f%#<V_sz_elem>\t%q0, %q2, %q3, #<rot>"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q<mve_rot>_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -3136,7 +3320,8 @@ (define_insn "@mve_<mve_insn>q<mve_rot>_m_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.f%#<V_sz_elem>\t%q0, %q2, %q3, #<rot>"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q<mve_rot>_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -3153,7 +3338,8 @@ (define_insn "mve_vornq_m_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vornt\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vornq_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -3173,7 +3359,8 @@ (define_insn "mve_vstrbq_<supf><mode>"
output_asm_insn("vstrb.<V_sz_elem>\t%q1, %E0",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrbq_<supf><mode>"))
+ (set_attr "length" "4")])
;;
;; [vstrbq_scatter_offset_s vstrbq_scatter_offset_u]
@@ -3201,7 +3388,8 @@ (define_insn "mve_vstrbq_scatter_offset_<supf><mode>_insn"
VSTRBSOQ))]
"TARGET_HAVE_MVE"
"vstrb.<V_sz_elem>\t%q2, [%0, %q1]"
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrbq_scatter_offset_<supf><mode>_insn"))
+ (set_attr "length" "4")])
;;
;; [vstrwq_scatter_base_s vstrwq_scatter_base_u]
@@ -3223,7 +3411,8 @@ (define_insn "mve_vstrwq_scatter_base_<supf>v4si"
output_asm_insn("vstrw.u32\t%q2, [%q0, %1]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_<supf>v4si"))
+ (set_attr "length" "4")])
;;
;; [vldrbq_gather_offset_s vldrbq_gather_offset_u]
@@ -3246,7 +3435,8 @@ (define_insn "mve_vldrbq_gather_offset_<supf><mode>"
output_asm_insn ("vldrb.<supf><V_sz_elem>\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrbq_gather_offset_<supf><mode>"))
+ (set_attr "length" "4")])
;;
;; [vldrbq_s vldrbq_u]
@@ -3268,7 +3458,8 @@ (define_insn "mve_vldrbq_<supf><mode>"
output_asm_insn ("vldrb.<supf><V_sz_elem>\t%q0, %E1",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrbq_<supf><mode>"))
+ (set_attr "length" "4")])
;;
;; [vldrwq_gather_base_s vldrwq_gather_base_u]
@@ -3288,7 +3479,8 @@ (define_insn "mve_vldrwq_gather_base_<supf>v4si"
output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_<supf>v4si"))
+ (set_attr "length" "4")])
;;
;; [vstrbq_scatter_offset_p_s vstrbq_scatter_offset_p_u]
@@ -3320,7 +3512,8 @@ (define_insn "mve_vstrbq_scatter_offset_p_<supf><mode>_insn"
VSTRBSOQ))]
"TARGET_HAVE_MVE"
"vpst\;vstrbt.<V_sz_elem>\t%q2, [%0, %q1]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrbq_scatter_offset_<supf><mode>_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_scatter_base_p_s vstrwq_scatter_base_p_u]
@@ -3343,7 +3536,8 @@ (define_insn "mve_vstrwq_scatter_base_p_<supf>v4si"
output_asm_insn ("vpst\n\tvstrwt.u32\t%q2, [%q0, %1]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_<supf>v4si"))
+ (set_attr "length" "8")])
(define_insn "mve_vstrbq_p_<supf><mode>"
[(set (match_operand:<MVE_B_ELEM> 0 "mve_memory_operand" "=Ux")
@@ -3361,7 +3555,8 @@ (define_insn "mve_vstrbq_p_<supf><mode>"
output_asm_insn ("vpst\;vstrbt.<V_sz_elem>\t%q1, %E0",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrbq_<supf><mode>"))
+ (set_attr "length" "8")])
;;
;; [vldrbq_gather_offset_z_s vldrbq_gather_offset_z_u]
@@ -3386,7 +3581,8 @@ (define_insn "mve_vldrbq_gather_offset_z_<supf><mode>"
output_asm_insn ("vpst\n\tvldrbt.<supf><V_sz_elem>\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrbq_gather_offset_<supf><mode>"))
+ (set_attr "length" "8")])
;;
;; [vldrbq_z_s vldrbq_z_u]
@@ -3409,7 +3605,8 @@ (define_insn "mve_vldrbq_z_<supf><mode>"
output_asm_insn ("vpst\;vldrbt.<supf><V_sz_elem>\t%q0, %E1",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrbq_<supf><mode>"))
+ (set_attr "length" "8")])
;;
;; [vldrwq_gather_base_z_s vldrwq_gather_base_z_u]
@@ -3430,7 +3627,8 @@ (define_insn "mve_vldrwq_gather_base_z_<supf>v4si"
output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%q1, %2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_<supf>v4si"))
+ (set_attr "length" "8")])
;;
;; [vldrhq_f]
@@ -3449,7 +3647,8 @@ (define_insn "mve_vldrhq_fv8hf"
output_asm_insn ("vldrh.16\t%q0, %E1",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_fv8hf"))
+ (set_attr "length" "4")])
;;
;; [vldrhq_gather_offset_s vldrhq_gather_offset_u]
@@ -3472,7 +3671,8 @@ (define_insn "mve_vldrhq_gather_offset_<supf><mode>"
output_asm_insn ("vldrh.<supf><V_sz_elem>\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_offset_<supf><mode>"))
+ (set_attr "length" "4")])
;;
;; [vldrhq_gather_offset_z_s vldrhq_gather_offset_z_u]
@@ -3497,7 +3697,8 @@ (define_insn "mve_vldrhq_gather_offset_z_<supf><mode>"
output_asm_insn ("vpst\n\tvldrht.<supf><V_sz_elem>\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_offset_<supf><mode>"))
+ (set_attr "length" "8")])
;;
;; [vldrhq_gather_shifted_offset_s vldrhq_gather_shifted_offset_u]
@@ -3520,7 +3721,8 @@ (define_insn "mve_vldrhq_gather_shifted_offset_<supf><mode>"
output_asm_insn ("vldrh.<supf><V_sz_elem>\t%q0, [%m1, %q2, uxtw #1]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_shifted_offset_<supf><mode>"))
+ (set_attr "length" "4")])
;;
;; [vldrhq_gather_shifted_offset_z_s vldrhq_gather_shited_offset_z_u]
@@ -3545,7 +3747,8 @@ (define_insn "mve_vldrhq_gather_shifted_offset_z_<supf><mode>"
output_asm_insn ("vpst\n\tvldrht.<supf><V_sz_elem>\t%q0, [%m1, %q2, uxtw #1]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_shifted_offset_<supf><mode>"))
+ (set_attr "length" "8")])
;;
;; [vldrhq_s, vldrhq_u]
@@ -3567,7 +3770,8 @@ (define_insn "mve_vldrhq_<supf><mode>"
output_asm_insn ("vldrh.<supf><V_sz_elem>\t%q0, %E1",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_<supf><mode>"))
+ (set_attr "length" "4")])
;;
;; [vldrhq_z_f]
@@ -3587,7 +3791,8 @@ (define_insn "mve_vldrhq_z_fv8hf"
output_asm_insn ("vpst\;vldrht.16\t%q0, %E1",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_fv8hf"))
+ (set_attr "length" "8")])
;;
;; [vldrhq_z_s vldrhq_z_u]
@@ -3610,7 +3815,8 @@ (define_insn "mve_vldrhq_z_<supf><mode>"
output_asm_insn ("vpst\;vldrht.<supf><V_sz_elem>\t%q0, %E1",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_<supf><mode>"))
+ (set_attr "length" "8")])
;;
;; [vldrwq_f]
@@ -3629,7 +3835,8 @@ (define_insn "mve_vldrwq_fv4sf"
output_asm_insn ("vldrw.32\t%q0, %E1",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_fv4sf"))
+ (set_attr "length" "4")])
;;
;; [vldrwq_s vldrwq_u]
@@ -3648,7 +3855,8 @@ (define_insn "mve_vldrwq_<supf>v4si"
output_asm_insn ("vldrw.32\t%q0, %E1",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_<supf>v4si"))
+ (set_attr "length" "4")])
;;
;; [vldrwq_z_f]
@@ -3668,7 +3876,8 @@ (define_insn "mve_vldrwq_z_fv4sf"
output_asm_insn ("vpst\;vldrwt.32\t%q0, %E1",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_fv4sf"))
+ (set_attr "length" "8")])
;;
;; [vldrwq_z_s vldrwq_z_u]
@@ -3688,7 +3897,8 @@ (define_insn "mve_vldrwq_z_<supf>v4si"
output_asm_insn ("vpst\;vldrwt.32\t%q0, %E1",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_<supf>v4si"))
+ (set_attr "length" "8")])
(define_expand "@mve_vld1q_f<mode>"
[(match_operand:MVE_0 0 "s_register_operand")
@@ -3728,7 +3938,8 @@ (define_insn "mve_vldrdq_gather_base_<supf>v2di"
output_asm_insn ("vldrd.64\t%q0, [%q1, %2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_base_<supf>v2di"))
+ (set_attr "length" "4")])
;;
;; [vldrdq_gather_base_z_s vldrdq_gather_base_z_u]
@@ -3749,7 +3960,8 @@ (define_insn "mve_vldrdq_gather_base_z_<supf>v2di"
output_asm_insn ("vpst\n\tvldrdt.u64\t%q0, [%q1, %2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_base_<supf>v2di"))
+ (set_attr "length" "8")])
;;
;; [vldrdq_gather_offset_s vldrdq_gather_offset_u]
@@ -3769,7 +3981,8 @@ (define_insn "mve_vldrdq_gather_offset_<supf>v2di"
output_asm_insn ("vldrd.u64\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_offset_<supf>v2di"))
+ (set_attr "length" "4")])
;;
;; [vldrdq_gather_offset_z_s vldrdq_gather_offset_z_u]
@@ -3790,7 +4003,8 @@ (define_insn "mve_vldrdq_gather_offset_z_<supf>v2di"
output_asm_insn ("vpst\n\tvldrdt.u64\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_offset_<supf>v2di"))
+ (set_attr "length" "8")])
;;
;; [vldrdq_gather_shifted_offset_s vldrdq_gather_shifted_offset_u]
@@ -3810,7 +4024,8 @@ (define_insn "mve_vldrdq_gather_shifted_offset_<supf>v2di"
output_asm_insn ("vldrd.u64\t%q0, [%m1, %q2, uxtw #3]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_shifted_offset_<supf>v2di"))
+ (set_attr "length" "4")])
;;
;; [vldrdq_gather_shifted_offset_z_s vldrdq_gather_shifted_offset_z_u]
@@ -3831,7 +4046,8 @@ (define_insn "mve_vldrdq_gather_shifted_offset_z_<supf>v2di"
output_asm_insn ("vpst\n\tvldrdt.u64\t%q0, [%m1, %q2, uxtw #3]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_shifted_offset_<supf>v2di"))
+ (set_attr "length" "8")])
;;
;; [vldrhq_gather_offset_f]
@@ -3851,7 +4067,8 @@ (define_insn "mve_vldrhq_gather_offset_fv8hf"
output_asm_insn ("vldrh.f16\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_offset_fv8hf"))
+ (set_attr "length" "4")])
;;
;; [vldrhq_gather_offset_z_f]
@@ -3873,7 +4090,8 @@ (define_insn "mve_vldrhq_gather_offset_z_fv8hf"
output_asm_insn ("vpst\n\tvldrht.f16\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_offset_fv8hf"))
+ (set_attr "length" "8")])
;;
;; [vldrhq_gather_shifted_offset_f]
@@ -3893,7 +4111,8 @@ (define_insn "mve_vldrhq_gather_shifted_offset_fv8hf"
output_asm_insn ("vldrh.f16\t%q0, [%m1, %q2, uxtw #1]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_shifted_offset_fv8hf"))
+ (set_attr "length" "4")])
;;
;; [vldrhq_gather_shifted_offset_z_f]
@@ -3915,7 +4134,8 @@ (define_insn "mve_vldrhq_gather_shifted_offset_z_fv8hf"
output_asm_insn ("vpst\n\tvldrht.f16\t%q0, [%m1, %q2, uxtw #1]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_shifted_offset_fv8hf"))
+ (set_attr "length" "8")])
;;
;; [vldrwq_gather_base_f]
@@ -3935,7 +4155,8 @@ (define_insn "mve_vldrwq_gather_base_fv4sf"
output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_fv4sf"))
+ (set_attr "length" "4")])
;;
;; [vldrwq_gather_base_z_f]
@@ -3956,7 +4177,8 @@ (define_insn "mve_vldrwq_gather_base_z_fv4sf"
output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%q1, %2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_fv4sf"))
+ (set_attr "length" "8")])
;;
;; [vldrwq_gather_offset_f]
@@ -3976,7 +4198,8 @@ (define_insn "mve_vldrwq_gather_offset_fv4sf"
output_asm_insn ("vldrw.u32\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_offset_fv4sf"))
+ (set_attr "length" "4")])
;;
;; [vldrwq_gather_offset_s vldrwq_gather_offset_u]
@@ -3996,7 +4219,8 @@ (define_insn "mve_vldrwq_gather_offset_<supf>v4si"
output_asm_insn ("vldrw.u32\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_offset_<supf>v4si"))
+ (set_attr "length" "4")])
;;
;; [vldrwq_gather_offset_z_f]
@@ -4018,7 +4242,8 @@ (define_insn "mve_vldrwq_gather_offset_z_fv4sf"
output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_offset_fv4sf"))
+ (set_attr "length" "8")])
;;
;; [vldrwq_gather_offset_z_s vldrwq_gather_offset_z_u]
@@ -4040,7 +4265,8 @@ (define_insn "mve_vldrwq_gather_offset_z_<supf>v4si"
output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_offset_<supf>v4si"))
+ (set_attr "length" "8")])
;;
;; [vldrwq_gather_shifted_offset_f]
@@ -4060,7 +4286,8 @@ (define_insn "mve_vldrwq_gather_shifted_offset_fv4sf"
output_asm_insn ("vldrw.u32\t%q0, [%m1, %q2, uxtw #2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_shifted_offset_fv4sf"))
+ (set_attr "length" "4")])
;;
;; [vldrwq_gather_shifted_offset_s vldrwq_gather_shifted_offset_u]
@@ -4080,7 +4307,8 @@ (define_insn "mve_vldrwq_gather_shifted_offset_<supf>v4si"
output_asm_insn ("vldrw.u32\t%q0, [%m1, %q2, uxtw #2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_shifted_offset_<supf>v4si"))
+ (set_attr "length" "4")])
;;
;; [vldrwq_gather_shifted_offset_z_f]
@@ -4102,7 +4330,8 @@ (define_insn "mve_vldrwq_gather_shifted_offset_z_fv4sf"
output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%m1, %q2, uxtw #2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_shifted_offset_fv4sf"))
+ (set_attr "length" "8")])
;;
;; [vldrwq_gather_shifted_offset_z_s vldrwq_gather_shifted_offset_z_u]
@@ -4124,7 +4353,8 @@ (define_insn "mve_vldrwq_gather_shifted_offset_z_<supf>v4si"
output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%m1, %q2, uxtw #2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_shifted_offset_<supf>v4si"))
+ (set_attr "length" "8")])
;;
;; [vstrhq_f]
@@ -4143,7 +4373,8 @@ (define_insn "mve_vstrhq_fv8hf"
output_asm_insn ("vstrh.16\t%q1, %E0",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_fv8hf"))
+ (set_attr "length" "4")])
;;
;; [vstrhq_p_f]
@@ -4164,7 +4395,8 @@ (define_insn "mve_vstrhq_p_fv8hf"
output_asm_insn ("vpst\;vstrht.16\t%q1, %E0",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_fv8hf"))
+ (set_attr "length" "8")])
;;
;; [vstrhq_p_s vstrhq_p_u]
@@ -4186,7 +4418,8 @@ (define_insn "mve_vstrhq_p_<supf><mode>"
output_asm_insn ("vpst\;vstrht.<V_sz_elem>\t%q1, %E0",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_<supf><mode>"))
+ (set_attr "length" "8")])
;;
;; [vstrhq_scatter_offset_p_s vstrhq_scatter_offset_p_u]
@@ -4218,7 +4451,8 @@ (define_insn "mve_vstrhq_scatter_offset_p_<supf><mode>_insn"
VSTRHSOQ))]
"TARGET_HAVE_MVE"
"vpst\;vstrht.<V_sz_elem>\t%q2, [%0, %q1]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_scatter_offset_<supf><mode>_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrhq_scatter_offset_s vstrhq_scatter_offset_u]
@@ -4246,7 +4480,8 @@ (define_insn "mve_vstrhq_scatter_offset_<supf><mode>_insn"
VSTRHSOQ))]
"TARGET_HAVE_MVE"
"vstrh.<V_sz_elem>\t%q2, [%0, %q1]"
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_scatter_offset_<supf><mode>_insn"))
+ (set_attr "length" "4")])
;;
;; [vstrhq_scatter_shifted_offset_p_s vstrhq_scatter_shifted_offset_p_u]
@@ -4278,7 +4513,8 @@ (define_insn "mve_vstrhq_scatter_shifted_offset_p_<supf><mode>_insn"
VSTRHSSOQ))]
"TARGET_HAVE_MVE"
"vpst\;vstrht.<V_sz_elem>\t%q2, [%0, %q1, uxtw #1]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_scatter_shifted_offset_<supf><mode>_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrhq_scatter_shifted_offset_s vstrhq_scatter_shifted_offset_u]
@@ -4307,7 +4543,8 @@ (define_insn "mve_vstrhq_scatter_shifted_offset_<supf><mode>_insn"
VSTRHSSOQ))]
"TARGET_HAVE_MVE"
"vstrh.<V_sz_elem>\t%q2, [%0, %q1, uxtw #1]"
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_scatter_shifted_offset_<supf><mode>_insn"))
+ (set_attr "length" "4")])
;;
;; [vstrhq_s, vstrhq_u]
@@ -4326,7 +4563,8 @@ (define_insn "mve_vstrhq_<supf><mode>"
output_asm_insn ("vstrh.<V_sz_elem>\t%q1, %E0",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_<supf><mode>"))
+ (set_attr "length" "4")])
;;
;; [vstrwq_f]
@@ -4345,7 +4583,8 @@ (define_insn "mve_vstrwq_fv4sf"
output_asm_insn ("vstrw.32\t%q1, %E0",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_fv4sf"))
+ (set_attr "length" "4")])
;;
;; [vstrwq_p_f]
@@ -4366,7 +4605,8 @@ (define_insn "mve_vstrwq_p_fv4sf"
output_asm_insn ("vpst\;vstrwt.32\t%q1, %E0",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_fv4sf"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_p_s vstrwq_p_u]
@@ -4387,7 +4627,8 @@ (define_insn "mve_vstrwq_p_<supf>v4si"
output_asm_insn ("vpst\;vstrwt.32\t%q1, %E0",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_<supf>v4si"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_s vstrwq_u]
@@ -4406,7 +4647,8 @@ (define_insn "mve_vstrwq_<supf>v4si"
output_asm_insn ("vstrw.32\t%q1, %E0",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_<supf>v4si"))
+ (set_attr "length" "4")])
(define_expand "@mve_vst1q_f<mode>"
[(match_operand:<MVE_CNVT> 0 "mve_memory_operand")
@@ -4449,7 +4691,8 @@ (define_insn "mve_vstrdq_scatter_base_p_<supf>v2di"
output_asm_insn ("vpst\;\tvstrdt.u64\t%q2, [%q0, %1]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_base_<supf>v2di"))
+ (set_attr "length" "8")])
;;
;; [vstrdq_scatter_base_s vstrdq_scatter_base_u]
@@ -4471,7 +4714,8 @@ (define_insn "mve_vstrdq_scatter_base_<supf>v2di"
output_asm_insn ("vstrd.u64\t%q2, [%q0, %1]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_base_<supf>v2di"))
+ (set_attr "length" "4")])
;;
;; [vstrdq_scatter_offset_p_s vstrdq_scatter_offset_p_u]
@@ -4502,7 +4746,8 @@ (define_insn "mve_vstrdq_scatter_offset_p_<supf>v2di_insn"
VSTRDSOQ))]
"TARGET_HAVE_MVE"
"vpst\;vstrdt.64\t%q2, [%0, %q1]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_offset_<supf>v2di_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrdq_scatter_offset_s vstrdq_scatter_offset_u]
@@ -4530,7 +4775,8 @@ (define_insn "mve_vstrdq_scatter_offset_<supf>v2di_insn"
VSTRDSOQ))]
"TARGET_HAVE_MVE"
"vstrd.64\t%q2, [%0, %q1]"
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_offset_<supf>v2di_insn"))
+ (set_attr "length" "4")])
;;
;; [vstrdq_scatter_shifted_offset_p_s vstrdq_scatter_shifted_offset_p_u]
@@ -4562,7 +4808,8 @@ (define_insn "mve_vstrdq_scatter_shifted_offset_p_<supf>v2di_insn"
VSTRDSSOQ))]
"TARGET_HAVE_MVE"
"vpst\;vstrdt.64\t%q2, [%0, %q1, uxtw #3]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_shifted_offset_<supf>v2di_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrdq_scatter_shifted_offset_s vstrdq_scatter_shifted_offset_u]
@@ -4591,7 +4838,8 @@ (define_insn "mve_vstrdq_scatter_shifted_offset_<supf>v2di_insn"
VSTRDSSOQ))]
"TARGET_HAVE_MVE"
"vstrd.64\t%q2, [%0, %q1, uxtw #3]"
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_shifted_offset_<supf>v2di_insn"))
+ (set_attr "length" "4")])
;;
;; [vstrhq_scatter_offset_f]
@@ -4619,7 +4867,8 @@ (define_insn "mve_vstrhq_scatter_offset_fv8hf_insn"
VSTRHQSO_F))]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vstrh.16\t%q2, [%0, %q1]"
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_scatter_offset_fv8hf_insn"))
+ (set_attr "length" "4")])
;;
;; [vstrhq_scatter_offset_p_f]
@@ -4650,7 +4899,8 @@ (define_insn "mve_vstrhq_scatter_offset_p_fv8hf_insn"
VSTRHQSO_F))]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vstrht.16\t%q2, [%0, %q1]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_scatter_offset_fv8hf_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrhq_scatter_shifted_offset_f]
@@ -4678,7 +4928,8 @@ (define_insn "mve_vstrhq_scatter_shifted_offset_fv8hf_insn"
VSTRHQSSO_F))]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vstrh.16\t%q2, [%0, %q1, uxtw #1]"
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_scatter_shifted_offset_fv8hf_insn"))
+ (set_attr "length" "4")])
;;
;; [vstrhq_scatter_shifted_offset_p_f]
@@ -4710,7 +4961,8 @@ (define_insn "mve_vstrhq_scatter_shifted_offset_p_fv8hf_insn"
VSTRHQSSO_F))]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vstrht.16\t%q2, [%0, %q1, uxtw #1]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_scatter_shifted_offset_fv8hf_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_scatter_base_f]
@@ -4732,7 +4984,8 @@ (define_insn "mve_vstrwq_scatter_base_fv4sf"
output_asm_insn ("vstrw.u32\t%q2, [%q0, %1]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_fv4sf"))
+ (set_attr "length" "4")])
;;
;; [vstrwq_scatter_base_p_f]
@@ -4755,7 +5008,8 @@ (define_insn "mve_vstrwq_scatter_base_p_fv4sf"
output_asm_insn ("vpst\n\tvstrwt.u32\t%q2, [%q0, %1]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_fv4sf"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_scatter_offset_f]
@@ -4783,7 +5037,8 @@ (define_insn "mve_vstrwq_scatter_offset_fv4sf_insn"
VSTRWQSO_F))]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vstrw.32\t%q2, [%0, %q1]"
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_offset_fv4sf_insn"))
+ (set_attr "length" "4")])
;;
;; [vstrwq_scatter_offset_p_f]
@@ -4814,7 +5069,8 @@ (define_insn "mve_vstrwq_scatter_offset_p_fv4sf_insn"
VSTRWQSO_F))]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vstrwt.32\t%q2, [%0, %q1]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_offset_fv4sf_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_scatter_offset_s vstrwq_scatter_offset_u]
@@ -4845,7 +5101,8 @@ (define_insn "mve_vstrwq_scatter_offset_p_<supf>v4si_insn"
VSTRWSOQ))]
"TARGET_HAVE_MVE"
"vpst\;vstrwt.32\t%q2, [%0, %q1]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_offset_<supf>v4si_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_scatter_offset_s vstrwq_scatter_offset_u]
@@ -4873,7 +5130,8 @@ (define_insn "mve_vstrwq_scatter_offset_<supf>v4si_insn"
VSTRWSOQ))]
"TARGET_HAVE_MVE"
"vstrw.32\t%q2, [%0, %q1]"
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_offset_<supf>v4si_insn"))
+ (set_attr "length" "4")])
;;
;; [vstrwq_scatter_shifted_offset_f]
@@ -4901,7 +5159,8 @@ (define_insn "mve_vstrwq_scatter_shifted_offset_fv4sf_insn"
VSTRWQSSO_F))]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vstrw.32\t%q2, [%0, %q1, uxtw #2]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_shifted_offset_fv4sf_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_scatter_shifted_offset_p_f]
@@ -4933,7 +5192,8 @@ (define_insn "mve_vstrwq_scatter_shifted_offset_p_fv4sf_insn"
VSTRWQSSO_F))]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vstrwt.32\t%q2, [%0, %q1, uxtw #2]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_shifted_offset_fv4sf_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_scatter_shifted_offset_p_s vstrwq_scatter_shifted_offset_p_u]
@@ -4965,7 +5225,8 @@ (define_insn "mve_vstrwq_scatter_shifted_offset_p_<supf>v4si_insn"
VSTRWSSOQ))]
"TARGET_HAVE_MVE"
"vpst\;vstrwt.32\t%q2, [%0, %q1, uxtw #2]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_shifted_offset_<supf>v4si_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_scatter_shifted_offset_s vstrwq_scatter_shifted_offset_u]
@@ -4994,7 +5255,8 @@ (define_insn "mve_vstrwq_scatter_shifted_offset_<supf>v4si_insn"
VSTRWSSOQ))]
"TARGET_HAVE_MVE"
"vstrw.32\t%q2, [%0, %q1, uxtw #2]"
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_shifted_offset_<supf>v4si_insn"))
+ (set_attr "length" "4")])
;;
;; [vidupq_n_u])
@@ -5062,7 +5324,8 @@ (define_insn "mve_vidupq_m_wb_u<mode>_insn"
(match_operand:SI 6 "immediate_operand" "i")))]
"TARGET_HAVE_MVE"
"vpst\;\tvidupt.u%#<V_sz_elem>\t%q0, %2, %4"
- [(set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vidupq_u<mode>_insn"))
+ (set_attr "length""8")])
;;
;; [vddupq_n_u])
@@ -5130,7 +5393,8 @@ (define_insn "mve_vddupq_m_wb_u<mode>_insn"
(match_operand:SI 6 "immediate_operand" "i")))]
"TARGET_HAVE_MVE"
"vpst\;vddupt.u%#<V_sz_elem>\t%q0, %2, %4"
- [(set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vddupq_u<mode>_insn"))
+ (set_attr "length""8")])
;;
;; [vdwdupq_n_u])
@@ -5246,8 +5510,9 @@ (define_insn "mve_vdwdupq_m_wb_u<mode>_insn"
]
"TARGET_HAVE_MVE"
"vpst\;vdwdupt.u%#<V_sz_elem>\t%q2, %3, %R4, %5"
- [(set_attr "type" "mve_move")
- (set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vdwdupq_wb_u<mode>_insn"))
+ (set_attr "type" "mve_move")
+ (set_attr "length""8")])
;;
;; [viwdupq_n_u])
@@ -5363,7 +5628,8 @@ (define_insn "mve_viwdupq_m_wb_u<mode>_insn"
]
"TARGET_HAVE_MVE"
"vpst\;\tviwdupt.u%#<V_sz_elem>\t%q2, %3, %R4, %5"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_viwdupq_wb_u<mode>_insn"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -5389,7 +5655,8 @@ (define_insn "mve_vstrwq_scatter_base_wb_<supf>v4si"
output_asm_insn ("vstrw.u32\t%q2, [%q0, %1]!",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_wb_<supf>v4si"))
+ (set_attr "length" "4")])
;;
;; [vstrwq_scatter_base_wb_p_s vstrwq_scatter_base_wb_p_u]
@@ -5415,7 +5682,8 @@ (define_insn "mve_vstrwq_scatter_base_wb_p_<supf>v4si"
output_asm_insn ("vpst\;\tvstrwt.u32\t%q2, [%q0, %1]!",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_wb_<supf>v4si"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_scatter_base_wb_f]
@@ -5440,7 +5708,8 @@ (define_insn "mve_vstrwq_scatter_base_wb_fv4sf"
output_asm_insn ("vstrw.u32\t%q2, [%q0, %1]!",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_wb_fv4sf"))
+ (set_attr "length" "4")])
;;
;; [vstrwq_scatter_base_wb_p_f]
@@ -5466,7 +5735,8 @@ (define_insn "mve_vstrwq_scatter_base_wb_p_fv4sf"
output_asm_insn ("vpst\;vstrwt.u32\t%q2, [%q0, %1]!",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_wb_fv4sf"))
+ (set_attr "length" "8")])
;;
;; [vstrdq_scatter_base_wb_s vstrdq_scatter_base_wb_u]
@@ -5491,7 +5761,8 @@ (define_insn "mve_vstrdq_scatter_base_wb_<supf>v2di"
output_asm_insn ("vstrd.u64\t%q2, [%q0, %1]!",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_base_wb_<supf>v2di"))
+ (set_attr "length" "4")])
;;
;; [vstrdq_scatter_base_wb_p_s vstrdq_scatter_base_wb_p_u]
@@ -5517,7 +5788,8 @@ (define_insn "mve_vstrdq_scatter_base_wb_p_<supf>v2di"
output_asm_insn ("vpst\;vstrdt.u64\t%q2, [%q0, %1]!",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_base_wb_<supf>v2di"))
+ (set_attr "length" "8")])
(define_expand "mve_vldrwq_gather_base_wb_<supf>v4si"
[(match_operand:V4SI 0 "s_register_operand")
@@ -5569,7 +5841,8 @@ (define_insn "mve_vldrwq_gather_base_wb_<supf>v4si_insn"
output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]!",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_wb_<supf>v4si_insn"))
+ (set_attr "length" "4")])
(define_expand "mve_vldrwq_gather_base_wb_z_<supf>v4si"
[(match_operand:V4SI 0 "s_register_operand")
@@ -5625,7 +5898,8 @@ (define_insn "mve_vldrwq_gather_base_wb_z_<supf>v4si_insn"
output_asm_insn ("vpst\;vldrwt.u32\t%q0, [%q1, %2]!",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_wb_<supf>v4si_insn"))
+ (set_attr "length" "8")])
(define_expand "mve_vldrwq_gather_base_wb_fv4sf"
[(match_operand:V4SI 0 "s_register_operand")
@@ -5677,7 +5951,8 @@ (define_insn "mve_vldrwq_gather_base_wb_fv4sf_insn"
output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]!",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_wb_fv4sf_insn"))
+ (set_attr "length" "4")])
(define_expand "mve_vldrwq_gather_base_wb_z_fv4sf"
[(match_operand:V4SI 0 "s_register_operand")
@@ -5734,7 +6009,8 @@ (define_insn "mve_vldrwq_gather_base_wb_z_fv4sf_insn"
output_asm_insn ("vpst\;vldrwt.u32\t%q0, [%q1, %2]!",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_wb_fv4sf_insn"))
+ (set_attr "length" "8")])
(define_expand "mve_vldrdq_gather_base_wb_<supf>v2di"
[(match_operand:V2DI 0 "s_register_operand")
@@ -5787,7 +6063,8 @@ (define_insn "mve_vldrdq_gather_base_wb_<supf>v2di_insn"
output_asm_insn ("vldrd.64\t%q0, [%q1, %2]!",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_base_wb_<supf>v2di_insn"))
+ (set_attr "length" "4")])
(define_expand "mve_vldrdq_gather_base_wb_z_<supf>v2di"
[(match_operand:V2DI 0 "s_register_operand")
@@ -5826,7 +6103,7 @@ (define_insn "get_fpscr_nzcvqc"
(unspec_volatile:SI [(reg:SI VFPCC_REGNUM)] UNSPEC_GET_FPSCR_NZCVQC))]
"TARGET_HAVE_MVE"
"vmrs\\t%0, FPSCR_nzcvqc"
- [(set_attr "type" "mve_move")])
+ [(set_attr "type" "mve_move")])
(define_insn "set_fpscr_nzcvqc"
[(set (reg:SI VFPCC_REGNUM)
@@ -5834,7 +6111,7 @@ (define_insn "set_fpscr_nzcvqc"
VUNSPEC_SET_FPSCR_NZCVQC))]
"TARGET_HAVE_MVE"
"vmsr\\tFPSCR_nzcvqc, %0"
- [(set_attr "type" "mve_move")])
+ [(set_attr "type" "mve_move")])
;;
;; [vldrdq_gather_base_wb_z_s vldrdq_gather_base_wb_z_u]
@@ -5859,7 +6136,8 @@ (define_insn "mve_vldrdq_gather_base_wb_z_<supf>v2di_insn"
output_asm_insn ("vpst\;vldrdt.u64\t%q0, [%q1, %2]!",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_base_wb_<supf>v2di_insn"))
+ (set_attr "length" "8")])
;;
;; [vadciq_m_s, vadciq_m_u])
;;
@@ -5876,7 +6154,8 @@ (define_insn "mve_vadciq_m_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vpst\;vadcit.i32\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vadciq_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length" "8")])
;;
@@ -5893,7 +6172,8 @@ (define_insn "mve_vadciq_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vadci.i32\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vadciq_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length" "4")])
;;
@@ -5912,7 +6192,8 @@ (define_insn "mve_vadcq_m_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vpst\;vadct.i32\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vadcq_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length" "8")])
;;
@@ -5929,7 +6210,8 @@ (define_insn "mve_vadcq_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vadc.i32\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vadcq_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length" "4")
(set_attr "conds" "set")])
@@ -5949,7 +6231,8 @@ (define_insn "mve_vsbciq_m_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vpst\;vsbcit.i32\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vsbciq_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length" "8")])
;;
@@ -5966,7 +6249,8 @@ (define_insn "mve_vsbciq_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vsbci.i32\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vsbciq_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length" "4")])
;;
@@ -5985,7 +6269,8 @@ (define_insn "mve_vsbcq_m_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vpst\;vsbct.i32\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vsbcq_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length" "8")])
;;
@@ -6002,7 +6287,8 @@ (define_insn "mve_vsbcq_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vsbc.i32\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vsbcq_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length" "4")])
;;
@@ -6031,7 +6317,7 @@ (define_insn "mve_vst2q<mode>"
"vst21.<V_sz_elem>\t{%q0, %q1}, %3", ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set_attr "length" "8")])
;;
;; [vld2q])
@@ -6059,7 +6345,7 @@ (define_insn "mve_vld2q<mode>"
"vld21.<V_sz_elem>\t{%q0, %q1}, %3", ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set_attr "length" "8")])
;;
;; [vld4q])
@@ -6402,7 +6688,8 @@ (define_insn "mve_vshlcq_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;vshlct\t%q0, %1, %4"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vshlcq_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length" "8")])
;; CDE instructions on MVE registers.
@@ -6414,7 +6701,8 @@ (define_insn "arm_vcx1qv16qi"
UNSPEC_VCDE))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vcx1\\tp%c1, %q0, #%c2"
- [(set_attr "type" "coproc")]
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_arm_vcx1qv16qi"))
+ (set_attr "type" "coproc")]
)
(define_insn "arm_vcx1qav16qi"
@@ -6425,7 +6713,8 @@ (define_insn "arm_vcx1qav16qi"
UNSPEC_VCDEA))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vcx1a\\tp%c1, %q0, #%c3"
- [(set_attr "type" "coproc")]
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_arm_vcx1qav16qi"))
+ (set_attr "type" "coproc")]
)
(define_insn "arm_vcx2qv16qi"
@@ -6436,7 +6725,8 @@ (define_insn "arm_vcx2qv16qi"
UNSPEC_VCDE))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vcx2\\tp%c1, %q0, %q2, #%c3"
- [(set_attr "type" "coproc")]
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_arm_vcx2qv16qi"))
+ (set_attr "type" "coproc")]
)
(define_insn "arm_vcx2qav16qi"
@@ -6448,7 +6738,8 @@ (define_insn "arm_vcx2qav16qi"
UNSPEC_VCDEA))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vcx2a\\tp%c1, %q0, %q3, #%c4"
- [(set_attr "type" "coproc")]
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_arm_vcx2qav16qi"))
+ (set_attr "type" "coproc")]
)
(define_insn "arm_vcx3qv16qi"
@@ -6460,7 +6751,8 @@ (define_insn "arm_vcx3qv16qi"
UNSPEC_VCDE))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vcx3\\tp%c1, %q0, %q2, %q3, #%c4"
- [(set_attr "type" "coproc")]
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_arm_vcx3qv16qi"))
+ (set_attr "type" "coproc")]
)
(define_insn "arm_vcx3qav16qi"
@@ -6473,7 +6765,8 @@ (define_insn "arm_vcx3qav16qi"
UNSPEC_VCDEA))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vcx3a\\tp%c1, %q0, %q3, %q4, #%c5"
- [(set_attr "type" "coproc")]
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_arm_vcx3qav16qi"))
+ (set_attr "type" "coproc")]
)
(define_insn "arm_vcx1q<a>_p_v16qi"
@@ -6485,7 +6778,8 @@ (define_insn "arm_vcx1q<a>_p_v16qi"
CDE_VCX))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vpst\;vcx1<a>t\\tp%c1, %q0, #%c3"
- [(set_attr "type" "coproc")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_arm_vcx1q<a>v16qi"))
+ (set_attr "type" "coproc")
(set_attr "length" "8")]
)
@@ -6499,7 +6793,8 @@ (define_insn "arm_vcx2q<a>_p_v16qi"
CDE_VCX))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vpst\;vcx2<a>t\\tp%c1, %q0, %q3, #%c4"
- [(set_attr "type" "coproc")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_arm_vcx2q<a>v16qi"))
+ (set_attr "type" "coproc")
(set_attr "length" "8")]
)
@@ -6514,11 +6809,12 @@ (define_insn "arm_vcx3q<a>_p_v16qi"
CDE_VCX))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vpst\;vcx3<a>t\\tp%c1, %q0, %q3, %q4, #%c5"
- [(set_attr "type" "coproc")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_arm_vcx3q<a>v16qi"))
+ (set_attr "type" "coproc")
(set_attr "length" "8")]
)
-(define_insn "*movmisalign<mode>_mve_store"
+(define_insn "movmisalign<mode>_mve_store"
[(set (match_operand:MVE_VLD_ST 0 "mve_memory_operand" "=Ux")
(unspec:MVE_VLD_ST [(match_operand:MVE_VLD_ST 1 "s_register_operand" " w")]
UNSPEC_MISALIGNED_ACCESS))]
@@ -6526,11 +6822,12 @@ (define_insn "*movmisalign<mode>_mve_store"
|| (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode)))
&& !BYTES_BIG_ENDIAN && unaligned_access"
"vstr<V_sz_elem1>.<V_sz_elem>\t%q1, %E0"
- [(set_attr "type" "mve_store")]
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_movmisalign<mode>_mve_store"))
+ (set_attr "type" "mve_store")]
)
-(define_insn "*movmisalign<mode>_mve_load"
+(define_insn "movmisalign<mode>_mve_load"
[(set (match_operand:MVE_VLD_ST 0 "s_register_operand" "=w")
(unspec:MVE_VLD_ST [(match_operand:MVE_VLD_ST 1 "mve_memory_operand" " Ux")]
UNSPEC_MISALIGNED_ACCESS))]
@@ -6538,7 +6835,8 @@ (define_insn "*movmisalign<mode>_mve_load"
|| (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode)))
&& !BYTES_BIG_ENDIAN && unaligned_access"
"vldr<V_sz_elem1>.<V_sz_elem>\t%q0, %E1"
- [(set_attr "type" "mve_load")]
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_movmisalign<mode>_mve_load"))
+ (set_attr "type" "mve_load")]
)
;; Expander for VxBI moves
@@ -6620,3 +6918,40 @@ (define_expand "@arm_mve_reinterpret<mode>"
}
}
)
+
+;; Originally expanded by 'predicated_doloop_end'.
+;; In the rare situation where the branch is too far, we do also need to
+;; revert FPSCR.LTPSIZE back to 0x100 after the last iteration.
+(define_insn "*predicated_doloop_end_internal"
+ [(set (pc)
+ (if_then_else
+ (ge (plus:SI (reg:SI LR_REGNUM)
+ (match_operand:SI 0 "const_int_operand" ""))
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (reg:SI LR_REGNUM)
+ (plus:SI (reg:SI LR_REGNUM) (match_dup 0)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT && TARGET_HAVE_LOB && TARGET_HAVE_MVE && TARGET_THUMB2"
+ {
+ if (get_attr_length (insn) == 4)
+ return "letp\t%|lr, %l1";
+ else
+ return "subs\t%|lr, #%n0\n\tbgt\t%l1\n\tlctp";
+ }
+ [(set (attr "length")
+ (if_then_else
+ (ltu (minus (pc) (match_dup 1)) (const_int 1024))
+ (const_int 4)
+ (const_int 6)))
+ (set_attr "type" "branch")])
+
+(define_insn "dlstp<mode1>_insn"
+ [
+ (set (reg:SI LR_REGNUM)
+ (unspec:SI [(match_operand:SI 0 "s_register_operand" "r")]
+ DLSTP))
+ ]
+ "TARGET_32BIT && TARGET_HAVE_LOB && TARGET_HAVE_MVE && TARGET_THUMB2"
+ "dlstp.<mode1>\t%|lr, %0")
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 9af8429968d..74871cb984b 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -366,7 +366,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
"@
<mve_insn>.<supf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
* return neon_output_shift_immediate (\"vshl\", 'i', &operands[2], <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode), true);"
- [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
)
(define_expand "vashl<mode>3"
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 2/2] arm: Add support for MVE Tail-Predicated Low Overhead Loops
2023-12-18 11:53 [PATCH 0/2] arm: Add support for MVE Tail-Predicated Low Overhead Loops Andre Vieira
2023-12-18 11:53 ` [PATCH 1/2] arm: Add define_attr to to create a mapping between MVE predicated and unpredicated insns Andre Vieira
@ 2023-12-18 11:53 ` Andre Vieira
2023-12-20 16:54 ` Andre Vieira (lists)
1 sibling, 1 reply; 5+ messages in thread
From: Andre Vieira @ 2023-12-18 11:53 UTC (permalink / raw)
To: gcc-patches; +Cc: Richard.Earnshaw, Stam Markianos-Wright
[-- Attachment #1: Type: text/plain, Size: 44 bytes --]
This is a multi-part message in MIME format.
[-- Attachment #2: Type: text/plain, Size: 881 bytes --]
Reworked Stam's patch after comments in:
https://gcc.gnu.org/pipermail/gcc-patches/2023-December/640362.html
The original gcc ChangeLog remains unchanged, but I did split up some tests so
here is the testsuite ChangeLog.
gcc/testsuite/ChangeLog:
* gcc.target/arm/lob.h: Update framework.
* gcc.target/arm/lob1.c: Likewise.
* gcc.target/arm/lob6.c: Likewise.
* gcc.target/arm/mve/dlstp-compile-asm.c: New test.
* gcc.target/arm/mve/dlstp-int16x8.c: New test.
* gcc.target/arm/mve/dlstp-int16x8-run.c: New test.
* gcc.target/arm/mve/dlstp-int32x4.c: New test.
* gcc.target/arm/mve/dlstp-int32x4-run.c: New test.
* gcc.target/arm/mve/dlstp-int64x2.c: New test.
* gcc.target/arm/mve/dlstp-int64x2-run.c: New test.
* gcc.target/arm/mve/dlstp-int8x16.c: New test.
* gcc.target/arm/mve/dlstp-int8x16-run.c: New test.
* gcc.target/arm/mve/dlstp-invalid-asm.c: New test.
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3: 0002-arm-Add-support-for-MVE-Tail-Predicated-Low-Overhead.patch --]
[-- Type: text/x-patch; name="0002-arm-Add-support-for-MVE-Tail-Predicated-Low-Overhead.patch", Size: 107172 bytes --]
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 2f5ca79ed8d..4f164c54740 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -65,8 +65,8 @@ extern void arm_emit_speculation_barrier_function (void);
extern void arm_decompose_di_binop (rtx, rtx, rtx *, rtx *, rtx *, rtx *);
extern bool arm_q_bit_access (void);
extern bool arm_ge_bits_access (void);
-extern bool arm_target_insn_ok_for_lob (rtx);
-
+extern bool arm_target_bb_ok_for_lob (basic_block);
+extern rtx arm_attempt_dlstp_transform (rtx);
#ifdef RTX_CODE
enum reg_class
arm_mode_base_reg_class (machine_mode);
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 0c0cb14a8a4..1ee72bcb7ec 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -668,6 +668,12 @@ static const scoped_attribute_specs *const arm_attribute_table[] =
#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
+#undef TARGET_LOOP_UNROLL_ADJUST
+#define TARGET_LOOP_UNROLL_ADJUST arm_loop_unroll_adjust
+
+#undef TARGET_PREDICT_DOLOOP_P
+#define TARGET_PREDICT_DOLOOP_P arm_predict_doloop_p
+
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
@@ -34483,19 +34489,1147 @@ arm_invalid_within_doloop (const rtx_insn *insn)
}
bool
-arm_target_insn_ok_for_lob (rtx insn)
+arm_target_bb_ok_for_lob (basic_block bb)
{
- basic_block bb = BLOCK_FOR_INSN (insn);
/* Make sure the basic block of the target insn is a simple latch
having as single predecessor and successor the body of the loop
itself. Only simple loops with a single basic block as body are
supported for 'low over head loop' making sure that LE target is
above LE itself in the generated code. */
-
return single_succ_p (bb)
- && single_pred_p (bb)
- && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
- && contains_no_active_insn_p (bb);
+ && single_pred_p (bb)
+ && single_succ_edge (bb)->dest == single_pred_edge (bb)->src;
+}
+
+/* Utility fuction: Given a VCTP or a VCTP_M insn, return the number of MVE
+ lanes based on the machine mode being used. */
+
+static int
+arm_mve_get_vctp_lanes (rtx_insn *insn)
+{
+ rtx insn_set = single_set (insn);
+ if (insn_set
+ && GET_CODE (SET_SRC (insn_set)) == UNSPEC
+ && (XINT (SET_SRC (insn_set), 1) == VCTP
+ || XINT (SET_SRC (insn_set), 1) == VCTP_M))
+ {
+ machine_mode mode = GET_MODE (SET_SRC (insn_set));
+ return (VECTOR_MODE_P (mode) && VALID_MVE_PRED_MODE (mode))
+ ? GET_MODE_NUNITS (mode) : 0;
+ }
+ return 0;
+}
+
+/* Check if INSN requires the use of the VPR reg, if it does, return the
+ sub-rtx of the VPR reg. The TYPE argument controls whether
+ this function should:
+ * For TYPE == 0, check all operands, including the OUT operands,
+ and return the first occurrence of the VPR reg.
+ * For TYPE == 1, only check the input operands.
+ * For TYPE == 2, only check the output operands.
+ (INOUT operands are considered both as input and output operands)
+*/
+static rtx
+arm_get_required_vpr_reg (rtx_insn *insn, unsigned int type = 0)
+{
+ gcc_assert (type < 3);
+ if (!NONJUMP_INSN_P (insn))
+ return NULL_RTX;
+
+ bool requires_vpr;
+ extract_constrain_insn (insn);
+ int n_operands = recog_data.n_operands;
+ if (recog_data.n_alternatives == 0)
+ return NULL_RTX;
+
+ /* Fill in recog_op_alt with information about the constraints of
+ this insn. */
+ preprocess_constraints (insn);
+
+ for (int op = 0; op < n_operands; op++)
+ {
+ requires_vpr = true;
+ if (type == 1 && recog_data.operand_type[op] == OP_OUT)
+ continue;
+ else if (type == 2 && recog_data.operand_type[op] == OP_IN)
+ continue;
+
+ /* Iterate through alternatives of operand "op" in recog_op_alt and
+ identify if the operand is required to be the VPR. */
+ for (int alt = 0; alt < recog_data.n_alternatives; alt++)
+ {
+ const operand_alternative *op_alt
+ = &recog_op_alt[alt * n_operands];
+ /* Fetch the reg_class for each entry and check it against the
+ VPR_REG reg_class. */
+ if (alternative_class (op_alt, op) != VPR_REG)
+ requires_vpr = false;
+ }
+ /* If all alternatives of the insn require the VPR reg for this operand,
+ it means that either this is VPR-generating instruction, like a vctp,
+ vcmp, etc., or it is a VPT-predicated insruction. Return the subrtx
+ of the VPR reg operand. */
+ if (requires_vpr)
+ return recog_data.operand[op];
+ }
+ return NULL_RTX;
+}
+
+/* Wrapper function of arm_get_required_vpr_reg with TYPE == 1, so return
+ something only if the VPR reg is an input operand to the insn. */
+
+static rtx
+arm_get_required_vpr_reg_param (rtx_insn *insn)
+{
+ return arm_get_required_vpr_reg (insn, 1);
+}
+
+/* Wrapper function of arm_get_required_vpr_reg with TYPE == 2, so return
+ something only if the VPR reg is the return value, an output of, or is
+ clobbered by the insn. */
+
+static rtx
+arm_get_required_vpr_reg_ret_val (rtx_insn *insn)
+{
+ return arm_get_required_vpr_reg (insn, 2);
+}
+
+/* Scan the basic block of a loop body for a vctp instruction. If there is
+ at least vctp instruction, return the first rtx_insn *. */
+
+static rtx_insn *
+arm_mve_get_loop_vctp (basic_block bb)
+{
+ rtx_insn *insn = BB_HEAD (bb);
+
+ /* Now scan through all the instruction patterns and pick out the VCTP
+ instruction. We require arm_get_required_vpr_reg_param to be false
+ to make sure we pick up a VCTP, rather than a VCTP_M. */
+ FOR_BB_INSNS (bb, insn)
+ if (NONDEBUG_INSN_P (insn))
+ if (arm_get_required_vpr_reg_ret_val (insn)
+ && (arm_mve_get_vctp_lanes (insn) != 0)
+ && !arm_get_required_vpr_reg_param (insn))
+ return insn;
+ return NULL;
+}
+
+/* Return true if INSN is a MVE instruction that is VPT-predicable, but in
+ its unpredicated form, or if it is predicated, but on a predicate other
+ than VPR_REG. */
+
+static bool
+arm_mve_vec_insn_is_unpredicated_or_uses_other_predicate (rtx_insn *insn,
+ rtx vpr_reg)
+{
+ rtx insn_vpr_reg_operand;
+ if (MVE_VPT_UNPREDICATED_INSN_P (insn)
+ || (MVE_VPT_PREDICATED_INSN_P (insn)
+ && (insn_vpr_reg_operand = arm_get_required_vpr_reg_param (insn))
+ && !rtx_equal_p (vpr_reg, insn_vpr_reg_operand)))
+ return true;
+ else
+ return false;
+}
+
+/* Return true if INSN is a MVE instruction that is VPT-predicable and is
+ predicated on VPR_REG. */
+
+static bool
+arm_mve_vec_insn_is_predicated_with_this_predicate (rtx_insn *insn,
+ rtx vpr_reg)
+{
+ rtx insn_vpr_reg_operand;
+ if (MVE_VPT_PREDICATED_INSN_P (insn)
+ && (insn_vpr_reg_operand = arm_get_required_vpr_reg_param (insn))
+ && rtx_equal_p (vpr_reg, insn_vpr_reg_operand))
+ return true;
+ else
+ return false;
+}
+
+/* Utility function to identify if INSN is an MVE instruction that performs
+ some across-vector operation (and as a result does not align with normal
+ lane predication rules). All such instructions give one only scalar
+ output, except for vshlcq which gives a PARALLEL of a vector and a scalar
+ (one vector result and one carry output). */
+
+static bool
+arm_is_mve_across_vector_insn (rtx_insn* insn)
+{
+ df_ref insn_defs = NULL;
+ if (!MVE_VPT_PREDICABLE_INSN_P (insn))
+ return false;
+
+ bool is_across_vector = false;
+ FOR_EACH_INSN_DEF (insn_defs, insn)
+ if (!VALID_MVE_MODE (GET_MODE (DF_REF_REG (insn_defs)))
+ && !arm_get_required_vpr_reg_ret_val (insn))
+ is_across_vector = true;
+
+ return is_across_vector;
+}
+
+/* Utility function to identify if INSN is an MVE load or store instruction.
+ * For TYPE == 0, check all operands. If the function returns true,
+ INSN is a load or a store insn.
+ * For TYPE == 1, only check the input operands. If the function returns
+ true, INSN is a load insn.
+ * For TYPE == 2, only check the output operands. If the function returns
+ true, INSN is a store insn. */
+
+static bool
+arm_is_mve_load_store_insn (rtx_insn* insn, int type = 0)
+{
+ int n_operands = recog_data.n_operands;
+ extract_insn (insn);
+
+ for (int op = 0; op < n_operands; op++)
+ {
+ if (type == 1 && recog_data.operand_type[op] == OP_OUT)
+ continue;
+ else if (type == 2 && recog_data.operand_type[op] == OP_IN)
+ continue;
+ if (mve_memory_operand (recog_data.operand[op],
+ GET_MODE (recog_data.operand[op])))
+ return true;
+ }
+ return false;
+}
+
+/* When transforming an MVE intrinsic loop into an MVE Tail Predicated Low
+ Overhead Loop, there are a number of instructions that, if in their
+ unpredicated form, act across vector lanes, but are still safe to include
+ within the loop, despite the implicit predication added to the vector lanes.
+ This list has been compiled by carefully analyzing the instruction
+ pseudocode in the Arm-ARM.
+ All other across-vector instructions aren't allowed, because the addition
+ of implicit predication could influnce the result of the operation.
+ Any new across-vector instructions to the MVE ISA will have to assessed for
+ inclusion to this list. */
+
+static bool
+arm_mve_is_allowed_unpredic_across_vector_insn (rtx_insn* insn)
+{
+ gcc_assert (MVE_VPT_UNPREDICATED_INSN_P (insn)
+ && arm_is_mve_across_vector_insn (insn));
+ rtx insn_set = single_set (insn);
+ if (!insn_set)
+ return false;
+ rtx unspec = SET_SRC (insn_set);
+ if (GET_CODE (unspec) != UNSPEC)
+ return false;
+ switch (XINT (unspec, 1))
+ {
+ case VADDVQ_U:
+ case VADDVQ_S:
+ case VADDVAQ_U:
+ case VADDVAQ_S:
+ case VMLADAVQ_U:
+ case VMLADAVQ_S:
+ case VMLADAVXQ_S:
+ case VMLADAVAQ_U:
+ case VMLADAVAQ_S:
+ case VMLADAVAXQ_S:
+ case VABAVQ_S:
+ case VABAVQ_U:
+ case VADDLVQ_S:
+ case VADDLVQ_U:
+ case VADDLVAQ_S:
+ case VADDLVAQ_U:
+ case VMAXVQ_U:
+ case VMAXAVQ_S:
+ case VMLALDAVQ_U:
+ case VMLALDAVXQ_U:
+ case VMLALDAVXQ_S:
+ case VMLALDAVQ_S:
+ case VMLALDAVAQ_S:
+ case VMLALDAVAQ_U:
+ case VMLALDAVAXQ_S:
+ case VMLALDAVAXQ_U:
+ case VMLSDAVQ_S:
+ case VMLSDAVXQ_S:
+ case VMLSDAVAXQ_S:
+ case VMLSDAVAQ_S:
+ case VMLSLDAVQ_S:
+ case VMLSLDAVXQ_S:
+ case VMLSLDAVAQ_S:
+ case VMLSLDAVAXQ_S:
+ case VRMLALDAVHXQ_S:
+ case VRMLALDAVHQ_U:
+ case VRMLALDAVHQ_S:
+ case VRMLALDAVHAQ_S:
+ case VRMLALDAVHAQ_U:
+ case VRMLALDAVHAXQ_S:
+ case VRMLSLDAVHQ_S:
+ case VRMLSLDAVHXQ_S:
+ case VRMLSLDAVHAQ_S:
+ case VRMLSLDAVHAXQ_S:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+/* Scan through the DF chain backwards within the basic block and
+ determine if any of the USEs of the original insn (or the USEs of the insns
+ where thy were DEF-ed, etc.) were affected by implicit VPT
+ predication of an MVE_VPT_UNPREDICATED_INSN_P in a dlstp/letp loop.
+ This function returns true if the insn is affected implicit predication
+ and false otherwise.
+ Having such implicit predication on an unpredicated insn wouldn't in itself
+ block tail predication, because the output of that insn might then be used
+ in a correctly predicated store insn, where the disabled lanes will be
+ ignored. To verify this we later call:
+ `arm_mve_check_df_chain_fwd_for_implic_predic_impact`, which will check the
+ DF chains forward to see if any implicitly-predicated operand gets used in
+ an improper way. */
+
+static bool
+arm_mve_check_df_chain_back_for_implic_predic
+ (hash_map <rtx_insn *, bool> *safe_insn_map, rtx_insn *insn_in,
+ rtx vctp_vpr_generated)
+{
+
+ auto_vec<rtx_insn *> worklist;
+ worklist.safe_push (insn_in);
+
+ bool *temp = NULL;
+
+ while (worklist.length () > 0)
+ {
+ rtx_insn *insn = worklist.pop ();
+
+ if ((temp = safe_insn_map->get (insn)))
+ return *temp;
+
+ basic_block body = BLOCK_FOR_INSN (insn);
+
+ /* The circumstances under which an instruction is affected by "implicit
+ predication" are as follows:
+ * It is an UNPREDICATED_INSN_P:
+ * That loads/stores from/to memory.
+ * Where any one of its operands is an MVE vector from outside the
+ loop body bb.
+ Or:
+ * Any of it's operands were affected earlier in the insn chain. */
+ if (MVE_VPT_UNPREDICATED_INSN_P (insn)
+ && (arm_is_mve_load_store_insn (insn)
+ || (arm_is_mve_across_vector_insn (insn)
+ && !arm_mve_is_allowed_unpredic_across_vector_insn (insn))))
+ {
+ safe_insn_map->put (insn, true);
+ return true;
+ }
+
+ df_ref insn_uses = NULL;
+ FOR_EACH_INSN_USE (insn_uses, insn)
+ {
+ /* If the operand is in the input reg set to the the basic block,
+ (i.e. it has come from outside the loop!), consider it unsafe if:
+ * It's being used in an unpredicated insn.
+ * It is a predicable MVE vector. */
+ if (MVE_VPT_UNPREDICATED_INSN_P (insn)
+ && VALID_MVE_MODE (GET_MODE (DF_REF_REG (insn_uses)))
+ && REGNO_REG_SET_P (DF_LR_IN (body), DF_REF_REGNO (insn_uses)))
+ {
+ safe_insn_map->put (insn, true);
+ return true;
+ }
+
+ /* Scan backwards from the current INSN through the instruction chain
+ until the start of the basic block. */
+ for (rtx_insn *prev_insn = PREV_INSN (insn);
+ prev_insn && prev_insn != PREV_INSN (BB_HEAD (body));
+ prev_insn = PREV_INSN (prev_insn))
+ {
+ /* If a previous insn defines a register that INSN uses, then
+ add to the worklist to check that insn's USEs. If any of these
+ insns return true as MVE_VPT_UNPREDICATED_INSN_Ps, then the
+ whole chain is affected by the change in behaviour from being
+ placed in dlstp/letp loop. */
+ df_ref prev_insn_defs = NULL;
+ FOR_EACH_INSN_DEF (prev_insn_defs, prev_insn)
+ {
+ if (DF_REF_REGNO (insn_uses) == DF_REF_REGNO (prev_insn_defs)
+ && !arm_mve_vec_insn_is_predicated_with_this_predicate
+ (insn, vctp_vpr_generated))
+ worklist.safe_push (prev_insn);
+ }
+ }
+ }
+ }
+ safe_insn_map->put (insn_in, false);
+ return false;
+}
+
+/* If we have identified that the current DEF will be modified
+ by such implicit predication, scan through all the
+ insns that USE it and bail out if any one is outside the
+ current basic block (i.e. the reg is live after the loop)
+ or if any are store insns that are unpredicated or using a
+ predicate other than the loop VPR.
+ This function returns true if the insn is not suitable for
+ implicit predication and false otherwise.*/
+
+static bool
+arm_mve_check_df_chain_fwd_for_implic_predic_impact (rtx_insn *insn,
+ rtx vctp_vpr_generated)
+{
+
+ /* If this insn is indeed an unpredicated store to memory, bail out. */
+ if (arm_mve_vec_insn_is_unpredicated_or_uses_other_predicate
+ (insn, vctp_vpr_generated)
+ && (arm_is_mve_load_store_insn (insn, 2)
+ || arm_is_mve_across_vector_insn (insn)))
+ return true;
+
+ /* Next, scan forward to the various USEs of the DEFs in this insn. */
+ df_ref insn_def = NULL;
+ FOR_EACH_INSN_DEF (insn_def, insn)
+ {
+ for (df_ref use = DF_REG_USE_CHAIN (DF_REF_REGNO (insn_def)); use;
+ use = DF_REF_NEXT_REG (use))
+ {
+ rtx_insn *next_use_insn = DF_REF_INSN (use);
+ if (next_use_insn != insn
+ && NONDEBUG_INSN_P (next_use_insn))
+ {
+ /* If the USE is outside the loop body bb, or it is inside, but
+ is an differently-predicated store to memory or it is any
+ across-vector instruction. */
+ if (BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (next_use_insn)
+ || (arm_mve_vec_insn_is_unpredicated_or_uses_other_predicate
+ (next_use_insn, vctp_vpr_generated)
+ && (arm_is_mve_load_store_insn (next_use_insn, 2)
+ || arm_is_mve_across_vector_insn (next_use_insn))))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/* Helper function to `arm_mve_dlstp_check_inc_counter` and to
+ `arm_mve_dlstp_check_dec_counter`. In the situations where the loop counter
+ is incrementing by 1 or decrementing by 1 in each iteration, ensure that the
+ target value or the initialisation value, respectively, was a calculation
+ of the number of iterations of the loop, which is expected to be an ASHIFTRT
+ by VCTP_STEP. */
+
+static bool
+arm_mve_check_reg_origin_is_num_elems (basic_block body, rtx reg, rtx vctp_step)
+{
+ /* Ok, we now know the loop starts from zero and increments by one.
+ Now just show that the max value of the counter came from an
+ appropriate ASHIFRT expr of the correct amount. */
+ basic_block pre_loop_bb = body->prev_bb;
+ while (pre_loop_bb && BB_END (pre_loop_bb)
+ && !df_bb_regno_only_def_find (pre_loop_bb, REGNO (reg)))
+ pre_loop_bb = pre_loop_bb->prev_bb;
+
+ df_ref counter_max_last_def = df_bb_regno_only_def_find (pre_loop_bb, REGNO (reg));
+ if (!counter_max_last_def)
+ return false;
+ rtx counter_max_last_set = single_set (DF_REF_INSN (counter_max_last_def));
+ if (!counter_max_last_set)
+ return false;
+
+ /* If we encounter a simple SET from a REG, follow it through. */
+ if (REG_P (SET_SRC (counter_max_last_set)))
+ return arm_mve_check_reg_origin_is_num_elems
+ (pre_loop_bb->next_bb, SET_SRC (counter_max_last_set), vctp_step);
+
+ /* If we encounter a SET from an IF_THEN_ELSE where one of the operands is a
+ constant and the other is a REG, follow through to that REG. */
+ if (GET_CODE (SET_SRC (counter_max_last_set)) == IF_THEN_ELSE
+ && REG_P (XEXP (SET_SRC (counter_max_last_set), 1))
+ && CONST_INT_P (XEXP (SET_SRC (counter_max_last_set), 2)))
+ return arm_mve_check_reg_origin_is_num_elems
+ (pre_loop_bb->next_bb, XEXP (SET_SRC (counter_max_last_set), 1), vctp_step);
+
+ if (GET_CODE (SET_SRC (counter_max_last_set)) == ASHIFTRT
+ && CONST_INT_P (XEXP (SET_SRC (counter_max_last_set), 1))
+ && ((1 << INTVAL (XEXP (SET_SRC (counter_max_last_set), 1)))
+ == abs (INTVAL (vctp_step))))
+ return true;
+
+ return false;
+}
+
+/* If we have identified the loop to have an incrementing counter, we need to
+ make sure that it increments by 1 and that the loop is structured correctly:
+ * The counter starts from 0
+ * The counter terminates at (num_of_elem + num_of_lanes - 1) / num_of_lanes
+ * The vctp insn uses a reg that decrements appropriately in each iteration.
+*/
+
+static rtx_insn*
+arm_mve_dlstp_check_inc_counter (basic_block body, rtx_insn* vctp_insn,
+ rtx condconst, rtx condcount)
+{
+ rtx vctp_reg = XVECEXP (XEXP (PATTERN (vctp_insn), 1), 0, 0);
+ /* The loop latch has to be empty. When compiling all the known MVE LoLs in
+ user applications, none of those with incrementing counters had any real
+ insns in the loop latch. As such, this function has only been tested with
+ an empty latch and may misbehave or ICE if we somehow get here with an
+ increment in the latch, so, for correctness, error out early. */
+ if (!empty_block_p (body->loop_father->latch))
+ return NULL;
+
+ class rtx_iv vctp_reg_iv;
+ /* For loops of type B) the loop counter is independent of the decrement
+ of the reg used in the vctp_insn. So run iv analysis on that reg. This
+ has to succeed for such loops to be supported. */
+ if (!iv_analyze (vctp_insn, as_a<scalar_int_mode> (GET_MODE (vctp_reg)),
+ vctp_reg, &vctp_reg_iv))
+ return NULL;
+
+ /* Extract the decrementnum of the vctp reg from the iv. This decrementnum
+ is the number of lanes/elements it decrements from the remaining number of
+ lanes/elements to process in the loop, for this reason this is always a
+ negative number, but to simplify later checks we use it's absolute value. */
+ int decrementnum = INTVAL (vctp_reg_iv.step);
+ if (decrementnum >= 0)
+ return NULL;
+ decrementnum = abs (decrementnum);
+
+ /* Find where both of those are modified in the loop body bb. */
+ df_ref condcount_reg_set_df = df_bb_regno_only_def_find (body, REGNO (condcount));
+ df_ref vctp_reg_set_df = df_bb_regno_only_def_find (body, REGNO (vctp_reg));
+ if (!condcount_reg_set_df || !vctp_reg_set_df)
+ return NULL;
+ rtx condcount_reg_set = single_set (DF_REF_INSN (condcount_reg_set_df));
+ rtx vctp_reg_set = single_set (DF_REF_INSN (vctp_reg_set_df));
+ if (!condcount_reg_set || !vctp_reg_set)
+ return NULL;
+
+ /* Ensure the modification of the vctp reg from df is consistent with
+ the iv and the number of lanes on the vctp insn. */
+ if (GET_CODE (SET_SRC (vctp_reg_set)) != PLUS
+ || !REG_P (SET_DEST (vctp_reg_set))
+ || !REG_P (XEXP (SET_SRC (vctp_reg_set), 0))
+ || REGNO (SET_DEST (vctp_reg_set))
+ != REGNO (XEXP (SET_SRC (vctp_reg_set), 0))
+ || !CONST_INT_P (XEXP (SET_SRC (vctp_reg_set), 1))
+ || INTVAL (XEXP (SET_SRC (vctp_reg_set), 1)) >= 0
+ || decrementnum != abs (INTVAL (XEXP (SET_SRC (vctp_reg_set), 1)))
+ || decrementnum != arm_mve_get_vctp_lanes (vctp_insn))
+ return NULL;
+
+ if (REG_P (condcount) && REG_P (condconst))
+ {
+ /* First we need to prove that the loop is going 0..condconst with an
+ inc of 1 in each iteration. */
+ if (GET_CODE (SET_SRC (condcount_reg_set)) == PLUS
+ && CONST_INT_P (XEXP (SET_SRC (condcount_reg_set), 1))
+ && INTVAL (XEXP (SET_SRC (condcount_reg_set), 1)) == 1)
+ {
+ rtx counter_reg = SET_DEST (condcount_reg_set);
+ /* Check that the counter did indeed start from zero. */
+ df_ref this_set = DF_REG_DEF_CHAIN (REGNO (counter_reg));
+ if (!this_set)
+ return NULL;
+ df_ref last_set_def = DF_REF_NEXT_REG (this_set);
+ if (!last_set_def)
+ return NULL;
+ rtx_insn* last_set_insn = DF_REF_INSN (last_set_def);
+ rtx last_set = single_set (last_set_insn);
+ if (!last_set)
+ return NULL;
+ rtx counter_orig_set;
+ counter_orig_set = SET_SRC (last_set);
+ if (!CONST_INT_P (counter_orig_set)
+ || (INTVAL (counter_orig_set) != 0))
+ return NULL;
+ /* And finally check that the target value of the counter,
+ condconst, is of the correct shape. */
+ if (!arm_mve_check_reg_origin_is_num_elems (body, condconst,
+ vctp_reg_iv.step))
+ return NULL;
+ }
+ else
+ return NULL;
+ }
+ else
+ return NULL;
+
+ /* Everything looks valid. */
+ return vctp_insn;
+}
+
+/* Helper function to `arm_mve_loop_valid_for_dlstp`. In the case of a
+ counter that is decrementing, ensure that it is decrementing by the
+ right amount in each iteration and that the target condition is what
+ we expect. */
+
+static rtx_insn*
+arm_mve_dlstp_check_dec_counter (basic_block body, rtx_insn* vctp_insn,
+ rtx condconst, rtx condcount)
+{
+ rtx vctp_reg = XVECEXP (XEXP (PATTERN (vctp_insn), 1), 0, 0);
+ class rtx_iv vctp_reg_iv;
+ int decrementnum;
+ /* For decrementing loops of type A), the counter is usually present in the
+ loop latch. Here we simply need to verify that this counter is the same
+ reg that is also used in the vctp_insn and that it is not otherwise
+ modified. */
+ rtx_insn *dec_insn = BB_END (body->loop_father->latch);
+ /* If not in the loop latch, try to find the decrement in the loop body. */
+ if (!NONDEBUG_INSN_P (dec_insn))
+ {
+ df_ref temp = df_bb_regno_only_def_find (body, REGNO (condcount));
+ /* If we haven't been able to find the decrement, bail out. */
+ if (!temp)
+ return NULL;
+ dec_insn = DF_REF_INSN (temp);
+ }
+
+ rtx dec_set = single_set (dec_insn);
+
+ /* Next, ensure that it is a PLUS of the form:
+ (set (reg a) (plus (reg a) (const_int)))
+ where (reg a) is the same as condcount. */
+ if (!dec_set
+ || !REG_P (SET_DEST (dec_set))
+ || !REG_P (XEXP (SET_SRC (dec_set), 0))
+ || !CONST_INT_P (XEXP (SET_SRC (dec_set), 1))
+ || REGNO (SET_DEST (dec_set))
+ != REGNO (XEXP (SET_SRC (dec_set), 0))
+ || REGNO (SET_DEST (dec_set)) != REGNO (condcount))
+ return NULL;
+
+ decrementnum = INTVAL (XEXP (SET_SRC (dec_set), 1));
+
+ /* This decrementnum is the number of lanes/elements it decrements from the
+ remaining number of lanes/elements to process in the loop, for this reason
+ this is always a negative number, but to simplify later checks we use it's
+ absolute value. */
+ if (decrementnum >= 0)
+ return NULL;
+ decrementnum = abs (decrementnum);
+
+ /* Ok, so we now know the loop decrement. If it is a 1, then we need to
+ look at the loop vctp_reg and verify that it also decrements correctly.
+ Then, we need to establish that the starting value of the loop decrement
+ originates from the starting value of the vctp decrement. */
+ if (decrementnum == 1)
+ {
+ class rtx_iv vctp_reg_iv;
+ /* The loop counter is found to be independent of the decrement
+ of the reg used in the vctp_insn, again. Ensure that IV analysis
+ succeeds and check the step. */
+ if (!iv_analyze (vctp_insn, as_a<scalar_int_mode> (GET_MODE (vctp_reg)),
+ vctp_reg, &vctp_reg_iv))
+ return NULL;
+ /* Ensure it matches the number of lanes of the vctp instruction. */
+ if (abs (INTVAL (vctp_reg_iv.step))
+ != arm_mve_get_vctp_lanes (vctp_insn))
+ return NULL;
+ if (!arm_mve_check_reg_origin_is_num_elems (body, condcount, vctp_reg_iv.step))
+ return NULL;
+ }
+ /* If the decrements are the same, then the situation is simple: either they
+ are also the same reg, which is safe, or they are different registers, in
+ which case makse sure that there is a only simple SET from one to the
+ other inside the loop.*/
+ else if (decrementnum == arm_mve_get_vctp_lanes (vctp_insn))
+ {
+ if (REGNO (condcount) != REGNO (vctp_reg))
+ {
+ /* It wasn't the same reg, but it could be behild a
+ (set (vctp_reg) (condcount)), so instead find where
+ the VCTP insn is DEF'd inside the loop. */
+ rtx_insn *vctp_reg_insn
+ = DF_REF_INSN (df_bb_regno_only_def_find (body, REGNO (vctp_reg)));
+ rtx vctp_reg_set = single_set (vctp_reg_insn);
+ /* This must just be a simple SET from the condcount. */
+ if (!vctp_reg_set
+ || !REG_P (SET_DEST (vctp_reg_set))
+ || !REG_P (SET_SRC (vctp_reg_set))
+ || REGNO (SET_SRC (vctp_reg_set)) != REGNO (condcount))
+ return NULL;
+ }
+ }
+ else
+ return NULL;
+
+ /* We now only need to find out that the loop terminates with a LE
+ zero condition. If condconst is a const_int, then this is easy.
+ If its a REG, look at the last condition+jump in a bb before
+ the loop, because that usually will have a branch jumping over
+ the loop body. */
+ if (CONST_INT_P (condconst)
+ && !(INTVAL (condconst) == 0 && JUMP_P (BB_END (body))
+ && GET_CODE (XEXP (PATTERN (BB_END (body)), 1)) == IF_THEN_ELSE
+ && (GET_CODE (XEXP (XEXP (PATTERN (BB_END (body)), 1), 0)) == NE
+ ||GET_CODE (XEXP (XEXP (PATTERN (BB_END (body)), 1), 0)) == GT)))
+ return NULL;
+ else if (REG_P (condconst))
+ {
+ basic_block pre_loop_bb = body;
+ while (pre_loop_bb->prev_bb && BB_END (pre_loop_bb->prev_bb)
+ && !JUMP_P (BB_END (pre_loop_bb->prev_bb)))
+ pre_loop_bb = pre_loop_bb->prev_bb;
+ if (pre_loop_bb && BB_END (pre_loop_bb))
+ pre_loop_bb = pre_loop_bb->prev_bb;
+ else
+ return NULL;
+ rtx initial_compare = NULL_RTX;
+ if (!(prev_nonnote_nondebug_insn_bb (BB_END (pre_loop_bb))
+ && INSN_P (prev_nonnote_nondebug_insn_bb (BB_END (pre_loop_bb)))))
+ return NULL;
+ else
+ initial_compare
+ = single_set (prev_nonnote_nondebug_insn_bb (BB_END (pre_loop_bb)));
+ if (!(initial_compare
+ && cc_register (SET_DEST (initial_compare), VOIDmode)
+ && GET_CODE (SET_SRC (initial_compare)) == COMPARE
+ && CONST_INT_P (XEXP (SET_SRC (initial_compare), 1))
+ && INTVAL (XEXP (SET_SRC (initial_compare), 1)) == 0))
+ return NULL;
+
+ /* Usually this is a LE condition, but it can also just be a GT or an EQ
+ condition (if the value is unsigned or the compiler knows its not negative) */
+ rtx_insn *loop_jumpover = BB_END (pre_loop_bb);
+ if (!(JUMP_P (loop_jumpover)
+ && GET_CODE (XEXP (PATTERN (loop_jumpover), 1)) == IF_THEN_ELSE
+ && (GET_CODE (XEXP (XEXP (PATTERN (loop_jumpover), 1), 0)) == LE
+ || GET_CODE (XEXP (XEXP (PATTERN (loop_jumpover), 1), 0)) == GT
+ || GET_CODE (XEXP (XEXP (PATTERN (loop_jumpover), 1), 0)) == EQ)))
+ return NULL;
+ }
+
+ /* Everything looks valid. */
+ return vctp_insn;
+}
+
+/* Function to check a loop's structure to see if it is a valid candidate for
+ an MVE Tail Predicated Low-Overhead Loop. Returns the loop's VCTP_INSN if
+ it is valid, or NULL if it isn't. */
+
+static rtx_insn*
+arm_mve_loop_valid_for_dlstp (basic_block body)
+{
+ /* Doloop can only be done "elementwise" with predicated dlstp/letp if it
+ contains a VCTP on the number of elements processed by the loop.
+ Find the VCTP predicate generation inside the loop body BB. */
+ rtx_insn *vctp_insn = arm_mve_get_loop_vctp (body);
+ if (!vctp_insn)
+ return NULL;
+
+ /* There are only two types of loops that can be turned into dlstp/letp
+ loops:
+ A) Loops of the form:
+ while (num_of_elem > 0)
+ {
+ p = vctp<size> (num_of_elem)
+ n -= num_of_lanes;
+ }
+ B) Loops of the form:
+ int num_of_iters = (num_of_elem + num_of_lanes - 1) / num_of_lanes
+ for (i = 0; i < num_of_iters; i++)
+ {
+ p = vctp<size> (num_of_elem)
+ n -= num_of_lanes;
+ }
+
+ Then, depending on the type of loop above we need will need to do
+ different sets of checks. */
+ iv_analysis_loop_init (body->loop_father);
+
+ /* In order to find out if the loop is of type A or B above look for the
+ loop counter: it will either be incrementing by one per iteration or
+ it will be decrementing by num_of_lanes. We can find the loop counter
+ in the condition at the end of the loop. */
+ rtx_insn *loop_cond = prev_nonnote_nondebug_insn_bb (BB_END (body));
+ if (!(cc_register (XEXP (PATTERN (loop_cond), 0), VOIDmode)
+ && GET_CODE (XEXP (PATTERN (loop_cond), 1)) == COMPARE))
+ return NULL;
+
+ /* The operands in the condition: Try to identify which one is the
+ constant and which is the counter and run IV analysis on the latter. */
+ rtx cond_arg_1 = XEXP (XEXP (PATTERN (loop_cond), 1), 0);
+ rtx cond_arg_2 = XEXP (XEXP (PATTERN (loop_cond), 1), 1);
+
+ rtx loop_cond_constant;
+ rtx loop_counter;
+ class rtx_iv cond_counter_iv, cond_temp_iv;
+
+ if (CONST_INT_P (cond_arg_1))
+ {
+ /* cond_arg_1 is the constant and cond_arg_2 is the counter. */
+ loop_cond_constant = cond_arg_1;
+ loop_counter = cond_arg_2;
+ iv_analyze (loop_cond, as_a<scalar_int_mode> (GET_MODE (cond_arg_2)),
+ cond_arg_2, &cond_counter_iv);
+ }
+ else if (CONST_INT_P (cond_arg_2))
+ {
+ /* cond_arg_2 is the constant and cond_arg_1 is the counter. */
+ loop_cond_constant = cond_arg_2;
+ loop_counter = cond_arg_1;
+ iv_analyze (loop_cond, as_a<scalar_int_mode> (GET_MODE (cond_arg_1)),
+ cond_arg_1, &cond_counter_iv);
+ }
+ else if (REG_P (cond_arg_1) && REG_P (cond_arg_2))
+ {
+ /* If both operands to the compare are REGs, we can safely
+ run IV analysis on both and then determine which is the
+ constant by looking at the step.
+ First assume cond_arg_1 is the counter. */
+ loop_counter = cond_arg_1;
+ loop_cond_constant = cond_arg_2;
+ iv_analyze (loop_cond, as_a<scalar_int_mode> (GET_MODE (cond_arg_1)),
+ cond_arg_1, &cond_counter_iv);
+ iv_analyze (loop_cond, as_a<scalar_int_mode> (GET_MODE (cond_arg_2)),
+ cond_arg_2, &cond_temp_iv);
+
+ /* Look at the steps and swap around the rtx's if needed. Error out if
+ one of them cannot be identified as constant. */
+ if (!CONST_INT_P (cond_counter_iv.step) || !CONST_INT_P (cond_temp_iv.step))
+ return NULL;
+ if (INTVAL (cond_counter_iv.step) != 0 && INTVAL (cond_temp_iv.step) != 0)
+ return NULL;
+ if (INTVAL (cond_counter_iv.step) == 0 && INTVAL (cond_temp_iv.step) != 0)
+ {
+ loop_counter = cond_arg_2;
+ loop_cond_constant = cond_arg_1;
+ cond_counter_iv = cond_temp_iv;
+ }
+ }
+ else
+ return NULL;
+
+ if (!REG_P (loop_counter))
+ return NULL;
+ if (!(REG_P (loop_cond_constant) || CONST_INT_P (loop_cond_constant)))
+ return NULL;
+
+ /* Now we have extracted the IV step of the loop counter, call the
+ appropriate checking function. */
+ if (INTVAL (cond_counter_iv.step) > 0)
+ return arm_mve_dlstp_check_inc_counter (body, vctp_insn,
+ loop_cond_constant, loop_counter);
+ else if (INTVAL (cond_counter_iv.step) < 0)
+ return arm_mve_dlstp_check_dec_counter (body, vctp_insn,
+ loop_cond_constant, loop_counter);
+ else
+ return NULL;
+}
+
+/* Predict whether the given loop in gimple will be transformed in the RTL
+ doloop_optimize pass. It could be argued that turning large enough loops
+ into low-overhead loops would not show a signficant performance boost.
+ Howeer, in the case of tail predication we would still avoid using VPT/VPST
+ instructions inside the loop, and in either case using low-overhead loops
+ would not be detrimental, so we decided to not consider size, avoiding the
+ need of a heuristic to determine what an appropriate size boundary is. */
+
+static bool
+arm_predict_doloop_p (struct loop *loop)
+{
+ gcc_assert (loop);
+ /* On arm, targetm.can_use_doloop_p is actually
+ can_use_doloop_if_innermost. Ensure the loop is innermost,
+ it is valid and as per arm_target_bb_ok_for_lob and the
+ correct architecture flags are enabled. */
+ if (!(TARGET_HAVE_LOB && optimize > 0))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Predict doloop failure due to"
+ " target architecture or optimisation flags.\n");
+ return false;
+ }
+ else if (loop->inner != NULL)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Predict doloop failure due to"
+ " loop nesting.\n");
+ return false;
+ }
+ else if (!arm_target_bb_ok_for_lob (loop->header->next_bb))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Predict doloop failure due to"
+ " loop bb complexity.\n");
+ return false;
+ }
+
+ return true;
+}
+
+/* Implement targetm.loop_unroll_adjust. Use this to block unrolling of loops
+ that may later be turned into MVE Tail Predicated Low Overhead Loops. The
+ performance benefit of an MVE LoL is likely to be much higher than that of
+ the unrolling. */
+
+unsigned
+arm_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
+{
+ if (TARGET_HAVE_MVE
+ && arm_target_bb_ok_for_lob (loop->latch)
+ && arm_mve_loop_valid_for_dlstp (loop->header))
+ return 0;
+ else
+ return nunroll;
+}
+
+/* Function to hadle emitting a VPT-unpredicated version of a VPT-predicated
+ insn to a sequence. */
+
+static bool
+arm_emit_mve_unpredicated_insn_to_seq (rtx_insn* insn)
+{
+ rtx insn_vpr_reg_operand = arm_get_required_vpr_reg_param (insn);
+ int new_icode = get_attr_mve_unpredicated_insn (insn);
+ if (!in_sequence_p ()
+ || !MVE_VPT_PREDICATED_INSN_P (insn)
+ || (!insn_vpr_reg_operand)
+ || (!new_icode))
+ return false;
+
+ extract_insn (insn);
+ rtx arr[8];
+ int j = 0;
+
+ /* When transforming a VPT-predicated instruction
+ into its unpredicated equivalent we need to drop
+ the VPR operand and we may need to also drop a
+ merge "vuninit" input operand, depending on the
+ instruction pattern. Here ensure that we have at
+ most a two-operand difference between the two
+ instrunctions. */
+ int n_operands_diff
+ = recog_data.n_operands - insn_data[new_icode].n_operands;
+ if (!(n_operands_diff > 0 && n_operands_diff <= 2))
+ return false;
+
+ /* Then, loop through the operands of the predicated
+ instruction, and retain the ones that map to the
+ unpredicated instruction. */
+ for (int i = 0; i < recog_data.n_operands; i++)
+ {
+ /* Ignore the VPR and, if needed, the vuninit
+ operand. */
+ if (insn_vpr_reg_operand == recog_data.operand[i]
+ || (n_operands_diff == 2
+ && !strcmp (recog_data.constraints[i], "0")))
+ continue;
+ else
+ {
+ arr[j] = recog_data.operand[i];
+ j++;
+ }
+ }
+
+ /* Finally, emit the upredicated instruction. */
+ rtx_insn *new_insn;
+ switch (j)
+ {
+ case 1:
+ new_insn = emit_insn (GEN_FCN (new_icode) (arr[0]));
+ break;
+ case 2:
+ new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1]));
+ break;
+ case 3:
+ new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2]));
+ break;
+ case 4:
+ new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2],
+ arr[3]));
+ break;
+ case 5:
+ new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2],
+ arr[3], arr[4]));
+ break;
+ case 6:
+ new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2],
+ arr[3], arr[4], arr[5]));
+ break;
+ case 7:
+ new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2],
+ arr[3], arr[4], arr[5],
+ arr[6]));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
+ return true;
+}
+
+/* When a vctp insn is used, its out is often followed by
+ a zero-extend insn to SImode, which is then SUBREG'd into a
+ vector form of mode VALID_MVE_PRED_MODE: this vector form is
+ what is then used as an input to the instructions within the
+ loop. Hence, store that vector form of the VPR reg into
+ vctp_vpr_generated, so that we can match it with instructions
+ in the loop to determine if they are predicated on this same
+ VPR. If there is no zero-extend and subreg or it is otherwise
+ invalid, then return NULL to cancel the dlstp transform. */
+
+static rtx
+arm_mve_get_vctp_vec_form (rtx_insn *insn)
+{
+ rtx vctp_vpr_generated = NULL_RTX;
+ rtx_insn *next_use1 = NULL;
+ df_ref use;
+ for (use
+ = DF_REG_USE_CHAIN
+ (DF_REF_REGNO (DF_INSN_INFO_DEFS (DF_INSN_INFO_GET (insn))));
+ use; use = DF_REF_NEXT_REG (use))
+ if (!next_use1 && NONDEBUG_INSN_P (DF_REF_INSN (use)))
+ next_use1 = DF_REF_INSN (use);
+
+ rtx next_use1_set = single_set (next_use1);
+ if (next_use1_set
+ && GET_CODE (SET_SRC (next_use1_set)) == ZERO_EXTEND)
+ {
+ rtx_insn *next_use2 = NULL;
+ for (use
+ = DF_REG_USE_CHAIN
+ (DF_REF_REGNO
+ (DF_INSN_INFO_DEFS (DF_INSN_INFO_GET (next_use1))));
+ use; use = DF_REF_NEXT_REG (use))
+ if (!next_use2 && NONDEBUG_INSN_P (DF_REF_INSN (use)))
+ next_use2 = DF_REF_INSN (use);
+
+ rtx next_use2_set = single_set (next_use2);
+ if (next_use2_set
+ && GET_CODE (SET_SRC (next_use2_set)) == SUBREG)
+ vctp_vpr_generated = SET_DEST (next_use2_set);
+ }
+
+ if (!vctp_vpr_generated || !REG_P (vctp_vpr_generated)
+ || !VALID_MVE_PRED_MODE (GET_MODE (vctp_vpr_generated)))
+ return NULL_RTX;
+
+ return vctp_vpr_generated;
+}
+
+/* Attempt to transform the loop contents of loop basic block from VPT
+ predicated insns into unpredicated insns for a dlstp/letp loop. Returns
+ rtx constant value to decrement from the total number of elements. Return
+ (const_int 1) if we can't use tail predication and fallback to scalar
+ low-overhead loops. */
+
+rtx
+arm_attempt_dlstp_transform (rtx label)
+{
+ basic_block body = BLOCK_FOR_INSN (label)->prev_bb;
+
+ /* Ensure that the bb is within a loop that has all required metadata. */
+ if (!body->loop_father || !body->loop_father->header
+ || !body->loop_father->simple_loop_desc)
+ return const1_rtx;
+
+ rtx_insn *vctp_insn = arm_mve_loop_valid_for_dlstp (body);
+ if (!vctp_insn)
+ return const1_rtx;
+
+ gcc_assert (single_set (vctp_insn));
+
+ rtx vctp_vpr_generated = arm_mve_get_vctp_vec_form (vctp_insn);
+ if (!vctp_vpr_generated)
+ return const1_rtx;
+
+ /* decrementunum is already known to be valid at this point. */
+ int decrementnum = arm_mve_get_vctp_lanes (vctp_insn);
+
+ rtx_insn *insn = 0;
+ rtx_insn *cur_insn = 0;
+ rtx_insn *seq;
+ hash_map <rtx_insn *, bool> *safe_insn_map
+ = new hash_map <rtx_insn *, bool>;
+
+ /* Scan through the insns in the loop bb and emit the transformed bb
+ insns to a sequence. */
+ start_sequence ();
+ FOR_BB_INSNS (body, insn)
+ {
+ if (GET_CODE (insn) == CODE_LABEL || NOTE_INSN_BASIC_BLOCK_P (insn))
+ continue;
+ else if (NOTE_P (insn))
+ emit_note ((enum insn_note)NOTE_KIND (insn));
+ else if (DEBUG_INSN_P (insn))
+ emit_debug_insn (PATTERN (insn));
+ else if (!INSN_P (insn))
+ {
+ end_sequence ();
+ return const1_rtx;
+ }
+ /* When we find the vctp instruction: continue. */
+ else if (insn == vctp_insn)
+ continue;
+ /* If the insn pattern requires the use of the VPR value from the
+ vctp as an input parameter for predication. */
+ else if (arm_mve_vec_insn_is_predicated_with_this_predicate
+ (insn, vctp_vpr_generated))
+ {
+ bool success = arm_emit_mve_unpredicated_insn_to_seq (insn);
+ if (!success)
+ {
+ end_sequence ();
+ return const1_rtx;
+ }
+ }
+ /* If the insn isn't VPT predicated on vctp_vpr_generated, we need to
+ make sure that it is still valid within the dlstp/letp loop. */
+ else
+ {
+ /* If this instruction USE-s the vctp_vpr_generated other than for
+ predication, this blocks the transformation as we are not allowed
+ to optimise the VPR value away. */
+ df_ref insn_uses = NULL;
+ FOR_EACH_INSN_USE (insn_uses, insn)
+ {
+ if (rtx_equal_p (vctp_vpr_generated, DF_REF_REG (insn_uses)))
+ {
+ end_sequence ();
+ return const1_rtx;
+ }
+ }
+ /* If within the loop we have an MVE vector instruction that is
+ unpredicated, the dlstp/letp looping will add implicit
+ predication to it. This will result in a change in behaviour
+ of the instruction, so we need to find out if any instructions
+ that feed into the current instruction were implicitly
+ predicated. */
+ if (arm_mve_check_df_chain_back_for_implic_predic
+ (safe_insn_map, insn, vctp_vpr_generated))
+ {
+ if (arm_mve_check_df_chain_fwd_for_implic_predic_impact
+ (insn, vctp_vpr_generated))
+ {
+ end_sequence ();
+ return const1_rtx;
+ }
+ }
+ emit_insn (PATTERN (insn));
+ }
+ }
+ seq = get_insns ();
+ end_sequence ();
+
+ /* Re-write the entire BB contents with the transformed
+ sequence. */
+ FOR_BB_INSNS_SAFE (body, insn, cur_insn)
+ if (!(GET_CODE (insn) == CODE_LABEL || NOTE_INSN_BASIC_BLOCK_P (insn)))
+ delete_insn (insn);
+
+ emit_insn_after (seq, BB_END (body));
+
+ /* The transformation has succeeded, so now modify the "count"
+ (a.k.a. niter_expr) for the middle-end. Also set noloop_assumptions
+ to NULL to stop the middle-end from making assumptions about the
+ number of iterations. */
+ simple_loop_desc (body->loop_father)->niter_expr
+ = XVECEXP (SET_SRC (PATTERN (vctp_insn)), 0, 0);
+ simple_loop_desc (body->loop_father)->noloop_assumptions = NULL_RTX;
+ return GEN_INT (decrementnum);
}
#if CHECKING_P
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 8efdebecc3c..da745288f26 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -124,6 +124,11 @@ (define_attr "fpu" "none,vfp"
; and not all ARM insns do.
(define_attr "predicated" "yes,no" (const_string "no"))
+
+; An attribute that encodes the CODE_FOR_<insn> of the MVE VPT unpredicated
+; version of a VPT-predicated instruction. For unpredicated instructions
+; that are predicable, encode the same pattern's CODE_FOR_<insn> as a way to
+; encode that it is a predicable instruction.
(define_attr "mve_unpredicated_insn" "" (const_int 0))
; LENGTH of an instruction (in bytes)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 5ea2d9e8668..9398702cddd 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -2673,6 +2673,17 @@ (define_int_iterator MRRCI [VUNSPEC_MRRC VUNSPEC_MRRC2])
(define_int_attr mrrc [(VUNSPEC_MRRC "mrrc") (VUNSPEC_MRRC2 "mrrc2")])
(define_int_attr MRRC [(VUNSPEC_MRRC "MRRC") (VUNSPEC_MRRC2 "MRRC2")])
+(define_int_attr dlstp_elemsize [(DLSTP8 "8") (DLSTP16 "16") (DLSTP32 "32")
+ (DLSTP64 "64")])
+
+(define_int_attr letp_num_lanes [(LETP8 "16") (LETP16 "8") (LETP32 "4")
+ (LETP64 "2")])
+(define_int_attr letp_num_lanes_neg [(LETP8 "-16") (LETP16 "-8") (LETP32 "-4")
+ (LETP64 "-2")])
+
+(define_int_attr letp_num_lanes_minus_1 [(LETP8 "15") (LETP16 "7") (LETP32 "3")
+ (LETP64 "1")])
+
(define_int_attr opsuffix [(UNSPEC_DOT_S "s8")
(UNSPEC_DOT_U "u8")
(UNSPEC_DOT_US "s8")
@@ -2916,6 +2927,10 @@ (define_int_iterator SQRSHRLQ [SQRSHRL_64 SQRSHRL_48])
(define_int_iterator VSHLCQ_M [VSHLCQ_M_S VSHLCQ_M_U])
(define_int_iterator VQSHLUQ_M_N [VQSHLUQ_M_N_S])
(define_int_iterator VQSHLUQ_N [VQSHLUQ_N_S])
+(define_int_iterator DLSTP [DLSTP8 DLSTP16 DLSTP32
+ DLSTP64])
+(define_int_iterator LETP [LETP8 LETP16 LETP32
+ LETP64])
;; Define iterators for VCMLA operations
(define_int_iterator VCMLA_OP [UNSPEC_VCMLA
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 62df022ef19..5748e2333eb 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -6922,23 +6922,24 @@ (define_expand "@arm_mve_reinterpret<mode>"
;; Originally expanded by 'predicated_doloop_end'.
;; In the rare situation where the branch is too far, we do also need to
;; revert FPSCR.LTPSIZE back to 0x100 after the last iteration.
-(define_insn "*predicated_doloop_end_internal"
+(define_insn "predicated_doloop_end_internal<letp_num_lanes>"
[(set (pc)
(if_then_else
- (ge (plus:SI (reg:SI LR_REGNUM)
- (match_operand:SI 0 "const_int_operand" ""))
- (const_int 0))
- (label_ref (match_operand 1 "" ""))
+ (gtu (unspec:SI [(plus:SI (match_operand:SI 0 "s_register_operand" "=r")
+ (const_int <letp_num_lanes_neg>))]
+ LETP)
+ (const_int <letp_num_lanes_minus_1>))
+ (match_operand 1 "" "")
(pc)))
- (set (reg:SI LR_REGNUM)
- (plus:SI (reg:SI LR_REGNUM) (match_dup 0)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0) (const_int <letp_num_lanes_neg>)))
(clobber (reg:CC CC_REGNUM))]
- "TARGET_32BIT && TARGET_HAVE_LOB && TARGET_HAVE_MVE && TARGET_THUMB2"
+ "TARGET_HAVE_MVE"
{
if (get_attr_length (insn) == 4)
return "letp\t%|lr, %l1";
else
- return "subs\t%|lr, #%n0\n\tbgt\t%l1\n\tlctp";
+ return "subs\t%|lr, #<letp_num_lanes>\n\tbhi\t%l1\n\tlctp";
}
[(set (attr "length")
(if_then_else
@@ -6947,11 +6948,11 @@ (define_insn "*predicated_doloop_end_internal"
(const_int 6)))
(set_attr "type" "branch")])
-(define_insn "dlstp<mode1>_insn"
+(define_insn "dlstp<dlstp_elemsize>_insn"
[
(set (reg:SI LR_REGNUM)
(unspec:SI [(match_operand:SI 0 "s_register_operand" "r")]
DLSTP))
]
- "TARGET_32BIT && TARGET_HAVE_LOB && TARGET_HAVE_MVE && TARGET_THUMB2"
- "dlstp.<mode1>\t%|lr, %0")
+ "TARGET_HAVE_MVE"
+ "dlstp.<dlstp_elemsize>\t%|lr, %0")
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index e1e013befa7..f2801cea36a 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -1613,7 +1613,7 @@ (define_expand "doloop_end"
(use (match_operand 1 "" ""))] ; label
"TARGET_32BIT"
"
- {
+{
/* Currently SMS relies on the do-loop pattern to recognize loops
where (1) the control part consists of all insns defining and/or
using a certain 'count' register and (2) the loop count can be
@@ -1623,41 +1623,77 @@ (define_expand "doloop_end"
Also used to implement the low over head loops feature, which is part of
the Armv8.1-M Mainline Low Overhead Branch (LOB) extension. */
- if (optimize > 0 && (flag_modulo_sched || TARGET_HAVE_LOB))
- {
- rtx s0;
- rtx bcomp;
- rtx loc_ref;
- rtx cc_reg;
- rtx insn;
- rtx cmp;
-
- if (GET_MODE (operands[0]) != SImode)
- FAIL;
-
- s0 = operands [0];
-
- /* Low over head loop instructions require the first operand to be LR. */
- if (TARGET_HAVE_LOB && arm_target_insn_ok_for_lob (operands [1]))
- s0 = gen_rtx_REG (SImode, LR_REGNUM);
-
- if (TARGET_THUMB2)
- insn = emit_insn (gen_thumb2_addsi3_compare0 (s0, s0, GEN_INT (-1)));
- else
- insn = emit_insn (gen_addsi3_compare0 (s0, s0, GEN_INT (-1)));
-
- cmp = XVECEXP (PATTERN (insn), 0, 0);
- cc_reg = SET_DEST (cmp);
- bcomp = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx);
- loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [1]);
- emit_jump_insn (gen_rtx_SET (pc_rtx,
- gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
- loc_ref, pc_rtx)));
- DONE;
- }
- else
- FAIL;
- }")
+ if (optimize > 0 && (flag_modulo_sched || TARGET_HAVE_LOB))
+ {
+ rtx s0;
+ rtx bcomp;
+ rtx loc_ref;
+ rtx cc_reg;
+ rtx insn;
+ rtx cmp;
+ rtx decrement_num;
+
+ if (GET_MODE (operands[0]) != SImode)
+ FAIL;
+
+ s0 = operands[0];
+
+ if (TARGET_HAVE_LOB && arm_target_bb_ok_for_lob (BLOCK_FOR_INSN (operands[1])))
+ {
+ s0 = gen_rtx_REG (SImode, LR_REGNUM);
+
+ /* If we have a compatible MVE target, try and analyse the loop
+ contents to determine if we can use predicated dlstp/letp
+ looping. */
+ if (TARGET_HAVE_MVE
+ && (decrement_num = arm_attempt_dlstp_transform (operands[1]))
+ && (INTVAL (decrement_num) != 1))
+ {
+ loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[1]);
+ switch (INTVAL (decrement_num))
+ {
+ case 2:
+ insn = emit_jump_insn (gen_predicated_doloop_end_internal2
+ (s0, loc_ref));
+ break;
+ case 4:
+ insn = emit_jump_insn (gen_predicated_doloop_end_internal4
+ (s0, loc_ref));
+ break;
+ case 8:
+ insn = emit_jump_insn (gen_predicated_doloop_end_internal8
+ (s0, loc_ref));
+ break;
+ case 16:
+ insn = emit_jump_insn (gen_predicated_doloop_end_internal16
+ (s0, loc_ref));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ DONE;
+ }
+ }
+
+ /* Otherwise, try standard decrement-by-one dls/le looping. */
+ if (TARGET_THUMB2)
+ insn = emit_insn (gen_thumb2_addsi3_compare0 (s0, s0,
+ GEN_INT (-1)));
+ else
+ insn = emit_insn (gen_addsi3_compare0 (s0, s0, GEN_INT (-1)));
+
+ cmp = XVECEXP (PATTERN (insn), 0, 0);
+ cc_reg = SET_DEST (cmp);
+ bcomp = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx);
+ loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[1]);
+ emit_jump_insn (gen_rtx_SET (pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
+ loc_ref, pc_rtx)));
+ DONE;
+ }
+ else
+ FAIL;
+}")
(define_insn "*clear_apsr"
[(unspec_volatile:SI [(const_int 0)] VUNSPEC_CLRM_APSR)
@@ -1755,7 +1791,37 @@ (define_expand "doloop_begin"
{
if (REGNO (operands[0]) == LR_REGNUM)
{
- emit_insn (gen_dls_insn (operands[0]));
+ /* Pick out the number by which we are decrementing the loop counter
+ in every iteration. If it's > 1, then use dlstp. */
+ int const_int_dec_num
+ = abs (INTVAL (XEXP (XEXP (XVECEXP (PATTERN (operands[1]), 0, 1),
+ 1),
+ 1)));
+ switch (const_int_dec_num)
+ {
+ case 16:
+ emit_insn (gen_dlstp8_insn (operands[0]));
+ break;
+
+ case 8:
+ emit_insn (gen_dlstp16_insn (operands[0]));
+ break;
+
+ case 4:
+ emit_insn (gen_dlstp32_insn (operands[0]));
+ break;
+
+ case 2:
+ emit_insn (gen_dlstp64_insn (operands[0]));
+ break;
+
+ case 1:
+ emit_insn (gen_dls_insn (operands[0]));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
DONE;
}
else
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index 4713ec840ab..2d6f27c14f4 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -583,6 +583,14 @@ (define_c_enum "unspec" [
VADDLVQ_U
VCTP
VCTP_M
+ DLSTP8
+ DLSTP16
+ DLSTP32
+ DLSTP64
+ LETP8
+ LETP16
+ LETP32
+ LETP64
VPNOT
VCREATEQ_F
VCVTQ_N_TO_F_S
diff --git a/gcc/df-core.cc b/gcc/df-core.cc
index d4812b04a7c..4fcc14bf790 100644
--- a/gcc/df-core.cc
+++ b/gcc/df-core.cc
@@ -1964,6 +1964,21 @@ df_bb_regno_last_def_find (basic_block bb, unsigned int regno)
return NULL;
}
+/* Return the one and only def of REGNO within BB. If there is no def or
+ there are multiple defs, return NULL. */
+
+df_ref
+df_bb_regno_only_def_find (basic_block bb, unsigned int regno)
+{
+ df_ref temp = df_bb_regno_first_def_find (bb, regno);
+ if (!temp)
+ return NULL;
+ else if (temp == df_bb_regno_last_def_find (bb, regno))
+ return temp;
+ else
+ return NULL;
+}
+
/* Finds the reference corresponding to the definition of REG in INSN.
DF is the dataflow object. */
diff --git a/gcc/df.h b/gcc/df.h
index 402657a7076..98623637f9c 100644
--- a/gcc/df.h
+++ b/gcc/df.h
@@ -987,6 +987,7 @@ extern void df_check_cfg_clean (void);
#endif
extern df_ref df_bb_regno_first_def_find (basic_block, unsigned int);
extern df_ref df_bb_regno_last_def_find (basic_block, unsigned int);
+extern df_ref df_bb_regno_only_def_find (basic_block, unsigned int);
extern df_ref df_find_def (rtx_insn *, rtx);
extern bool df_reg_defined (rtx_insn *, rtx);
extern df_ref df_find_use (rtx_insn *, rtx);
diff --git a/gcc/loop-doloop.cc b/gcc/loop-doloop.cc
index 4feb0a25ab9..d919207505c 100644
--- a/gcc/loop-doloop.cc
+++ b/gcc/loop-doloop.cc
@@ -85,10 +85,10 @@ doloop_condition_get (rtx_insn *doloop_pat)
forms:
1) (parallel [(set (pc) (if_then_else (condition)
- (label_ref (label))
- (pc)))
- (set (reg) (plus (reg) (const_int -1)))
- (additional clobbers and uses)])
+ (label_ref (label))
+ (pc)))
+ (set (reg) (plus (reg) (const_int -1)))
+ (additional clobbers and uses)])
The branch must be the first entry of the parallel (also required
by jump.cc), and the second entry of the parallel must be a set of
@@ -96,19 +96,34 @@ doloop_condition_get (rtx_insn *doloop_pat)
the loop counter in an if_then_else too.
2) (set (reg) (plus (reg) (const_int -1))
- (set (pc) (if_then_else (reg != 0)
- (label_ref (label))
- (pc))).
+ (set (pc) (if_then_else (reg != 0)
+ (label_ref (label))
+ (pc))).
Some targets (ARM) do the comparison before the branch, as in the
following form:
- 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0)))
- (set (reg) (plus (reg) (const_int -1)))])
- (set (pc) (if_then_else (cc == NE)
- (label_ref (label))
- (pc))) */
-
+ 3) (parallel [(set (cc) (compare (plus (reg) (const_int -1)) 0))
+ (set (reg) (plus (reg) (const_int -1)))])
+ (set (pc) (if_then_else (cc == NE)
+ (label_ref (label))
+ (pc)))
+
+ The ARM target also supports a special case of a counter that decrements
+ by `n` and terminating in a GTU condition. In that case, the compare and
+ branch are all part of one insn, containing an UNSPEC:
+
+ 4) (parallel [
+ (set (pc)
+ (if_then_else (gtu (unspec:SI [(plus:SI (reg:SI 14 lr)
+ (const_int -n))])
+ (const_int n-1]))
+ (label_ref)
+ (pc)))
+ (set (reg:SI 14 lr)
+ (plus:SI (reg:SI 14 lr)
+ (const_int -n)))
+ */
pattern = PATTERN (doloop_pat);
if (GET_CODE (pattern) != PARALLEL)
@@ -143,7 +158,7 @@ doloop_condition_get (rtx_insn *doloop_pat)
|| GET_CODE (cmp_arg1) != PLUS)
return 0;
reg_orig = XEXP (cmp_arg1, 0);
- if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
+ if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
|| !REG_P (reg_orig))
return 0;
cc_reg = SET_DEST (cmp_orig);
@@ -173,15 +188,17 @@ doloop_condition_get (rtx_insn *doloop_pat)
if (! REG_P (reg))
return 0;
- /* Check if something = (plus (reg) (const_int -1)).
+ /* Check if something = (plus (reg) (const_int -n)).
On IA-64, this decrement is wrapped in an if_then_else. */
inc_src = SET_SRC (inc);
if (GET_CODE (inc_src) == IF_THEN_ELSE)
inc_src = XEXP (inc_src, 1);
if (GET_CODE (inc_src) != PLUS
|| XEXP (inc_src, 0) != reg
- || XEXP (inc_src, 1) != constm1_rtx)
+ || !CONST_INT_P (XEXP (inc_src, 1))
+ || INTVAL (XEXP (inc_src, 1)) >= 0)
return 0;
+ int dec_num = abs (INTVAL (XEXP (inc_src, 1)));
/* Check for (set (pc) (if_then_else (condition)
(label_ref (label))
@@ -196,60 +213,71 @@ doloop_condition_get (rtx_insn *doloop_pat)
/* Extract loop termination condition. */
condition = XEXP (SET_SRC (cmp), 0);
- /* We expect a GE or NE comparison with 0 or 1. */
- if ((GET_CODE (condition) != GE
- && GET_CODE (condition) != NE)
- || (XEXP (condition, 1) != const0_rtx
- && XEXP (condition, 1) != const1_rtx))
+ /* We expect a GE or NE comparison with 0 or 1, or a GTU comparison with
+ dec_num - 1. */
+ if (!((GET_CODE (condition) == GE
+ || GET_CODE (condition) == NE)
+ && (XEXP (condition, 1) == const0_rtx
+ || XEXP (condition, 1) == const1_rtx ))
+ &&!(GET_CODE (condition) == GTU
+ && ((INTVAL (XEXP (condition, 1))) == (dec_num - 1))))
return 0;
- if ((XEXP (condition, 0) == reg)
+ /* For the ARM special case of having a GTU: re-form the condition without
+ the unspec for the benefit of the middle-end. */
+ if (GET_CODE (condition) == GTU)
+ {
+ condition = gen_rtx_fmt_ee (GTU, VOIDmode, inc_src,
+ GEN_INT (dec_num - 1));
+ return condition;
+ }
+ else if ((XEXP (condition, 0) == reg)
/* For the third case: */
|| ((cc_reg != NULL_RTX)
&& (XEXP (condition, 0) == cc_reg)
&& (reg_orig == reg))
|| (GET_CODE (XEXP (condition, 0)) == PLUS
&& XEXP (XEXP (condition, 0), 0) == reg))
- {
+ {
if (GET_CODE (pattern) != PARALLEL)
/* For the second form we expect:
- (set (reg) (plus (reg) (const_int -1))
- (set (pc) (if_then_else (reg != 0)
- (label_ref (label))
- (pc))).
+ (set (reg) (plus (reg) (const_int -1))
+ (set (pc) (if_then_else (reg != 0)
+ (label_ref (label))
+ (pc))).
- is equivalent to the following:
+ is equivalent to the following:
- (parallel [(set (pc) (if_then_else (reg != 1)
- (label_ref (label))
- (pc)))
- (set (reg) (plus (reg) (const_int -1)))
- (additional clobbers and uses)])
+ (parallel [(set (pc) (if_then_else (reg != 1)
+ (label_ref (label))
+ (pc)))
+ (set (reg) (plus (reg) (const_int -1)))
+ (additional clobbers and uses)])
- For the third form we expect:
+ For the third form we expect:
- (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
- (set (reg) (plus (reg) (const_int -1)))])
- (set (pc) (if_then_else (cc == NE)
- (label_ref (label))
- (pc)))
+ (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
+ (set (reg) (plus (reg) (const_int -1)))])
+ (set (pc) (if_then_else (cc == NE)
+ (label_ref (label))
+ (pc)))
- which is equivalent to the following:
+ which is equivalent to the following:
- (parallel [(set (cc) (compare (reg, 1))
- (set (reg) (plus (reg) (const_int -1)))
- (set (pc) (if_then_else (NE == cc)
- (label_ref (label))
- (pc))))])
+ (parallel [(set (cc) (compare (reg, 1))
+ (set (reg) (plus (reg) (const_int -1)))
+ (set (pc) (if_then_else (NE == cc)
+ (label_ref (label))
+ (pc))))])
- So we return the second form instead for the two cases.
+ So we return the second form instead for the two cases.
*/
- condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
+ condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
return condition;
- }
+ }
/* ??? If a machine uses a funny comparison, we could return a
canonicalized form here. */
@@ -507,6 +535,11 @@ doloop_modify (class loop *loop, class niter_desc *desc,
nonneg = 1;
break;
+ case GTU:
+ /* The iteration count does not need incrementing for a GTU test. */
+ increment_count = false;
+ break;
+
/* Abort if an invalid doloop pattern has been generated. */
default:
gcc_unreachable ();
@@ -529,6 +562,10 @@ doloop_modify (class loop *loop, class niter_desc *desc,
if (desc->noloop_assumptions)
{
+ /* The GTU case has only been implemented for the ARM target, where
+ noloop_assumptions gets explicitly set to NULL for that case, so
+ assert here for safety. */
+ gcc_assert (GET_CODE (condition) != GTU);
rtx ass = copy_rtx (desc->noloop_assumptions);
basic_block preheader = loop_preheader_edge (loop)->src;
basic_block set_zero = split_edge (loop_preheader_edge (loop));
@@ -642,7 +679,7 @@ doloop_optimize (class loop *loop)
{
scalar_int_mode mode;
rtx doloop_reg;
- rtx count;
+ rtx count = NULL_RTX;
widest_int iterations, iterations_max;
rtx_code_label *start_label;
rtx condition;
@@ -685,17 +722,6 @@ doloop_optimize (class loop *loop)
return false;
}
- max_cost
- = COSTS_N_INSNS (param_max_iterations_computation_cost);
- if (set_src_cost (desc->niter_expr, mode, optimize_loop_for_speed_p (loop))
- > max_cost)
- {
- if (dump_file)
- fprintf (dump_file,
- "Doloop: number of iterations too costly to compute.\n");
- return false;
- }
-
if (desc->const_iter)
iterations = widest_int::from (rtx_mode_t (desc->niter_expr, mode),
UNSIGNED);
@@ -716,12 +742,25 @@ doloop_optimize (class loop *loop)
/* Generate looping insn. If the pattern FAILs then give up trying
to modify the loop since there is some aspect the back-end does
- not like. */
- count = copy_rtx (desc->niter_expr);
+ not like. If this succeeds, there is a chance that the loop
+ desc->niter_expr has been altered by the backend, so only extract
+ that data after the gen_doloop_end. */
start_label = block_label (desc->in_edge->dest);
doloop_reg = gen_reg_rtx (mode);
rtx_insn *doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label);
+ max_cost
+ = COSTS_N_INSNS (param_max_iterations_computation_cost);
+ if (set_src_cost (desc->niter_expr, mode, optimize_loop_for_speed_p (loop))
+ > max_cost)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "Doloop: number of iterations too costly to compute.\n");
+ return false;
+ }
+
+ count = copy_rtx (desc->niter_expr);
word_mode_size = GET_MODE_PRECISION (word_mode);
word_mode_max = (HOST_WIDE_INT_1U << (word_mode_size - 1) << 1) - 1;
if (! doloop_seq
diff --git a/gcc/testsuite/gcc.target/arm/lob.h b/gcc/testsuite/gcc.target/arm/lob.h
index feaae7cc899..3941fe7a8b6 100644
--- a/gcc/testsuite/gcc.target/arm/lob.h
+++ b/gcc/testsuite/gcc.target/arm/lob.h
@@ -1,15 +1,131 @@
#include <string.h>
-
+#include <stdint.h>
/* Common code for lob tests. */
#define NO_LOB asm volatile ("@ clobber lr" : : : "lr" )
-#define N 10000
+#define N 100
+
+static void
+reset_data (int *a, int *b, int *c, int x)
+{
+ memset (a, -1, x * sizeof (*a));
+ memset (b, -1, x * sizeof (*b));
+ memset (c, 0, x * sizeof (*c));
+}
+
+static void
+reset_data8 (int8_t *a, int8_t *b, int8_t *c, int x)
+{
+ memset (a, -1, x * sizeof (*a));
+ memset (b, -1, x * sizeof (*b));
+ memset (c, 0, x * sizeof (*c));
+}
+
+static void
+reset_data16 (int16_t *a, int16_t *b, int16_t *c, int x)
+{
+ memset (a, -1, x * sizeof (*a));
+ memset (b, -1, x * sizeof (*b));
+ memset (c, 0, x * sizeof (*c));
+}
+
+static void
+reset_data32 (int32_t *a, int32_t *b, int32_t *c, int x)
+{
+ memset (a, -1, x * sizeof (*a));
+ memset (b, -1, x * sizeof (*b));
+ memset (c, 0, x * sizeof (*c));
+}
+
+static void
+reset_data64 (int64_t *a, int64_t *c, int x)
+{
+ memset (a, -1, x * sizeof (*a));
+ memset (c, 0, x * sizeof (*c));
+}
+
+static void
+check_plus (int *a, int *b, int *c, int x)
+{
+ for (int i = 0; i < N; i++)
+ {
+ NO_LOB;
+ if (i < x)
+ {
+ if (c[i] != (a[i] + b[i])) abort ();
+ }
+ else
+ {
+ if (c[i] != 0) abort ();
+ }
+ }
+}
+
+static void
+check_plus8 (int8_t *a, int8_t *b, int8_t *c, int x)
+{
+ for (int i = 0; i < N; i++)
+ {
+ NO_LOB;
+ if (i < x)
+ {
+ if (c[i] != (a[i] + b[i])) abort ();
+ }
+ else
+ {
+ if (c[i] != 0) abort ();
+ }
+ }
+}
+
+static void
+check_plus16 (int16_t *a, int16_t *b, int16_t *c, int x)
+{
+ for (int i = 0; i < N; i++)
+ {
+ NO_LOB;
+ if (i < x)
+ {
+ if (c[i] != (a[i] + b[i])) abort ();
+ }
+ else
+ {
+ if (c[i] != 0) abort ();
+ }
+ }
+}
+
+static void
+check_plus32 (int32_t *a, int32_t *b, int32_t *c, int x)
+{
+ for (int i = 0; i < N; i++)
+ {
+ NO_LOB;
+ if (i < x)
+ {
+ if (c[i] != (a[i] + b[i])) abort ();
+ }
+ else
+ {
+ if (c[i] != 0) abort ();
+ }
+ }
+}
static void
-reset_data (int *a, int *b, int *c)
+check_memcpy64 (int64_t *a, int64_t *c, int x)
{
- memset (a, -1, N * sizeof (*a));
- memset (b, -1, N * sizeof (*b));
- memset (c, -1, N * sizeof (*c));
+ for (int i = 0; i < N; i++)
+ {
+ NO_LOB;
+ if (i < x)
+ {
+ if (c[i] != a[i]) abort ();
+ }
+ else
+ {
+ if (c[i] != 0) abort ();
+ }
+ }
}
diff --git a/gcc/testsuite/gcc.target/arm/lob1.c b/gcc/testsuite/gcc.target/arm/lob1.c
index ba5c82cd55c..c8ce653a5c3 100644
--- a/gcc/testsuite/gcc.target/arm/lob1.c
+++ b/gcc/testsuite/gcc.target/arm/lob1.c
@@ -54,29 +54,18 @@ loop3 (int *a, int *b, int *c)
} while (i < N);
}
-void
-check (int *a, int *b, int *c)
-{
- for (int i = 0; i < N; i++)
- {
- NO_LOB;
- if (c[i] != a[i] + b[i])
- abort ();
- }
-}
-
int
main (void)
{
- reset_data (a, b, c);
+ reset_data (a, b, c, N);
loop1 (a, b ,c);
- check (a, b ,c);
- reset_data (a, b, c);
+ check_plus (a, b, c, N);
+ reset_data (a, b, c, N);
loop2 (a, b ,c);
- check (a, b ,c);
- reset_data (a, b, c);
+ check_plus (a, b, c, N);
+ reset_data (a, b, c, N);
loop3 (a, b ,c);
- check (a, b ,c);
+ check_plus (a, b, c, N);
return 0;
}
diff --git a/gcc/testsuite/gcc.target/arm/lob6.c b/gcc/testsuite/gcc.target/arm/lob6.c
index 17b6124295e..4fe116e2c2b 100644
--- a/gcc/testsuite/gcc.target/arm/lob6.c
+++ b/gcc/testsuite/gcc.target/arm/lob6.c
@@ -79,14 +79,14 @@ check (void)
int
main (void)
{
- reset_data (a1, b1, c1);
- reset_data (a2, b2, c2);
+ reset_data (a1, b1, c1, N);
+ reset_data (a2, b2, c2, N);
loop1 (a1, b1, c1);
ref1 (a2, b2, c2);
check ();
- reset_data (a1, b1, c1);
- reset_data (a2, b2, c2);
+ reset_data (a1, b1, c1, N);
+ reset_data (a2, b2, c2, N);
loop2 (a1, b1, c1);
ref2 (a2, b2, c2);
check ();
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm.c
new file mode 100644
index 00000000000..5ddd994e53d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm.c
@@ -0,0 +1,561 @@
+/* { dg-do compile { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-options "-O3 -save-temps" } */
+/* { dg-add-options arm_v8_1m_mve } */
+
+#include <arm_mve.h>
+
+#define IMM 5
+
+#define TEST_COMPILE_IN_DLSTP_TERNARY(BITS, LANES, LDRSTRYTPE, TYPE, SIGN, NAME, PRED) \
+void test_##NAME##PRED##_##SIGN##BITS (TYPE##BITS##_t *a, TYPE##BITS##_t *b, TYPE##BITS##_t *c, int n) \
+{ \
+ while (n > 0) \
+ { \
+ mve_pred16_t p = vctp##BITS##q (n); \
+ TYPE##BITS##x##LANES##_t va = vldr##LDRSTRYTPE##q_z_##SIGN##BITS (a, p); \
+ TYPE##BITS##x##LANES##_t vb = vldr##LDRSTRYTPE##q_z_##SIGN##BITS (b, p); \
+ TYPE##BITS##x##LANES##_t vc = NAME##PRED##_##SIGN##BITS (va, vb, p); \
+ vstr##LDRSTRYTPE##q_p_##SIGN##BITS (c, vc, p); \
+ c += LANES; \
+ a += LANES; \
+ b += LANES; \
+ n -= LANES; \
+ } \
+}
+
+#define TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY(BITS, LANES, LDRSTRYTPE, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_TERNARY (BITS, LANES, LDRSTRYTPE, int, s, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_TERNARY (BITS, LANES, LDRSTRYTPE, uint, u, NAME, PRED)
+
+#define TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY(NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY (8, 16, b, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY (16, 8, h, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY (32, 4, w, NAME, PRED)
+
+
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY (vaddq, _x)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY (vmulq, _x)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY (vsubq, _x)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY (vhaddq, _x)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY (vorrq, _x)
+
+
+#define TEST_COMPILE_IN_DLSTP_TERNARY_M(BITS, LANES, LDRSTRYTPE, TYPE, SIGN, NAME, PRED) \
+void test_##NAME##PRED##_##SIGN##BITS (TYPE##BITS##x##LANES##_t __inactive, TYPE##BITS##_t *a, TYPE##BITS##_t *b, TYPE##BITS##_t *c, int n) \
+{ \
+ while (n > 0) \
+ { \
+ mve_pred16_t p = vctp##BITS##q (n); \
+ TYPE##BITS##x##LANES##_t va = vldr##LDRSTRYTPE##q_z_##SIGN##BITS (a, p); \
+ TYPE##BITS##x##LANES##_t vb = vldr##LDRSTRYTPE##q_z_##SIGN##BITS (b, p); \
+ TYPE##BITS##x##LANES##_t vc = NAME##PRED##_##SIGN##BITS (__inactive, va, vb, p); \
+ vstr##LDRSTRYTPE##q_p_##SIGN##BITS (c, vc, p); \
+ c += LANES; \
+ a += LANES; \
+ b += LANES; \
+ n -= LANES; \
+ } \
+}
+
+#define TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_M(BITS, LANES, LDRSTRYTPE, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_TERNARY_M (BITS, LANES, LDRSTRYTPE, int, s, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_TERNARY_M (BITS, LANES, LDRSTRYTPE, uint, u, NAME, PRED)
+
+#define TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M(NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_M (8, 16, b, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_M (16, 8, h, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_M (32, 4, w, NAME, PRED)
+
+
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M (vaddq, _m)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M (vmulq, _m)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M (vsubq, _m)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M (vhaddq, _m)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M (vorrq, _m)
+
+#define TEST_COMPILE_IN_DLSTP_TERNARY_N(BITS, LANES, LDRSTRYTPE, TYPE, SIGN, NAME, PRED) \
+void test_##NAME##PRED##_n_##SIGN##BITS (TYPE##BITS##_t *a, TYPE##BITS##_t *c, int n) \
+{ \
+ while (n > 0) \
+ { \
+ mve_pred16_t p = vctp##BITS##q (n); \
+ TYPE##BITS##x##LANES##_t va = vldr##LDRSTRYTPE##q_z_##SIGN##BITS (a, p); \
+ TYPE##BITS##x##LANES##_t vc = NAME##PRED##_n_##SIGN##BITS (va, IMM, p); \
+ vstr##LDRSTRYTPE##q_p_##SIGN##BITS (c, vc, p); \
+ c += LANES; \
+ a += LANES; \
+ n -= LANES; \
+ } \
+}
+
+#define TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_N(BITS, LANES, LDRSTRYTPE, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_TERNARY_N (BITS, LANES, LDRSTRYTPE, int, s, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_TERNARY_N (BITS, LANES, LDRSTRYTPE, uint, u, NAME, PRED)
+
+#define TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_N(NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_N (8, 16, b, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_N (16, 8, h, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_N (32, 4, w, NAME, PRED)
+
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_N (vaddq, _x)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_N (vmulq, _x)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_N (vsubq, _x)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_N (vhaddq, _x)
+
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_N (vbrsrq, _x)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_N (vshlq, _x)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_N (vshrq, _x)
+
+#define TEST_COMPILE_IN_DLSTP_TERNARY_M_N(BITS, LANES, LDRSTRYTPE, TYPE, SIGN, NAME, PRED) \
+void test_##NAME##PRED##_n_##SIGN##BITS (TYPE##BITS##x##LANES##_t __inactive, TYPE##BITS##_t *a, TYPE##BITS##_t *c, int n) \
+{ \
+ while (n > 0) \
+ { \
+ mve_pred16_t p = vctp##BITS##q (n); \
+ TYPE##BITS##x##LANES##_t va = vldr##LDRSTRYTPE##q_z_##SIGN##BITS (a, p); \
+ TYPE##BITS##x##LANES##_t vc = NAME##PRED##_n_##SIGN##BITS (__inactive, va, IMM, p); \
+ vstr##LDRSTRYTPE##q_p_##SIGN##BITS (c, vc, p); \
+ c += LANES; \
+ a += LANES; \
+ n -= LANES; \
+ } \
+}
+
+#define TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_M_N(BITS, LANES, LDRSTRYTPE, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_TERNARY_M_N (BITS, LANES, LDRSTRYTPE, int, s, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_TERNARY_M_N (BITS, LANES, LDRSTRYTPE, uint, u, NAME, PRED)
+
+#define TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M_N(NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_M_N (8, 16, b, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_M_N (16, 8, h, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_M_N (32, 4, w, NAME, PRED)
+
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M_N (vaddq, _m)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M_N (vmulq, _m)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M_N (vsubq, _m)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M_N (vhaddq, _m)
+
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M_N (vbrsrq, _m)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M_N (vshlq, _m)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M_N (vshrq, _m)
+
+/* Now test some more configurations. */
+
+/* Using a >=1 condition. */
+void test1 (int32_t *a, int32_t *b, int32_t *c, int n)
+{
+ while (n >= 1)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p);
+ vstrwq_p_s32 (c, vc, p);
+ c+=4;
+ a+=4;
+ b+=4;
+ n-=4;
+ }
+}
+
+/* Test a for loop format of decrementing to zero */
+int32_t a[] = {0, 1, 2, 3, 4, 5, 6, 7};
+void test2 (int32_t *b, int num_elems)
+{
+ for (int i = num_elems; i > 0; i-= 4)
+ {
+ mve_pred16_t p = vctp32q (i);
+ int32x4_t va = vldrwq_z_s32 (&(a[i]), p);
+ vstrwq_p_s32 (b + i, va, p);
+ }
+}
+
+/* Iteration counter counting up to num_iter. */
+void test3 (uint8_t *a, uint8_t *b, uint8_t *c, int n)
+{
+ int num_iter = (n + 15)/16;
+ for (int i = 0; i < num_iter; i++)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_x_u8 (va, vb, p);
+ vstrbq_p_u8 (c, vc, p);
+ n-=16;
+ }
+}
+
+/* Iteration counter counting down from num_iter. */
+void test4 (uint8_t *a, uint8_t *b, uint8_t *c, int n)
+{
+ int num_iter = (n + 15)/16;
+ for (int i = num_iter; i > 0; i--)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_x_u8 (va, vb, p);
+ vstrbq_p_u8 (c, vc, p);
+ n-=16;
+ }
+}
+
+/* Using an unpredicated arithmetic instruction within the loop. */
+void test5 (uint8_t *a, uint8_t *b, uint8_t *c, uint8_t *d, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_u8 (b);
+ /* Is affected by implicit predication, because vb also
+ came from an unpredicated load, but there is no functional
+ problem, because the result is used in a predicated store. */
+ uint8x16_t vc = vaddq_u8 (va, vb);
+ uint8x16_t vd = vaddq_x_u8 (va, vb, p);
+ vstrbq_p_u8 (c, vc, p);
+ vstrbq_p_u8 (d, vd, p);
+ n-=16;
+ }
+}
+
+/* Using a different VPR value for one instruction in the loop. */
+void test6 (int32_t *a, int32_t *b, int32_t *c, int n, mve_pred16_t p1)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ int32x4_t vb = vldrwq_z_s32 (b, p1);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Generating and using another VPR value in the loop, with a vctp.
+ The doloop logic will always try to do the transform on the first
+ vctp it encounters, so this is still expected to work. */
+void test7 (int32_t *a, int32_t *b, int32_t *c, int n, int g)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ mve_pred16_t p1 = vctp32q (g);
+ int32x4_t vb = vldrwq_z_s32 (b, p1);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Generating and using a different VPR value in the loop, with a vctp,
+ but this time the p1 will also change in every loop (still fine) */
+void test8 (int32_t *a, int32_t *b, int32_t *c, int n, int g)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ mve_pred16_t p1 = vctp32q (g);
+ int32x4_t vb = vldrwq_z_s32 (b, p1);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ g++;
+ }
+}
+
+/* Generating and using a different VPR value in the loop, with a vctp_m
+ that is independent of the loop vctp VPR. */
+void test9 (int32_t *a, int32_t *b, int32_t *c, int n, mve_pred16_t p1)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ mve_pred16_t p2 = vctp32q_m (n, p1);
+ int32x4_t vb = vldrwq_z_s32 (b, p1);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p2);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Generating and using a different VPR value in the loop,
+ with a vctp_m that is tied to the base vctp VPR. This
+ is still fine, because the vctp_m will be transformed
+ into a vctp and be implicitly predicated. */
+void test10 (int32_t *a, int32_t *b, int32_t *c, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ mve_pred16_t p1 = vctp32q_m (n, p);
+ int32x4_t vb = vldrwq_z_s32 (b, p1);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p1);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Generating and using a different VPR value in the loop, with a vcmp. */
+void test11 (int32_t *a, int32_t *b, int32_t *c, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ mve_pred16_t p1 = vcmpeqq_s32 (va, vb);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p1);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Generating and using a different VPR value in the loop, with a vcmp_m. */
+void test12 (int32_t *a, int32_t *b, int32_t *c, int n, mve_pred16_t p1)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ mve_pred16_t p2 = vcmpeqq_m_s32 (va, vb, p1);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p2);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Generating and using a different VPR value in the loop, with a vcmp_m
+ that is tied to the base vctp VPR (same as above, this will be turned
+ into a vcmp and be implicitly predicated). */
+void test13 (int32_t *a, int32_t *b, int32_t *c, int n, mve_pred16_t p1)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ mve_pred16_t p2 = vcmpeqq_m_s32 (va, vb, p);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p2);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Using an unpredicated op with a scalar output, where the result is valid
+ outside the bb. This is valid, because all the inputs to the unpredicated
+ op are correctly predicated. */
+uint8_t test14 (uint8_t *a, uint8_t *b, uint8_t *c, int n, uint8x16_t vx)
+{
+ uint8_t sum = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_m_u8 (vx, va, vb, p);
+ sum += vaddvq_u8 (vc);
+ a += 16;
+ b += 16;
+ n -= 16;
+ }
+ return sum;
+}
+
+/* Same as above, but with another scalar op between the unpredicated op and
+ the scalar op outside the loop. */
+uint8_t test15 (uint8_t *a, uint8_t *b, uint8_t *c, int n, uint8x16_t vx, int g)
+{
+ uint8_t sum = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_m_u8 (vx, va, vb, p);
+ sum += vaddvq_u8 (vc);
+ sum += g;
+ a += 16;
+ b += 16;
+ n -= 16;
+ }
+ return sum;
+}
+
+/* Using an unpredicated vcmp to generate a new predicate value in the
+ loop and then using it in a predicated store insn. */
+void test16 (int32_t *a, int32_t *b, int32_t *c, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ int32x4_t vb = vldrwq_s32 (b);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p);
+ mve_pred16_t p1 = vcmpeqq_s32 (va, vc);
+ vstrwq_p_s32 (c, vc, p1);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Using a predicated vcmp to generate a new predicate value in the
+ loop and then using it in a predicated store insn. */
+void test17 (int32_t *a, int32_t *b, int32_t *c, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ int32x4_t vc = vaddq_s32 (va, vb);
+ mve_pred16_t p1 = vcmpeqq_m_s32 (va, vc, p);
+ vstrwq_p_s32 (c, vc, p1);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Using an across-vector unpredicated instruction in a valid way.
+ This tests that "vc" has correctly masked the risky "vb". */
+uint16_t test18 (uint16_t *a, uint16_t *b, uint16_t *c, int n)
+{
+ uint16x8_t vb = vldrhq_u16 (b);
+ uint16_t res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp16q (n);
+ uint16x8_t va = vldrhq_z_u16 (a, p);
+ uint16x8_t vc = vaddq_x_u16 (va, vb, p);
+ res = vaddvq_u16 (vc);
+ c += 8;
+ a += 8;
+ b += 8;
+ n -= 8;
+ }
+ return res;
+}
+
+/* Using an across-vector unpredicated instruction with a scalar from outside the loop. */
+uint16_t test19 (uint16_t *a, uint16_t *b, uint16_t *c, int n)
+{
+ uint16x8_t vb = vldrhq_u16 (b);
+ uint16_t res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp16q (n);
+ uint16x8_t va = vldrhq_z_u16 (a, p);
+ uint16x8_t vc = vaddq_x_u16 (va, vb, p);
+ res = vaddvaq_u16 (res, vc);
+ c += 8;
+ a += 8;
+ b += 8;
+ n -= 8;
+ }
+ return res;
+}
+
+/* Using an across-vector predicated instruction in a valid way. */
+uint16_t test20 (uint16_t *a, uint16_t *b, uint16_t *c, int n)
+{
+ uint16_t res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp16q (n);
+ uint16x8_t vb = vldrhq_u16 (b);
+ uint16x8_t va = vldrhq_z_u16 (a, p);
+ res = vaddvaq_p_u16 (res, vb, p);
+ c += 8;
+ a += 8;
+ b += 8;
+ n -= 8;
+ }
+ return res;
+}
+
+/* Using an across-vector predicated instruction in a valid way. */
+uint16_t test21 (uint16_t *a, uint16_t *b, uint16_t *c, int n)
+{
+ uint16_t res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp16q (n);
+ uint16x8_t vb = vldrhq_u16 (b);
+ uint16x8_t va = vldrhq_z_u16 (a, p);
+ res++;
+ res = vaddvaq_p_u16 (res, vb, p);
+ c += 8;
+ a += 8;
+ b += 8;
+ n -= 8;
+ }
+ return res;
+}
+
+int test22 (uint8_t *a, uint8_t *b, uint8_t *c, int n)
+{
+ int res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ res = vmaxvq (res, va);
+ n-=16;
+ a+=16;
+ }
+ return res;
+}
+
+int test23 (int8_t *a, int8_t *b, int8_t *c, int n)
+{
+ int res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ int8x16_t va = vldrbq_z_s8 (a, p);
+ res = vmaxavq (res, va);
+ n-=16;
+ a+=16;
+ }
+ return res;
+}
+
+/* The final number of DLSTPs currently is calculated by the number of
+ `TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY.*` macros * 6 + 23. */
+/* { dg-final { scan-assembler-times {\tdlstp} 167 } } */
+/* { dg-final { scan-assembler-times {\tletp} 167 } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-int16x8-run.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-int16x8-run.c
new file mode 100644
index 00000000000..6966a396604
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-int16x8-run.c
@@ -0,0 +1,44 @@
+/* { dg-do run { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-require-effective-target arm_mve_hw } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_v8_1m_mve } */
+#include "dlstp-int16x8.c"
+
+int main ()
+{
+ int i;
+ int16_t temp1[N];
+ int16_t temp2[N];
+ int16_t temp3[N];
+ reset_data16 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 0);
+ check_plus16 (temp1, temp2, temp3, 0);
+
+ reset_data16 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 1);
+ check_plus16 (temp1, temp2, temp3, 1);
+
+ reset_data16 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 7);
+ check_plus16 (temp1, temp2, temp3, 7);
+
+ reset_data16 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 8);
+ check_plus16 (temp1, temp2, temp3, 8);
+
+ reset_data16 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 9);
+ check_plus16 (temp1, temp2, temp3, 9);
+
+ reset_data16 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 16);
+ check_plus16 (temp1, temp2, temp3, 16);
+
+ reset_data16 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 17);
+ check_plus16 (temp1, temp2, temp3, 17);
+
+ reset_data16 (temp1, temp2, temp3, N);
+}
+
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-int16x8.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-int16x8.c
new file mode 100644
index 00000000000..33632c5f14d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-int16x8.c
@@ -0,0 +1,31 @@
+/* { dg-do compile { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_v8_1m_mve } */
+
+#include <arm_mve.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "../lob.h"
+
+void __attribute__ ((noinline)) test (int16_t *a, int16_t *b, int16_t *c, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp16q (n);
+ int16x8_t va = vldrhq_z_s16 (a, p);
+ int16x8_t vb = vldrhq_z_s16 (b, p);
+ int16x8_t vc = vaddq_x_s16 (va, vb, p);
+ vstrhq_p_s16 (c, vc, p);
+ c+=8;
+ a+=8;
+ b+=8;
+ n-=8;
+ }
+}
+
+/* { dg-final { scan-assembler-times {\tdlstp.16} 1 } } */
+/* { dg-final { scan-assembler-times {\tletp} 1 } } */
+/* { dg-final { scan-assembler-not "\tvctp" } } */
+/* { dg-final { scan-assembler-not "\tvpst" } } */
+/* { dg-final { scan-assembler-not "p0" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-int32x4-run.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-int32x4-run.c
new file mode 100644
index 00000000000..6833dddde92
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-int32x4-run.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-require-effective-target arm_mve_hw } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_v8_1m_mve } */
+
+#include "dlstp-int32x4.c"
+
+int main ()
+{
+ int i;
+ int32_t temp1[N];
+ int32_t temp2[N];
+ int32_t temp3[N];
+ reset_data32 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 0);
+ check_plus32 (temp1, temp2, temp3, 0);
+
+ reset_data32 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 1);
+ check_plus32 (temp1, temp2, temp3, 1);
+
+ reset_data32 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 3);
+ check_plus32 (temp1, temp2, temp3, 3);
+
+ reset_data32 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 4);
+ check_plus32 (temp1, temp2, temp3, 4);
+
+ reset_data32 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 5);
+ check_plus32 (temp1, temp2, temp3, 5);
+
+ reset_data32 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 8);
+ check_plus32 (temp1, temp2, temp3, 8);
+
+ reset_data32 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 9);
+ check_plus32 (temp1, temp2, temp3, 9);
+
+ reset_data32 (temp1, temp2, temp3, N);
+}
+
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-int32x4.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-int32x4.c
new file mode 100644
index 00000000000..5d09f784b77
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-int32x4.c
@@ -0,0 +1,31 @@
+/* { dg-do compile { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_v8_1m_mve } */
+
+#include <arm_mve.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "../lob.h"
+
+void __attribute__ ((noinline)) test (int32_t *a, int32_t *b, int32_t *c, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p);
+ vstrwq_p_s32 (c, vc, p);
+ c+=4;
+ a+=4;
+ b+=4;
+ n-=4;
+ }
+}
+
+/* { dg-final { scan-assembler-times {\tdlstp.32} 1 } } */
+/* { dg-final { scan-assembler-times {\tletp} 1 } } */
+/* { dg-final { scan-assembler-not "\tvctp" } } */
+/* { dg-final { scan-assembler-not "\tvpst" } } */
+/* { dg-final { scan-assembler-not "p0" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-int64x2-run.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-int64x2-run.c
new file mode 100644
index 00000000000..cc0b9ce7ee9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-int64x2-run.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-require-effective-target arm_mve_hw } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_v8_1m_mve } */
+
+#include "dlstp-int64x2.c"
+
+int main ()
+{
+ int i;
+ int64_t temp1[N];
+ int64_t temp3[N];
+ reset_data64 (temp1, temp3, N);
+ test (temp1, temp3, 0);
+ check_memcpy64 (temp1, temp3, 0);
+
+ reset_data64 (temp1, temp3, N);
+ test (temp1, temp3, 1);
+ check_memcpy64 (temp1, temp3, 1);
+
+ reset_data64 (temp1, temp3, N);
+ test (temp1, temp3, 2);
+ check_memcpy64 (temp1, temp3, 2);
+
+ reset_data64 (temp1, temp3, N);
+ test (temp1, temp3, 3);
+ check_memcpy64 (temp1, temp3, 3);
+
+ reset_data64 (temp1, temp3, N);
+ test (temp1, temp3, 4);
+ check_memcpy64 (temp1, temp3, 4);
+
+ reset_data64 (temp1, temp3, N);
+ test (temp1, temp3, 5);
+ check_memcpy64 (temp1, temp3, 5);
+
+ reset_data64 (temp1, temp3, N);
+ test (temp1, temp3, 6);
+ check_memcpy64 (temp1, temp3, 6);
+
+ reset_data64 (temp1, temp3, N);
+ test (temp1, temp3, 7);
+ check_memcpy64 (temp1, temp3, 7);
+
+ reset_data64 (temp1, temp3, N);
+}
+
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-int64x2.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-int64x2.c
new file mode 100644
index 00000000000..21e882424ec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-int64x2.c
@@ -0,0 +1,28 @@
+/* { dg-do compile { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_v8_1m_mve } */
+
+#include <arm_mve.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "../lob.h"
+
+void __attribute__ ((noinline)) test (int64_t *a, int64_t *c, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp64q (n);
+ int64x2_t va = vldrdq_gather_offset_z_s64 (a, vcreateq_u64 (0, 8), p);
+ vstrdq_scatter_offset_p_s64 (c, vcreateq_u64 (0, 8), va, p);
+ c+=2;
+ a+=2;
+ n-=2;
+ }
+}
+
+/* { dg-final { scan-assembler-times {\tdlstp.64} 1 } } */
+/* { dg-final { scan-assembler-times {\tletp} 1 } } */
+/* { dg-final { scan-assembler-not "\tvctp" } } */
+/* { dg-final { scan-assembler-not "\tvpst" } } */
+/* { dg-final { scan-assembler-not "p0" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-int8x16.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-int8x16.c
new file mode 100644
index 00000000000..8ea181c82d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-int8x16.c
@@ -0,0 +1,69 @@
+/* { dg-do run { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-require-effective-target arm_mve_hw } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_v8_1m_mve } */
+
+#include <arm_mve.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "../lob.h"
+
+void __attribute__ ((noinline)) test (int8_t *a, int8_t *b, int8_t *c, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ int8x16_t va = vldrbq_z_s8 (a, p);
+ int8x16_t vb = vldrbq_z_s8 (b, p);
+ int8x16_t vc = vaddq_x_s8 (va, vb, p);
+ vstrbq_p_s8 (c, vc, p);
+ c+=16;
+ a+=16;
+ b+=16;
+ n-=16;
+ }
+}
+
+int main ()
+{
+ int i;
+ int8_t temp1[N];
+ int8_t temp2[N];
+ int8_t temp3[N];
+ reset_data8 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 0);
+ check_plus8 (temp1, temp2, temp3, 0);
+
+ reset_data8 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 1);
+ check_plus8 (temp1, temp2, temp3, 1);
+
+ reset_data8 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 15);
+ check_plus8 (temp1, temp2, temp3, 15);
+
+ reset_data8 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 16);
+ check_plus8 (temp1, temp2, temp3, 16);
+
+ reset_data8 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 17);
+ check_plus8 (temp1, temp2, temp3, 17);
+
+ reset_data8 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 32);
+ check_plus8 (temp1, temp2, temp3, 32);
+
+ reset_data8 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 33);
+ check_plus8 (temp1, temp2, temp3, 33);
+
+ reset_data8 (temp1, temp2, temp3, N);
+}
+
+/* { dg-final { scan-assembler-times {\tdlstp.8} 1 } } */
+/* { dg-final { scan-assembler-times {\tletp} 1 } } */
+/* { dg-final { scan-assembler-not "\tvctp" } } */
+/* { dg-final { scan-assembler-not "\tvpst" } } */
+/* { dg-final { scan-assembler-not "p0" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-invalid-asm.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-invalid-asm.c
new file mode 100644
index 00000000000..f7c3e04f883
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-invalid-asm.c
@@ -0,0 +1,391 @@
+/* { dg-do compile { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-options "-O3 -save-temps" } */
+/* { dg-add-options arm_v8_1m_mve } */
+
+#include <limits.h>
+#include <arm_mve.h>
+
+/* Terminating on a non-zero number of elements. */
+void test0 (uint8_t *a, uint8_t *b, uint8_t *c, int n)
+{
+ while (n > 1)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_x_u8 (va, vb, p);
+ vstrbq_p_u8 (c, vc, p);
+ n -= 16;
+ }
+}
+
+/* Terminating on n >= 0. */
+void test1 (uint8_t *a, uint8_t *b, uint8_t *c, int n)
+{
+ while (n >= 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_x_u8 (va, vb, p);
+ vstrbq_p_u8 (c, vc, p);
+ n -= 16;
+ }
+}
+
+/* Similar, terminating on a non-zero number of elements, but in a for loop
+ format. */
+int32_t a[] = {0, 1, 2, 3, 4, 5, 6, 7};
+void test2 (int32_t *b, int num_elems)
+{
+ for (int i = num_elems; i >= 2; i-= 4)
+ {
+ mve_pred16_t p = vctp32q (i);
+ int32x4_t va = vldrwq_z_s32 (&(a[i]), p);
+ vstrwq_p_s32 (b + i, va, p);
+ }
+}
+
+/* Iteration counter counting up to num_iter, with a non-zero starting num. */
+void test3 (uint8_t *a, uint8_t *b, uint8_t *c, int n)
+{
+ int num_iter = (n + 15)/16;
+ for (int i = 1; i < num_iter; i++)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_x_u8 (va, vb, p);
+ vstrbq_p_u8 (c, vc, p);
+ n -= 16;
+ }
+}
+
+/* Iteration counter counting up to num_iter, with a larger increment */
+void test4 (uint8_t *a, uint8_t *b, uint8_t *c, int n)
+{
+ int num_iter = (n + 15)/16;
+ for (int i = 0; i < num_iter; i+=2)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_x_u8 (va, vb, p);
+ vstrbq_p_u8 (c, vc, p);
+ n -= 16;
+ }
+}
+
+/* Using an unpredicated store instruction within the loop. */
+void test5 (uint8_t *a, uint8_t *b, uint8_t *c, uint8_t *d, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_u8 (va, vb);
+ uint8x16_t vd = vaddq_x_u8 (va, vb, p);
+ vstrbq_u8 (d, vd);
+ n -= 16;
+ }
+}
+
+/* Using an unpredicated store outside the loop. */
+void test6 (uint8_t *a, uint8_t *b, uint8_t *c, int n, uint8x16_t vx)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_m_u8 (vx, va, vb, p);
+ vx = vaddq_u8 (vx, vc);
+ a += 16;
+ b += 16;
+ n -= 16;
+ }
+ vstrbq_u8 (c, vx);
+}
+
+/* Using a VPR that gets modified within the loop. */
+void test9 (int32_t *a, int32_t *b, int32_t *c, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ p++;
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Using a VPR that gets re-generated within the loop. */
+void test10 (int32_t *a, int32_t *b, int32_t *c, int n)
+{
+ mve_pred16_t p = vctp32q (n);
+ while (n > 0)
+ {
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ p = vctp32q (n);
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Using vctp32q_m instead of vctp32q. */
+void test11 (int32_t *a, int32_t *b, int32_t *c, int n, mve_pred16_t p0)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q_m (n, p0);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Using an unpredicated op with a scalar output, where the result is valid
+ outside the bb. This is invalid, because one of the inputs to the
+ unpredicated op is also unpredicated. */
+uint8_t test12 (uint8_t *a, uint8_t *b, uint8_t *c, int n, uint8x16_t vx)
+{
+ uint8_t sum = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_u8 (b);
+ uint8x16_t vc = vaddq_u8 (va, vb);
+ sum += vaddvq_u8 (vc);
+ a += 16;
+ b += 16;
+ n -= 16;
+ }
+ return sum;
+}
+
+/* Using an unpredicated vcmp to generate a new predicate value in the
+ loop and then using that VPR to predicate a store insn. */
+void test13 (int32_t *a, int32_t *b, int32x4_t vc, int32_t *c, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_s32 (a);
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ int32x4_t vc = vaddq_s32 (va, vb);
+ mve_pred16_t p1 = vcmpeqq_s32 (va, vc);
+ vstrwq_p_s32 (c, vc, p1);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Using an across-vector unpredicated instruction. "vb" is the risk. */
+uint16_t test14 (uint16_t *a, uint16_t *b, uint16_t *c, int n)
+{
+ uint16x8_t vb = vldrhq_u16 (b);
+ uint16_t res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp16q (n);
+ uint16x8_t va = vldrhq_z_u16 (a, p);
+ vb = vaddq_u16 (va, vb);
+ res = vaddvq_u16 (vb);
+ c += 8;
+ a += 8;
+ b += 8;
+ n -= 8;
+ }
+ return res;
+}
+
+/* Using an across-vector unpredicated instruction. "vc" is the risk. */
+uint16_t test15 (uint16_t *a, uint16_t *b, uint16_t *c, int n)
+{
+ uint16x8_t vb = vldrhq_u16 (b);
+ uint16_t res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp16q (n);
+ uint16x8_t va = vldrhq_z_u16 (a, p);
+ uint16x8_t vc = vaddq_u16 (va, vb);
+ res = vaddvaq_u16 (res, vc);
+ c += 8;
+ a += 8;
+ b += 8;
+ n -= 8;
+ }
+ return res;
+}
+
+uint16_t test16 (uint16_t *a, uint16_t *b, uint16_t *c, int n)
+{
+ uint16_t res =0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp16q (n);
+ uint16x8_t vb = vldrhq_u16 (b);
+ uint16x8_t va = vldrhq_z_u16 (a, p);
+ res = vaddvaq_u16 (res, vb);
+ res = vaddvaq_p_u16 (res, va, p);
+ c += 8;
+ a += 8;
+ b += 8;
+ n -= 8;
+ }
+ return res;
+}
+
+int test17 (int8_t *a, int8_t *b, int8_t *c, int n)
+{
+ int res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ int8x16_t va = vldrbq_z_s8 (a, p);
+ res = vmaxvq (res, va);
+ n-=16;
+ a+=16;
+ }
+ return res;
+}
+
+
+
+int test18 (int8_t *a, int8_t *b, int8_t *c, int n)
+{
+ int res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ int8x16_t va = vldrbq_z_s8 (a, p);
+ res = vminvq (res, va);
+ n-=16;
+ a+=16;
+ }
+ return res;
+}
+
+int test19 (int8_t *a, int8_t *b, int8_t *c, int n)
+{
+ int res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ int8x16_t va = vldrbq_z_s8 (a, p);
+ res = vminavq (res, va);
+ n-=16;
+ a+=16;
+ }
+ return res;
+}
+
+int test20 (uint8_t *a, uint8_t *b, uint8_t *c, int n)
+{
+ int res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ res = vminvq (res, va);
+ n-=16;
+ a+=16;
+ }
+ return res;
+}
+
+uint8x16_t test21 (uint8_t *a, uint32_t *b, int n, uint8x16_t res)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ res = vshlcq_u8 (va, b, 1);
+ n-=16;
+ a+=16;
+ }
+ return res;
+}
+
+int8x16_t test22 (int8_t *a, int32_t *b, int n, int8x16_t res)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ int8x16_t va = vldrbq_z_s8 (a, p);
+ res = vshlcq_s8 (va, b, 1);
+ n-=16;
+ a+=16;
+ }
+ return res;
+}
+
+/* Using an unsigned number of elements to count down from, with a >0*/
+void test23 (int32_t *a, int32_t *b, int32_t *c, unsigned int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p);
+ vstrwq_p_s32 (c, vc, p);
+ c+=4;
+ a+=4;
+ b+=4;
+ n-=4;
+ }
+}
+
+/* Using an unsigned number of elements to count up to, with a <n*/
+void test24 (uint8_t *a, uint8_t *b, uint8_t *c, unsigned int n)
+{
+ for (int i = 0; i < n; i+=16)
+ {
+ mve_pred16_t p = vctp8q (n-i);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_x_u8 (va, vb, p);
+ vstrbq_p_u8 (c, vc, p);
+ n-=16;
+ }
+}
+
+
+/* Using an unsigned number of elements to count up to, with a <=n*/
+void test25 (uint8_t *a, uint8_t *b, uint8_t *c, unsigned int n)
+{
+ for (int i = 1; i <= n; i+=16)
+ {
+ mve_pred16_t p = vctp8q (n-i+1);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_x_u8 (va, vb, p);
+ vstrbq_p_u8 (c, vc, p);
+ n-=16;
+ }
+}
+
+/* { dg-final { scan-assembler-not "\tdlstp" } } */
+/* { dg-final { scan-assembler-not "\tletp" } } */
\ No newline at end of file
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] arm: Add define_attr to to create a mapping between MVE predicated and unpredicated insns
2023-12-18 11:53 ` [PATCH 1/2] arm: Add define_attr to to create a mapping between MVE predicated and unpredicated insns Andre Vieira
@ 2023-12-20 16:54 ` Andre Vieira (lists)
0 siblings, 0 replies; 5+ messages in thread
From: Andre Vieira (lists) @ 2023-12-20 16:54 UTC (permalink / raw)
To: gcc-patches; +Cc: Richard.Earnshaw, Stam Markianos-Wright
[-- Attachment #1: Type: text/plain, Size: 377 bytes --]
Reworked patch after Richard's comments and moved
predicated_doloop_end_internal and dlstp*_insn to the next patch in the
series to make sure this one builds on its own.
On 18/12/2023 11:53, Andre Vieira wrote:
>
> Re-sending Stam's first patch, same as:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/635301.html
>
> Hopefully patchworks can pick this up :)
>
[-- Attachment #2: 0001-arm-Add-define_attr-to-to-create-a-mapping-between-M_v2.patch --]
[-- Type: text/plain, Size: 103870 bytes --]
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index a9c2752c0ea5ecd4597ded254e9426753ac0a098..f0b01b7461f883994a0be137cb6cbf079d54618b 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -2375,6 +2375,21 @@ extern int making_const_table;
else if (TARGET_THUMB1) \
thumb1_final_prescan_insn (INSN)
+/* These defines are useful to refer to the value of the mve_unpredicated_insn
+ insn attribute. Note that, because these use the get_attr_* function, these
+ will change recog_data if (INSN) isn't current_insn. */
+#define MVE_VPT_PREDICABLE_INSN_P(INSN) \
+ (recog_memoized (INSN) >= 0 \
+ && get_attr_mve_unpredicated_insn (INSN) != CODE_FOR_nothing)
+
+#define MVE_VPT_PREDICATED_INSN_P(INSN) \
+ (MVE_VPT_PREDICABLE_INSN_P (INSN) \
+ && recog_memoized (INSN) != get_attr_mve_unpredicated_insn (INSN))
+
+#define MVE_VPT_UNPREDICATED_INSN_P(INSN) \
+ (MVE_VPT_PREDICABLE_INSN_P (INSN) \
+ && recog_memoized (INSN) == get_attr_mve_unpredicated_insn (INSN))
+
#define ARM_SIGN_EXTEND(x) ((HOST_WIDE_INT) \
(HOST_BITS_PER_WIDE_INT <= 32 ? (unsigned HOST_WIDE_INT) (x) \
: ((((unsigned HOST_WIDE_INT)(x)) & (unsigned HOST_WIDE_INT) 0xffffffff) |\
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 07eaf06cdeace750fe1c7d399deb833ef5fc2b66..296212be33ffe6397b05491d8854d2a59f7c54df 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -124,6 +124,12 @@ (define_attr "fpu" "none,vfp"
; and not all ARM insns do.
(define_attr "predicated" "yes,no" (const_string "no"))
+; An attribute that encodes the CODE_FOR_<insn> of the MVE VPT unpredicated
+; version of a VPT-predicated instruction. For unpredicated instructions
+; that are predicable, encode the same pattern's CODE_FOR_<insn> as a way to
+; encode that it is a predicable instruction.
+(define_attr "mve_unpredicated_insn" "" (symbol_ref "CODE_FOR_nothing"))
+
; LENGTH of an instruction (in bytes)
(define_attr "length" ""
(const_int 4))
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index a980353810166312d5bdfc8ad58b2825c910d0a0..5ea2d9e866891bdb3dc73fcf6cbd6cdd2f989951 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -2305,6 +2305,7 @@ (define_int_attr simd32_op [(UNSPEC_QADD8 "qadd8") (UNSPEC_QSUB8 "qsub8")
(define_int_attr mmla_sfx [(UNSPEC_MATMUL_S "s8") (UNSPEC_MATMUL_U "u8")
(UNSPEC_MATMUL_US "s8")])
+
;;MVE int attribute.
(define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
(VREV16Q_U "u") (VMVNQ_N_S "s") (VMVNQ_N_U "u")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index b0d3443da9cee991193d390200738290806a1e69..b1862d7977e91605cd971e634105bed3fa6e75cb 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -17,7 +17,7 @@
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
-(define_insn "*mve_mov<mode>"
+(define_insn "mve_mov<mode>"
[(set (match_operand:MVE_types 0 "nonimmediate_operand" "=w,w,r,w , w, r,Ux,w")
(match_operand:MVE_types 1 "general_operand" " w,r,w,DnDm,UxUi,r,w, Ul"))]
"TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT"
@@ -81,18 +81,27 @@ (define_insn "*mve_mov<mode>"
return "";
}
}
- [(set_attr "type" "mve_move,mve_move,mve_move,mve_move,mve_load,multiple,mve_store,mve_load")
+ [(set_attr_alternative "mve_unpredicated_insn" [(symbol_ref "CODE_FOR_mve_mov<mode>")
+ (symbol_ref "CODE_FOR_nothing")
+ (symbol_ref "CODE_FOR_nothing")
+ (symbol_ref "CODE_FOR_mve_mov<mode>")
+ (symbol_ref "CODE_FOR_mve_mov<mode>")
+ (symbol_ref "CODE_FOR_nothing")
+ (symbol_ref "CODE_FOR_mve_mov<mode>")
+ (symbol_ref "CODE_FOR_nothing")])
+ (set_attr "type" "mve_move,mve_move,mve_move,mve_move,mve_load,multiple,mve_store,mve_load")
(set_attr "length" "4,8,8,4,4,8,4,8")
(set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*")
(set_attr "neg_pool_range" "*,*,*,*,996,*,*,*")])
-(define_insn "*mve_vdup<mode>"
+(define_insn "mve_vdup<mode>"
[(set (match_operand:MVE_vecs 0 "s_register_operand" "=w")
(vec_duplicate:MVE_vecs
(match_operand:<V_elem> 1 "s_register_operand" "r")))]
"TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT"
"vdup.<V_sz_elem>\t%q0, %1"
- [(set_attr "length" "4")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vdup<mode>"))
+ (set_attr "length" "4")
(set_attr "type" "mve_move")])
;;
@@ -145,7 +154,8 @@ (define_insn "@mve_<mve_insn>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_mnemo>.f%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -159,7 +169,8 @@ (define_insn "@mve_<mve_insn>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -173,7 +184,8 @@ (define_insn "mve_v<absneg_str>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"v<absneg_str>.f%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_v<absneg_str>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -187,7 +199,8 @@ (define_insn "@mve_<mve_insn>q_n_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.%#<V_sz_elem>\t%q0, %1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -201,7 +214,8 @@ (define_insn "@mve_<mve_insn>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
;; [vcvttq_f32_f16])
@@ -214,7 +228,8 @@ (define_insn "mve_vcvttq_f32_f16v4sf"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvtt.f32.f16\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvttq_f32_f16v4sf"))
+ (set_attr "type" "mve_move")
])
;;
@@ -228,7 +243,8 @@ (define_insn "mve_vcvtbq_f32_f16v4sf"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvtb.f32.f16\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtbq_f32_f16v4sf"))
+ (set_attr "type" "mve_move")
])
;;
@@ -242,7 +258,8 @@ (define_insn "mve_vcvtq_to_f_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvt.f%#<V_sz_elem>.<supf>%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_to_f_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -256,7 +273,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -270,7 +288,8 @@ (define_insn "mve_vcvtq_from_f_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvt.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_from_f_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -284,7 +303,8 @@ (define_insn "mve_v<absneg_str>q_s<mode>"
]
"TARGET_HAVE_MVE"
"v<absneg_str>.s%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_v<absneg_str>q_s<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -297,7 +317,8 @@ (define_insn "mve_vmvnq_u<mode>"
]
"TARGET_HAVE_MVE"
"vmvn\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vmvnq_u<mode>"))
+ (set_attr "type" "mve_move")
])
(define_expand "mve_vmvnq_s<mode>"
[
@@ -318,7 +339,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.%#<V_sz_elem>\t%q0, %1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -331,7 +353,8 @@ (define_insn "@mve_vclzq_s<mode>"
]
"TARGET_HAVE_MVE"
"vclz.i%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vclzq_s<mode>"))
+ (set_attr "type" "mve_move")
])
(define_expand "mve_vclzq_u<mode>"
[
@@ -354,7 +377,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -368,7 +392,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -382,7 +407,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -397,7 +423,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -411,7 +438,8 @@ (define_insn "mve_vcvtpq_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvtp.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtpq_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -425,7 +453,8 @@ (define_insn "mve_vcvtnq_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvtn.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtnq_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -439,7 +468,8 @@ (define_insn "mve_vcvtmq_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvtm.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtmq_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -453,7 +483,8 @@ (define_insn "mve_vcvtaq_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvta.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtaq_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -467,7 +498,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.i%#<V_sz_elem>\t%q0, %1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -481,7 +513,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<V_sz_elem>\t%q0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -495,7 +528,8 @@ (define_insn "@mve_<mve_insn>q_<supf>v4si"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>32\t%Q0, %R0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf>v4si"))
+ (set_attr "type" "mve_move")
])
;;
@@ -509,7 +543,8 @@ (define_insn "mve_vctp<MVE_vctp>q<MVE_vpred>"
]
"TARGET_HAVE_MVE"
"vctp.<MVE_vctp>\t%1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vctp<MVE_vctp>q<MVE_vpred>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -523,7 +558,8 @@ (define_insn "mve_vpnotv16bi"
]
"TARGET_HAVE_MVE"
"vpnot"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vpnotv16bi"))
+ (set_attr "type" "mve_move")
])
;;
@@ -538,7 +574,8 @@ (define_insn "@mve_<mve_insn>q_n_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.<V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -553,7 +590,8 @@ (define_insn "mve_vcvtq_n_to_f_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvt.f<V_sz_elem>.<supf><V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_n_to_f_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;; [vcreateq_f])
@@ -599,7 +637,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf><V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;; Versions that take constant vectors as operand 2 (with all elements
@@ -617,7 +656,8 @@ (define_insn "mve_vshrq_n_s<mode>_imm"
VALID_NEON_QREG_MODE (<MODE>mode),
true);
}
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vshrq_n_s<mode>_imm"))
+ (set_attr "type" "mve_move")
])
(define_insn "mve_vshrq_n_u<mode>_imm"
[
@@ -632,7 +672,8 @@ (define_insn "mve_vshrq_n_u<mode>_imm"
VALID_NEON_QREG_MODE (<MODE>mode),
true);
}
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vshrq_n_u<mode>_imm"))
+ (set_attr "type" "mve_move")
])
;;
@@ -647,7 +688,8 @@ (define_insn "mve_vcvtq_n_from_f_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvt.<supf><V_sz_elem>.f<V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_n_from_f_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -662,8 +704,9 @@ (define_insn "@mve_<mve_insn>q_p_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>32\t%Q0, %R0, %q1"
- [(set_attr "type" "mve_move")
- (set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf>v4si"))
+ (set_attr "type" "mve_move")
+ (set_attr "length""8")])
;;
;; [vcmpneq_, vcmpcsq_, vcmpeqq_, vcmpgeq_, vcmpgtq_, vcmphiq_, vcmpleq_, vcmpltq_])
@@ -676,7 +719,8 @@ (define_insn "@mve_vcmp<mve_cmp_op>q_<mode>"
]
"TARGET_HAVE_MVE"
"vcmp.<mve_cmp_type>%#<V_sz_elem>\t<mve_cmp_op>, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcmp<mve_cmp_op>q_<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -691,7 +735,8 @@ (define_insn "@mve_vcmp<mve_cmp_op>q_n_<mode>"
]
"TARGET_HAVE_MVE"
"vcmp.<mve_cmp_type>%#<V_sz_elem> <mve_cmp_op>, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcmp<mve_cmp_op>q_n_<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -722,7 +767,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -739,7 +785,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.i%#<V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -754,7 +801,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -769,7 +817,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%0, %q1"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -789,8 +838,11 @@ (define_insn "mve_vandq_u<mode>"
"@
vand\t%q0, %q1, %q2
* return neon_output_logic_immediate (\"vand\", &operands[2], <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));"
- [(set_attr "type" "mve_move")
+ [(set_attr_alternative "mve_unpredicated_insn" [(symbol_ref "CODE_FOR_mve_vandq_u<mode>")
+ (symbol_ref "CODE_FOR_nothing")])
+ (set_attr "type" "mve_move")
])
+
(define_expand "mve_vandq_s<mode>"
[
(set (match_operand:MVE_2 0 "s_register_operand")
@@ -811,7 +863,8 @@ (define_insn "mve_vbicq_u<mode>"
]
"TARGET_HAVE_MVE"
"vbic\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vbicq_u<mode>"))
+ (set_attr "type" "mve_move")
])
(define_expand "mve_vbicq_s<mode>"
@@ -835,7 +888,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.%#<V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -853,7 +907,8 @@ (define_insn "@mve_<mve_insn>q<mve_rot>_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<isu>%#<V_sz_elem>\t%q0, %q1, %q2, #<rot>"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q<mve_rot>_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;; Auto vectorizer pattern for int vcadd
@@ -876,7 +931,8 @@ (define_insn "mve_veorq_u<mode>"
]
"TARGET_HAVE_MVE"
"veor\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_veorq_u<mode>"))
+ (set_attr "type" "mve_move")
])
(define_expand "mve_veorq_s<mode>"
[
@@ -904,7 +960,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -920,7 +977,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.s%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -935,7 +993,8 @@ (define_insn "mve_<max_min_su_str>q_<max_min_supf><mode>"
]
"TARGET_HAVE_MVE"
"<max_min_su_str>.<max_min_supf>%#<V_sz_elem>\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<max_min_su_str>q_<max_min_supf><mode>"))
+ (set_attr "type" "mve_move")
])
@@ -954,7 +1013,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -972,7 +1032,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -988,7 +1049,8 @@ (define_insn "@mve_<mve_insn>q_int_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<isu>%#<V_sz_elem>\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_int_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1004,7 +1066,8 @@ (define_insn "mve_<mve_addsubmul>q<mode>"
]
"TARGET_HAVE_MVE"
"<mve_addsubmul>.i%#<V_sz_elem>\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_addsubmul>q<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1018,7 +1081,8 @@ (define_insn "mve_vornq_s<mode>"
]
"TARGET_HAVE_MVE"
"vorn\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vornq_s<mode>"))
+ (set_attr "type" "mve_move")
])
(define_expand "mve_vornq_u<mode>"
@@ -1047,7 +1111,8 @@ (define_insn "mve_vorrq_s<mode>"
"@
vorr\t%q0, %q1, %q2
* return neon_output_logic_immediate (\"vorr\", &operands[2], <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vorrq_s<mode>"))
+ (set_attr "type" "mve_move")
])
(define_expand "mve_vorrq_u<mode>"
[
@@ -1071,7 +1136,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1087,7 +1153,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1103,7 +1170,8 @@ (define_insn "@mve_<mve_insn>q_r_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_r_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1118,7 +1186,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1133,7 +1202,8 @@ (define_insn "@mve_<mve_insn>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.f%#<V_sz_elem>\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1148,7 +1218,8 @@ (define_insn "@mve_<mve_insn>q_<supf>v4si"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>32\t%Q0, %R0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf>v4si"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1165,7 +1236,8 @@ (define_insn "@mve_<mve_insn>q_n_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.f%#<V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1179,7 +1251,8 @@ (define_insn "mve_vandq_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vand\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vandq_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1193,7 +1266,8 @@ (define_insn "mve_vbicq_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vbic\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vbicq_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1209,7 +1283,8 @@ (define_insn "@mve_<mve_insn>q<mve_rot>_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.f%#<V_sz_elem>\t%q0, %q1, %q2, #<rot>"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q<mve_rot>_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1223,7 +1298,8 @@ (define_insn "@mve_vcmp<mve_cmp_op>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcmp.f%#<V_sz_elem> <mve_cmp_op>, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcmp<mve_cmp_op>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1238,7 +1314,8 @@ (define_insn "@mve_vcmp<mve_cmp_op>q_n_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcmp.f%#<V_sz_elem> <mve_cmp_op>, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcmp<mve_cmp_op>q_n_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1253,8 +1330,10 @@ (define_insn "mve_vctp<MVE_vctp>q_m<MVE_vpred>"
]
"TARGET_HAVE_MVE"
"vpst\;vctpt.<MVE_vctp>\t%1"
- [(set_attr "type" "mve_move")
- (set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vctp<MVE_vctp>q<MVE_vpred>"))
+ (set_attr "type" "mve_move")
+ (set_attr "length""8")
+])
;;
;; [vcvtbq_f16_f32])
@@ -1268,7 +1347,8 @@ (define_insn "mve_vcvtbq_f16_f32v8hf"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvtb.f16.f32\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtbq_f16_f32v8hf"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1283,7 +1363,8 @@ (define_insn "mve_vcvttq_f16_f32v8hf"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vcvtt.f16.f32\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvttq_f16_f32v8hf"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1297,7 +1378,8 @@ (define_insn "mve_veorq_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"veor\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_veorq_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1313,7 +1395,8 @@ (define_insn "@mve_<mve_insn>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.f%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1331,7 +1414,8 @@ (define_insn "@mve_<mve_insn>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.f%#<V_sz_elem>\t%0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1346,7 +1430,8 @@ (define_insn "@mve_<max_min_f_str>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<max_min_f_str>.f%#<V_sz_elem> %q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<max_min_f_str>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1364,7 +1449,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%Q0, %R0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1384,7 +1470,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<isu>%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1400,7 +1487,8 @@ (define_insn "mve_<mve_addsubmul>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_addsubmul>.f%#<V_sz_elem>\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_addsubmul>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1414,7 +1502,8 @@ (define_insn "mve_vornq_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vorn\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vornq_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1428,7 +1517,8 @@ (define_insn "mve_vorrq_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vorr\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vorrq_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1444,7 +1534,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.i%#<V_sz_elem> %q0, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1460,7 +1551,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.s%#<V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1476,7 +1568,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.s%#<V_sz_elem>\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1494,7 +1587,8 @@ (define_insn "@mve_<mve_insn>q_<supf>v4si"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>32\t%Q0, %R0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf>v4si"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1510,7 +1604,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1526,7 +1621,8 @@ (define_insn "@mve_<mve_insn>q_poly_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_poly_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1547,8 +1643,9 @@ (define_insn "@mve_vcmp<mve_cmp_op1>q_m_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcmpt.f%#<V_sz_elem>\t<mve_cmp_op1>, %q1, %q2"
- [(set_attr "type" "mve_move")
- (set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcmp<mve_cmp_op1>q_f<mode>"))
+ (set_attr "length""8")])
+
;;
;; [vcvtaq_m_u, vcvtaq_m_s])
;;
@@ -1562,8 +1659,10 @@ (define_insn "mve_vcvtaq_m_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvtat.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
- (set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtaq_<supf><mode>"))
+ (set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
;;
;; [vcvtq_m_to_f_s, vcvtq_m_to_f_u])
;;
@@ -1577,8 +1676,9 @@ (define_insn "mve_vcvtq_m_to_f_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvtt.f%#<V_sz_elem>.<supf>%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
- (set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_to_f_<supf><mode>"))
+ (set_attr "type" "mve_move")
+ (set_attr "length""8")])
;;
;; [vqrshrnbq_n_u, vqrshrnbq_n_s]
@@ -1604,7 +1704,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<isu>%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1623,7 +1724,8 @@ (define_insn "@mve_<mve_insn>q_<supf>v4si"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>32\t%Q0, %R0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf>v4si"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1639,7 +1741,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1685,7 +1788,10 @@ (define_insn "mve_vshlcq_<supf><mode>"
(match_dup 4)]
VSHLCQ))]
"TARGET_HAVE_MVE"
- "vshlc\t%q0, %1, %4")
+ "vshlc\t%q0, %1, %4"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vshlcq_<supf><mode>"))
+ (set_attr "type" "mve_move")
+])
;;
;; [vabsq_m_s]
@@ -1705,7 +1811,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<isu>%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1721,7 +1828,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1744,7 +1852,8 @@ (define_insn "@mve_vcmp<mve_cmp_op1>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;vcmpt.<isu>%#<V_sz_elem>\t<mve_cmp_op1>, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcmp<mve_cmp_op1>q_n_<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1767,7 +1876,8 @@ (define_insn "@mve_vcmp<mve_cmp_op1>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;vcmpt.<isu>%#<V_sz_elem>\t<mve_cmp_op1>, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcmp<mve_cmp_op1>q_<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1783,7 +1893,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1800,7 +1911,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.s%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1819,7 +1931,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1838,7 +1951,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1857,7 +1971,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1878,7 +1993,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1894,7 +2010,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1910,7 +2027,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1933,7 +2051,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.s%#<V_sz_elem>\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -1950,7 +2069,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1967,7 +2087,8 @@ (define_insn "@mve_<mve_insn>q_m_r_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_r_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1983,7 +2104,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -1999,7 +2121,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -2015,7 +2138,8 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -2038,7 +2162,8 @@ (define_insn "@mve_<mve_insn>q_m_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_mnemo>t.f%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2054,7 +2179,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>32\t%Q0, %R0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
;; [vcmlaq, vcmlaq_rot90, vcmlaq_rot180, vcmlaq_rot270])
@@ -2072,7 +2198,9 @@ (define_insn "@mve_<mve_insn>q<mve_rot>_f<mode>"
"@
vcmul.f%#<V_sz_elem> %q0, %q2, %q3, #<rot>
vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #<rot>"
- [(set_attr "type" "mve_move")
+ [(set_attr_alternative "mve_unpredicated_insn" [(symbol_ref "CODE_FOR_mve_<mve_insn>q<mve_rot>_f<mode>")
+ (symbol_ref "CODE_FOR_mve_<mve_insn>q<mve_rot>_f<mode>")])
+ (set_attr "type" "mve_move")
])
;;
@@ -2093,7 +2221,8 @@ (define_insn "@mve_vcmp<mve_cmp_op1>q_m_n_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcmpt.f%#<V_sz_elem>\t<mve_cmp_op1>, %q1, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcmp<mve_cmp_op1>q_n_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2109,7 +2238,8 @@ (define_insn "mve_vcvtbq_m_f16_f32v8hf"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvtbt.f16.f32\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtbq_f16_f32v8hf"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2125,7 +2255,8 @@ (define_insn "mve_vcvtbq_m_f32_f16v4sf"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvtbt.f32.f16\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtbq_f32_f16v4sf"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2141,7 +2272,8 @@ (define_insn "mve_vcvttq_m_f16_f32v8hf"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvttt.f16.f32\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvttq_f16_f32v8hf"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2157,8 +2289,9 @@ (define_insn "mve_vcvttq_m_f32_f16v4sf"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvttt.f32.f16\t%q0, %q2"
- [(set_attr "type" "mve_move")
- (set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvttq_f32_f16v4sf"))
+ (set_attr "type" "mve_move")
+ (set_attr "length""8")])
;;
;; [vdupq_m_n_f])
@@ -2173,7 +2306,8 @@ (define_insn "@mve_<mve_insn>q_m_n_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2190,7 +2324,8 @@ (define_insn "@mve_<mve_insn>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.f%#<V_sz_elem>\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -2207,7 +2342,8 @@ (define_insn "@mve_<mve_insn>q_n_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>.f%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -2224,7 +2360,8 @@ (define_insn "@mve_<mve_insn>q_m_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.f%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2243,7 +2380,8 @@ (define_insn "@mve_<mve_insn>q_p_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.f%#<V_sz_elem>\t%0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2262,7 +2400,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
]
"TARGET_HAVE_MVE"
"<mve_insn>.<supf>%#<V_sz_elem>\t%Q0, %R0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -2281,7 +2420,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%Q0, %R0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2298,7 +2438,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2319,7 +2460,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<isu>%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2335,7 +2477,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.i%#<V_sz_elem>\t%q0, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2352,7 +2495,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.i%#<V_sz_elem>\t%q0, %2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2368,7 +2512,8 @@ (define_insn "@mve_<mve_insn>q_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"<mve_insn>\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
])
;;
@@ -2384,7 +2529,8 @@ (define_insn "@mve_<mve_insn>q_m_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2400,7 +2546,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2416,7 +2563,8 @@ (define_insn "@mve_<mve_insn>q_m_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2435,7 +2583,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>32\t%Q0, %R0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2451,7 +2600,8 @@ (define_insn "mve_vcvtmq_m_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvtmt.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtmq_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2467,7 +2617,8 @@ (define_insn "mve_vcvtpq_m_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvtpt.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtpq_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2483,7 +2634,8 @@ (define_insn "mve_vcvtnq_m_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvtnt.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtnq_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2500,7 +2652,8 @@ (define_insn "mve_vcvtq_m_n_from_f_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvtt.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_n_from_f_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2516,7 +2669,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2532,8 +2686,9 @@ (define_insn "mve_vcvtq_m_from_f_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvtt.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2"
- [(set_attr "type" "mve_move")
- (set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_from_f_<supf><mode>"))
+ (set_attr "type" "mve_move")
+ (set_attr "length""8")])
;;
;; [vabavq_p_s, vabavq_p_u])
@@ -2549,7 +2704,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length" "8")])
;;
@@ -2566,8 +2722,9 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\n\t<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
- (set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
+ (set_attr "length" "8")])
;;
;; [vsriq_m_n_s, vsriq_m_n_u])
@@ -2583,8 +2740,9 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
- (set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
+ (set_attr "length" "8")])
;;
;; [vcvtq_m_n_to_f_u, vcvtq_m_n_to_f_s])
@@ -2600,7 +2758,8 @@ (define_insn "mve_vcvtq_m_n_to_f_<supf><mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vcvtt.f%#<V_sz_elem>.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_n_to_f_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2640,7 +2799,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2659,8 +2819,9 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.i%#<V_sz_elem> %q0, %q2, %3"
- [(set_attr "type" "mve_move")
- (set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
+ (set_attr "length""8")])
;;
;; [vaddq_m_u, vaddq_m_s]
@@ -2678,7 +2839,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.i%#<V_sz_elem>\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2698,7 +2860,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2715,8 +2878,9 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
- (set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
+ (set_attr "length""8")])
;;
;; [vcaddq_rot90_m_u, vcaddq_rot90_m_s]
@@ -2735,7 +2899,8 @@ (define_insn "@mve_<mve_insn>q<mve_rot>_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<isu>%#<V_sz_elem>\t%q0, %q2, %q3, #<rot>"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q<mve_rot>_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2763,7 +2928,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2784,7 +2950,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2802,7 +2969,8 @@ (define_insn "@mve_<mve_insn>q_int_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_int_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2819,7 +2987,8 @@ (define_insn "mve_vornq_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;vornt\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vornq_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2837,7 +3006,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2855,7 +3025,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2872,7 +3043,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2892,7 +3064,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%Q0, %R0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2920,7 +3093,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<isu>%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2940,7 +3114,8 @@ (define_insn "@mve_<mve_insn>q_p_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>32\t%Q0, %R0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2958,7 +3133,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2976,7 +3152,8 @@ (define_insn "@mve_<mve_insn>q_poly_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_poly_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -2994,7 +3171,8 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.s%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -3012,7 +3190,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.s%#<V_sz_elem>\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -3036,7 +3215,8 @@ (define_insn "@mve_<mve_insn>q_m_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.f%#<V_sz_elem> %q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -3057,7 +3237,8 @@ (define_insn "@mve_<mve_insn>q_m_n_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.f%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -3077,7 +3258,8 @@ (define_insn "@mve_<mve_insn>q_m_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -3094,7 +3276,8 @@ (define_insn "@mve_<mve_insn>q_m_n_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2, %3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -3116,7 +3299,8 @@ (define_insn "@mve_<mve_insn>q<mve_rot>_m_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.f%#<V_sz_elem>\t%q0, %q2, %q3, #<rot>"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q<mve_rot>_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -3136,7 +3320,8 @@ (define_insn "@mve_<mve_insn>q<mve_rot>_m_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.f%#<V_sz_elem>\t%q0, %q2, %q3, #<rot>"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q<mve_rot>_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -3153,7 +3338,8 @@ (define_insn "mve_vornq_m_f<mode>"
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vornt\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vornq_f<mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -3173,7 +3359,8 @@ (define_insn "mve_vstrbq_<supf><mode>"
output_asm_insn("vstrb.<V_sz_elem>\t%q1, %E0",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrbq_<supf><mode>"))
+ (set_attr "length" "4")])
;;
;; [vstrbq_scatter_offset_s vstrbq_scatter_offset_u]
@@ -3201,7 +3388,8 @@ (define_insn "mve_vstrbq_scatter_offset_<supf><mode>_insn"
VSTRBSOQ))]
"TARGET_HAVE_MVE"
"vstrb.<V_sz_elem>\t%q2, [%0, %q1]"
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrbq_scatter_offset_<supf><mode>_insn"))
+ (set_attr "length" "4")])
;;
;; [vstrwq_scatter_base_s vstrwq_scatter_base_u]
@@ -3223,7 +3411,8 @@ (define_insn "mve_vstrwq_scatter_base_<supf>v4si"
output_asm_insn("vstrw.u32\t%q2, [%q0, %1]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_<supf>v4si"))
+ (set_attr "length" "4")])
;;
;; [vldrbq_gather_offset_s vldrbq_gather_offset_u]
@@ -3246,7 +3435,8 @@ (define_insn "mve_vldrbq_gather_offset_<supf><mode>"
output_asm_insn ("vldrb.<supf><V_sz_elem>\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrbq_gather_offset_<supf><mode>"))
+ (set_attr "length" "4")])
;;
;; [vldrbq_s vldrbq_u]
@@ -3268,7 +3458,8 @@ (define_insn "mve_vldrbq_<supf><mode>"
output_asm_insn ("vldrb.<supf><V_sz_elem>\t%q0, %E1",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrbq_<supf><mode>"))
+ (set_attr "length" "4")])
;;
;; [vldrwq_gather_base_s vldrwq_gather_base_u]
@@ -3288,7 +3479,8 @@ (define_insn "mve_vldrwq_gather_base_<supf>v4si"
output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_<supf>v4si"))
+ (set_attr "length" "4")])
;;
;; [vstrbq_scatter_offset_p_s vstrbq_scatter_offset_p_u]
@@ -3320,7 +3512,8 @@ (define_insn "mve_vstrbq_scatter_offset_p_<supf><mode>_insn"
VSTRBSOQ))]
"TARGET_HAVE_MVE"
"vpst\;vstrbt.<V_sz_elem>\t%q2, [%0, %q1]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrbq_scatter_offset_<supf><mode>_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_scatter_base_p_s vstrwq_scatter_base_p_u]
@@ -3343,7 +3536,8 @@ (define_insn "mve_vstrwq_scatter_base_p_<supf>v4si"
output_asm_insn ("vpst\n\tvstrwt.u32\t%q2, [%q0, %1]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_<supf>v4si"))
+ (set_attr "length" "8")])
(define_insn "mve_vstrbq_p_<supf><mode>"
[(set (match_operand:<MVE_B_ELEM> 0 "mve_memory_operand" "=Ux")
@@ -3361,7 +3555,8 @@ (define_insn "mve_vstrbq_p_<supf><mode>"
output_asm_insn ("vpst\;vstrbt.<V_sz_elem>\t%q1, %E0",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrbq_<supf><mode>"))
+ (set_attr "length" "8")])
;;
;; [vldrbq_gather_offset_z_s vldrbq_gather_offset_z_u]
@@ -3386,7 +3581,8 @@ (define_insn "mve_vldrbq_gather_offset_z_<supf><mode>"
output_asm_insn ("vpst\n\tvldrbt.<supf><V_sz_elem>\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrbq_gather_offset_<supf><mode>"))
+ (set_attr "length" "8")])
;;
;; [vldrbq_z_s vldrbq_z_u]
@@ -3409,7 +3605,8 @@ (define_insn "mve_vldrbq_z_<supf><mode>"
output_asm_insn ("vpst\;vldrbt.<supf><V_sz_elem>\t%q0, %E1",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrbq_<supf><mode>"))
+ (set_attr "length" "8")])
;;
;; [vldrwq_gather_base_z_s vldrwq_gather_base_z_u]
@@ -3430,7 +3627,8 @@ (define_insn "mve_vldrwq_gather_base_z_<supf>v4si"
output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%q1, %2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_<supf>v4si"))
+ (set_attr "length" "8")])
;;
;; [vldrhq_f]
@@ -3449,7 +3647,8 @@ (define_insn "mve_vldrhq_fv8hf"
output_asm_insn ("vldrh.16\t%q0, %E1",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_fv8hf"))
+ (set_attr "length" "4")])
;;
;; [vldrhq_gather_offset_s vldrhq_gather_offset_u]
@@ -3472,7 +3671,8 @@ (define_insn "mve_vldrhq_gather_offset_<supf><mode>"
output_asm_insn ("vldrh.<supf><V_sz_elem>\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_offset_<supf><mode>"))
+ (set_attr "length" "4")])
;;
;; [vldrhq_gather_offset_z_s vldrhq_gather_offset_z_u]
@@ -3497,7 +3697,8 @@ (define_insn "mve_vldrhq_gather_offset_z_<supf><mode>"
output_asm_insn ("vpst\n\tvldrht.<supf><V_sz_elem>\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_offset_<supf><mode>"))
+ (set_attr "length" "8")])
;;
;; [vldrhq_gather_shifted_offset_s vldrhq_gather_shifted_offset_u]
@@ -3520,7 +3721,8 @@ (define_insn "mve_vldrhq_gather_shifted_offset_<supf><mode>"
output_asm_insn ("vldrh.<supf><V_sz_elem>\t%q0, [%m1, %q2, uxtw #1]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_shifted_offset_<supf><mode>"))
+ (set_attr "length" "4")])
;;
;; [vldrhq_gather_shifted_offset_z_s vldrhq_gather_shited_offset_z_u]
@@ -3545,7 +3747,8 @@ (define_insn "mve_vldrhq_gather_shifted_offset_z_<supf><mode>"
output_asm_insn ("vpst\n\tvldrht.<supf><V_sz_elem>\t%q0, [%m1, %q2, uxtw #1]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_shifted_offset_<supf><mode>"))
+ (set_attr "length" "8")])
;;
;; [vldrhq_s, vldrhq_u]
@@ -3567,7 +3770,8 @@ (define_insn "mve_vldrhq_<supf><mode>"
output_asm_insn ("vldrh.<supf><V_sz_elem>\t%q0, %E1",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_<supf><mode>"))
+ (set_attr "length" "4")])
;;
;; [vldrhq_z_f]
@@ -3587,7 +3791,8 @@ (define_insn "mve_vldrhq_z_fv8hf"
output_asm_insn ("vpst\;vldrht.16\t%q0, %E1",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_fv8hf"))
+ (set_attr "length" "8")])
;;
;; [vldrhq_z_s vldrhq_z_u]
@@ -3610,7 +3815,8 @@ (define_insn "mve_vldrhq_z_<supf><mode>"
output_asm_insn ("vpst\;vldrht.<supf><V_sz_elem>\t%q0, %E1",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_<supf><mode>"))
+ (set_attr "length" "8")])
;;
;; [vldrwq_f]
@@ -3629,7 +3835,8 @@ (define_insn "mve_vldrwq_fv4sf"
output_asm_insn ("vldrw.32\t%q0, %E1",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_fv4sf"))
+ (set_attr "length" "4")])
;;
;; [vldrwq_s vldrwq_u]
@@ -3648,7 +3855,8 @@ (define_insn "mve_vldrwq_<supf>v4si"
output_asm_insn ("vldrw.32\t%q0, %E1",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_<supf>v4si"))
+ (set_attr "length" "4")])
;;
;; [vldrwq_z_f]
@@ -3668,7 +3876,8 @@ (define_insn "mve_vldrwq_z_fv4sf"
output_asm_insn ("vpst\;vldrwt.32\t%q0, %E1",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_fv4sf"))
+ (set_attr "length" "8")])
;;
;; [vldrwq_z_s vldrwq_z_u]
@@ -3688,7 +3897,8 @@ (define_insn "mve_vldrwq_z_<supf>v4si"
output_asm_insn ("vpst\;vldrwt.32\t%q0, %E1",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_<supf>v4si"))
+ (set_attr "length" "8")])
(define_expand "@mve_vld1q_f<mode>"
[(match_operand:MVE_0 0 "s_register_operand")
@@ -3728,7 +3938,8 @@ (define_insn "mve_vldrdq_gather_base_<supf>v2di"
output_asm_insn ("vldrd.64\t%q0, [%q1, %2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_base_<supf>v2di"))
+ (set_attr "length" "4")])
;;
;; [vldrdq_gather_base_z_s vldrdq_gather_base_z_u]
@@ -3749,7 +3960,8 @@ (define_insn "mve_vldrdq_gather_base_z_<supf>v2di"
output_asm_insn ("vpst\n\tvldrdt.u64\t%q0, [%q1, %2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_base_<supf>v2di"))
+ (set_attr "length" "8")])
;;
;; [vldrdq_gather_offset_s vldrdq_gather_offset_u]
@@ -3769,7 +3981,8 @@ (define_insn "mve_vldrdq_gather_offset_<supf>v2di"
output_asm_insn ("vldrd.u64\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_offset_<supf>v2di"))
+ (set_attr "length" "4")])
;;
;; [vldrdq_gather_offset_z_s vldrdq_gather_offset_z_u]
@@ -3790,7 +4003,8 @@ (define_insn "mve_vldrdq_gather_offset_z_<supf>v2di"
output_asm_insn ("vpst\n\tvldrdt.u64\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_offset_<supf>v2di"))
+ (set_attr "length" "8")])
;;
;; [vldrdq_gather_shifted_offset_s vldrdq_gather_shifted_offset_u]
@@ -3810,7 +4024,8 @@ (define_insn "mve_vldrdq_gather_shifted_offset_<supf>v2di"
output_asm_insn ("vldrd.u64\t%q0, [%m1, %q2, uxtw #3]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_shifted_offset_<supf>v2di"))
+ (set_attr "length" "4")])
;;
;; [vldrdq_gather_shifted_offset_z_s vldrdq_gather_shifted_offset_z_u]
@@ -3831,7 +4046,8 @@ (define_insn "mve_vldrdq_gather_shifted_offset_z_<supf>v2di"
output_asm_insn ("vpst\n\tvldrdt.u64\t%q0, [%m1, %q2, uxtw #3]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_shifted_offset_<supf>v2di"))
+ (set_attr "length" "8")])
;;
;; [vldrhq_gather_offset_f]
@@ -3851,7 +4067,8 @@ (define_insn "mve_vldrhq_gather_offset_fv8hf"
output_asm_insn ("vldrh.f16\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_offset_fv8hf"))
+ (set_attr "length" "4")])
;;
;; [vldrhq_gather_offset_z_f]
@@ -3873,7 +4090,8 @@ (define_insn "mve_vldrhq_gather_offset_z_fv8hf"
output_asm_insn ("vpst\n\tvldrht.f16\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_offset_fv8hf"))
+ (set_attr "length" "8")])
;;
;; [vldrhq_gather_shifted_offset_f]
@@ -3893,7 +4111,8 @@ (define_insn "mve_vldrhq_gather_shifted_offset_fv8hf"
output_asm_insn ("vldrh.f16\t%q0, [%m1, %q2, uxtw #1]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_shifted_offset_fv8hf"))
+ (set_attr "length" "4")])
;;
;; [vldrhq_gather_shifted_offset_z_f]
@@ -3915,7 +4134,8 @@ (define_insn "mve_vldrhq_gather_shifted_offset_z_fv8hf"
output_asm_insn ("vpst\n\tvldrht.f16\t%q0, [%m1, %q2, uxtw #1]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_shifted_offset_fv8hf"))
+ (set_attr "length" "8")])
;;
;; [vldrwq_gather_base_f]
@@ -3935,7 +4155,8 @@ (define_insn "mve_vldrwq_gather_base_fv4sf"
output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_fv4sf"))
+ (set_attr "length" "4")])
;;
;; [vldrwq_gather_base_z_f]
@@ -3956,7 +4177,8 @@ (define_insn "mve_vldrwq_gather_base_z_fv4sf"
output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%q1, %2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_fv4sf"))
+ (set_attr "length" "8")])
;;
;; [vldrwq_gather_offset_f]
@@ -3976,7 +4198,8 @@ (define_insn "mve_vldrwq_gather_offset_fv4sf"
output_asm_insn ("vldrw.u32\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_offset_fv4sf"))
+ (set_attr "length" "4")])
;;
;; [vldrwq_gather_offset_s vldrwq_gather_offset_u]
@@ -3996,7 +4219,8 @@ (define_insn "mve_vldrwq_gather_offset_<supf>v4si"
output_asm_insn ("vldrw.u32\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_offset_<supf>v4si"))
+ (set_attr "length" "4")])
;;
;; [vldrwq_gather_offset_z_f]
@@ -4018,7 +4242,8 @@ (define_insn "mve_vldrwq_gather_offset_z_fv4sf"
output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_offset_fv4sf"))
+ (set_attr "length" "8")])
;;
;; [vldrwq_gather_offset_z_s vldrwq_gather_offset_z_u]
@@ -4040,7 +4265,8 @@ (define_insn "mve_vldrwq_gather_offset_z_<supf>v4si"
output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%m1, %q2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_offset_<supf>v4si"))
+ (set_attr "length" "8")])
;;
;; [vldrwq_gather_shifted_offset_f]
@@ -4060,7 +4286,8 @@ (define_insn "mve_vldrwq_gather_shifted_offset_fv4sf"
output_asm_insn ("vldrw.u32\t%q0, [%m1, %q2, uxtw #2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_shifted_offset_fv4sf"))
+ (set_attr "length" "4")])
;;
;; [vldrwq_gather_shifted_offset_s vldrwq_gather_shifted_offset_u]
@@ -4080,7 +4307,8 @@ (define_insn "mve_vldrwq_gather_shifted_offset_<supf>v4si"
output_asm_insn ("vldrw.u32\t%q0, [%m1, %q2, uxtw #2]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_shifted_offset_<supf>v4si"))
+ (set_attr "length" "4")])
;;
;; [vldrwq_gather_shifted_offset_z_f]
@@ -4102,7 +4330,8 @@ (define_insn "mve_vldrwq_gather_shifted_offset_z_fv4sf"
output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%m1, %q2, uxtw #2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_shifted_offset_fv4sf"))
+ (set_attr "length" "8")])
;;
;; [vldrwq_gather_shifted_offset_z_s vldrwq_gather_shifted_offset_z_u]
@@ -4124,7 +4353,8 @@ (define_insn "mve_vldrwq_gather_shifted_offset_z_<supf>v4si"
output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%m1, %q2, uxtw #2]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_shifted_offset_<supf>v4si"))
+ (set_attr "length" "8")])
;;
;; [vstrhq_f]
@@ -4143,7 +4373,8 @@ (define_insn "mve_vstrhq_fv8hf"
output_asm_insn ("vstrh.16\t%q1, %E0",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_fv8hf"))
+ (set_attr "length" "4")])
;;
;; [vstrhq_p_f]
@@ -4164,7 +4395,8 @@ (define_insn "mve_vstrhq_p_fv8hf"
output_asm_insn ("vpst\;vstrht.16\t%q1, %E0",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_fv8hf"))
+ (set_attr "length" "8")])
;;
;; [vstrhq_p_s vstrhq_p_u]
@@ -4186,7 +4418,8 @@ (define_insn "mve_vstrhq_p_<supf><mode>"
output_asm_insn ("vpst\;vstrht.<V_sz_elem>\t%q1, %E0",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_<supf><mode>"))
+ (set_attr "length" "8")])
;;
;; [vstrhq_scatter_offset_p_s vstrhq_scatter_offset_p_u]
@@ -4218,7 +4451,8 @@ (define_insn "mve_vstrhq_scatter_offset_p_<supf><mode>_insn"
VSTRHSOQ))]
"TARGET_HAVE_MVE"
"vpst\;vstrht.<V_sz_elem>\t%q2, [%0, %q1]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_scatter_offset_<supf><mode>_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrhq_scatter_offset_s vstrhq_scatter_offset_u]
@@ -4246,7 +4480,8 @@ (define_insn "mve_vstrhq_scatter_offset_<supf><mode>_insn"
VSTRHSOQ))]
"TARGET_HAVE_MVE"
"vstrh.<V_sz_elem>\t%q2, [%0, %q1]"
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_scatter_offset_<supf><mode>_insn"))
+ (set_attr "length" "4")])
;;
;; [vstrhq_scatter_shifted_offset_p_s vstrhq_scatter_shifted_offset_p_u]
@@ -4278,7 +4513,8 @@ (define_insn "mve_vstrhq_scatter_shifted_offset_p_<supf><mode>_insn"
VSTRHSSOQ))]
"TARGET_HAVE_MVE"
"vpst\;vstrht.<V_sz_elem>\t%q2, [%0, %q1, uxtw #1]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_scatter_shifted_offset_<supf><mode>_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrhq_scatter_shifted_offset_s vstrhq_scatter_shifted_offset_u]
@@ -4307,7 +4543,8 @@ (define_insn "mve_vstrhq_scatter_shifted_offset_<supf><mode>_insn"
VSTRHSSOQ))]
"TARGET_HAVE_MVE"
"vstrh.<V_sz_elem>\t%q2, [%0, %q1, uxtw #1]"
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_scatter_shifted_offset_<supf><mode>_insn"))
+ (set_attr "length" "4")])
;;
;; [vstrhq_s, vstrhq_u]
@@ -4326,7 +4563,8 @@ (define_insn "mve_vstrhq_<supf><mode>"
output_asm_insn ("vstrh.<V_sz_elem>\t%q1, %E0",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_<supf><mode>"))
+ (set_attr "length" "4")])
;;
;; [vstrwq_f]
@@ -4345,7 +4583,8 @@ (define_insn "mve_vstrwq_fv4sf"
output_asm_insn ("vstrw.32\t%q1, %E0",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_fv4sf"))
+ (set_attr "length" "4")])
;;
;; [vstrwq_p_f]
@@ -4366,7 +4605,8 @@ (define_insn "mve_vstrwq_p_fv4sf"
output_asm_insn ("vpst\;vstrwt.32\t%q1, %E0",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_fv4sf"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_p_s vstrwq_p_u]
@@ -4387,7 +4627,8 @@ (define_insn "mve_vstrwq_p_<supf>v4si"
output_asm_insn ("vpst\;vstrwt.32\t%q1, %E0",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_<supf>v4si"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_s vstrwq_u]
@@ -4406,7 +4647,8 @@ (define_insn "mve_vstrwq_<supf>v4si"
output_asm_insn ("vstrw.32\t%q1, %E0",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_<supf>v4si"))
+ (set_attr "length" "4")])
(define_expand "@mve_vst1q_f<mode>"
[(match_operand:<MVE_CNVT> 0 "mve_memory_operand")
@@ -4449,7 +4691,8 @@ (define_insn "mve_vstrdq_scatter_base_p_<supf>v2di"
output_asm_insn ("vpst\;\tvstrdt.u64\t%q2, [%q0, %1]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_base_<supf>v2di"))
+ (set_attr "length" "8")])
;;
;; [vstrdq_scatter_base_s vstrdq_scatter_base_u]
@@ -4471,7 +4714,8 @@ (define_insn "mve_vstrdq_scatter_base_<supf>v2di"
output_asm_insn ("vstrd.u64\t%q2, [%q0, %1]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_base_<supf>v2di"))
+ (set_attr "length" "4")])
;;
;; [vstrdq_scatter_offset_p_s vstrdq_scatter_offset_p_u]
@@ -4502,7 +4746,8 @@ (define_insn "mve_vstrdq_scatter_offset_p_<supf>v2di_insn"
VSTRDSOQ))]
"TARGET_HAVE_MVE"
"vpst\;vstrdt.64\t%q2, [%0, %q1]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_offset_<supf>v2di_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrdq_scatter_offset_s vstrdq_scatter_offset_u]
@@ -4530,7 +4775,8 @@ (define_insn "mve_vstrdq_scatter_offset_<supf>v2di_insn"
VSTRDSOQ))]
"TARGET_HAVE_MVE"
"vstrd.64\t%q2, [%0, %q1]"
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_offset_<supf>v2di_insn"))
+ (set_attr "length" "4")])
;;
;; [vstrdq_scatter_shifted_offset_p_s vstrdq_scatter_shifted_offset_p_u]
@@ -4562,7 +4808,8 @@ (define_insn "mve_vstrdq_scatter_shifted_offset_p_<supf>v2di_insn"
VSTRDSSOQ))]
"TARGET_HAVE_MVE"
"vpst\;vstrdt.64\t%q2, [%0, %q1, uxtw #3]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_shifted_offset_<supf>v2di_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrdq_scatter_shifted_offset_s vstrdq_scatter_shifted_offset_u]
@@ -4591,7 +4838,8 @@ (define_insn "mve_vstrdq_scatter_shifted_offset_<supf>v2di_insn"
VSTRDSSOQ))]
"TARGET_HAVE_MVE"
"vstrd.64\t%q2, [%0, %q1, uxtw #3]"
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_shifted_offset_<supf>v2di_insn"))
+ (set_attr "length" "4")])
;;
;; [vstrhq_scatter_offset_f]
@@ -4619,7 +4867,8 @@ (define_insn "mve_vstrhq_scatter_offset_fv8hf_insn"
VSTRHQSO_F))]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vstrh.16\t%q2, [%0, %q1]"
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_scatter_offset_fv8hf_insn"))
+ (set_attr "length" "4")])
;;
;; [vstrhq_scatter_offset_p_f]
@@ -4650,7 +4899,8 @@ (define_insn "mve_vstrhq_scatter_offset_p_fv8hf_insn"
VSTRHQSO_F))]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vstrht.16\t%q2, [%0, %q1]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_scatter_offset_fv8hf_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrhq_scatter_shifted_offset_f]
@@ -4678,7 +4928,8 @@ (define_insn "mve_vstrhq_scatter_shifted_offset_fv8hf_insn"
VSTRHQSSO_F))]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vstrh.16\t%q2, [%0, %q1, uxtw #1]"
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_scatter_shifted_offset_fv8hf_insn"))
+ (set_attr "length" "4")])
;;
;; [vstrhq_scatter_shifted_offset_p_f]
@@ -4710,7 +4961,8 @@ (define_insn "mve_vstrhq_scatter_shifted_offset_p_fv8hf_insn"
VSTRHQSSO_F))]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vstrht.16\t%q2, [%0, %q1, uxtw #1]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_scatter_shifted_offset_fv8hf_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_scatter_base_f]
@@ -4732,7 +4984,8 @@ (define_insn "mve_vstrwq_scatter_base_fv4sf"
output_asm_insn ("vstrw.u32\t%q2, [%q0, %1]",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_fv4sf"))
+ (set_attr "length" "4")])
;;
;; [vstrwq_scatter_base_p_f]
@@ -4755,7 +5008,8 @@ (define_insn "mve_vstrwq_scatter_base_p_fv4sf"
output_asm_insn ("vpst\n\tvstrwt.u32\t%q2, [%q0, %1]",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_fv4sf"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_scatter_offset_f]
@@ -4783,7 +5037,8 @@ (define_insn "mve_vstrwq_scatter_offset_fv4sf_insn"
VSTRWQSO_F))]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vstrw.32\t%q2, [%0, %q1]"
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_offset_fv4sf_insn"))
+ (set_attr "length" "4")])
;;
;; [vstrwq_scatter_offset_p_f]
@@ -4814,7 +5069,8 @@ (define_insn "mve_vstrwq_scatter_offset_p_fv4sf_insn"
VSTRWQSO_F))]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vstrwt.32\t%q2, [%0, %q1]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_offset_fv4sf_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_scatter_offset_s vstrwq_scatter_offset_u]
@@ -4845,7 +5101,8 @@ (define_insn "mve_vstrwq_scatter_offset_p_<supf>v4si_insn"
VSTRWSOQ))]
"TARGET_HAVE_MVE"
"vpst\;vstrwt.32\t%q2, [%0, %q1]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_offset_<supf>v4si_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_scatter_offset_s vstrwq_scatter_offset_u]
@@ -4873,7 +5130,8 @@ (define_insn "mve_vstrwq_scatter_offset_<supf>v4si_insn"
VSTRWSOQ))]
"TARGET_HAVE_MVE"
"vstrw.32\t%q2, [%0, %q1]"
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_offset_<supf>v4si_insn"))
+ (set_attr "length" "4")])
;;
;; [vstrwq_scatter_shifted_offset_f]
@@ -4901,7 +5159,8 @@ (define_insn "mve_vstrwq_scatter_shifted_offset_fv4sf_insn"
VSTRWQSSO_F))]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vstrw.32\t%q2, [%0, %q1, uxtw #2]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_shifted_offset_fv4sf_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_scatter_shifted_offset_p_f]
@@ -4933,7 +5192,8 @@ (define_insn "mve_vstrwq_scatter_shifted_offset_p_fv4sf_insn"
VSTRWQSSO_F))]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;vstrwt.32\t%q2, [%0, %q1, uxtw #2]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_shifted_offset_fv4sf_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_scatter_shifted_offset_p_s vstrwq_scatter_shifted_offset_p_u]
@@ -4965,7 +5225,8 @@ (define_insn "mve_vstrwq_scatter_shifted_offset_p_<supf>v4si_insn"
VSTRWSSOQ))]
"TARGET_HAVE_MVE"
"vpst\;vstrwt.32\t%q2, [%0, %q1, uxtw #2]"
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_shifted_offset_<supf>v4si_insn"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_scatter_shifted_offset_s vstrwq_scatter_shifted_offset_u]
@@ -4994,7 +5255,8 @@ (define_insn "mve_vstrwq_scatter_shifted_offset_<supf>v4si_insn"
VSTRWSSOQ))]
"TARGET_HAVE_MVE"
"vstrw.32\t%q2, [%0, %q1, uxtw #2]"
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_shifted_offset_<supf>v4si_insn"))
+ (set_attr "length" "4")])
;;
;; [vidupq_n_u])
@@ -5062,7 +5324,8 @@ (define_insn "mve_vidupq_m_wb_u<mode>_insn"
(match_operand:SI 6 "immediate_operand" "i")))]
"TARGET_HAVE_MVE"
"vpst\;\tvidupt.u%#<V_sz_elem>\t%q0, %2, %4"
- [(set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vidupq_u<mode>_insn"))
+ (set_attr "length""8")])
;;
;; [vddupq_n_u])
@@ -5130,7 +5393,8 @@ (define_insn "mve_vddupq_m_wb_u<mode>_insn"
(match_operand:SI 6 "immediate_operand" "i")))]
"TARGET_HAVE_MVE"
"vpst\;vddupt.u%#<V_sz_elem>\t%q0, %2, %4"
- [(set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vddupq_u<mode>_insn"))
+ (set_attr "length""8")])
;;
;; [vdwdupq_n_u])
@@ -5246,8 +5510,9 @@ (define_insn "mve_vdwdupq_m_wb_u<mode>_insn"
]
"TARGET_HAVE_MVE"
"vpst\;vdwdupt.u%#<V_sz_elem>\t%q2, %3, %R4, %5"
- [(set_attr "type" "mve_move")
- (set_attr "length""8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vdwdupq_wb_u<mode>_insn"))
+ (set_attr "type" "mve_move")
+ (set_attr "length""8")])
;;
;; [viwdupq_n_u])
@@ -5363,7 +5628,8 @@ (define_insn "mve_viwdupq_m_wb_u<mode>_insn"
]
"TARGET_HAVE_MVE"
"vpst\;\tviwdupt.u%#<V_sz_elem>\t%q2, %3, %R4, %5"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_viwdupq_wb_u<mode>_insn"))
+ (set_attr "type" "mve_move")
(set_attr "length""8")])
;;
@@ -5389,7 +5655,8 @@ (define_insn "mve_vstrwq_scatter_base_wb_<supf>v4si"
output_asm_insn ("vstrw.u32\t%q2, [%q0, %1]!",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_wb_<supf>v4si"))
+ (set_attr "length" "4")])
;;
;; [vstrwq_scatter_base_wb_p_s vstrwq_scatter_base_wb_p_u]
@@ -5415,7 +5682,8 @@ (define_insn "mve_vstrwq_scatter_base_wb_p_<supf>v4si"
output_asm_insn ("vpst\;\tvstrwt.u32\t%q2, [%q0, %1]!",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_wb_<supf>v4si"))
+ (set_attr "length" "8")])
;;
;; [vstrwq_scatter_base_wb_f]
@@ -5440,7 +5708,8 @@ (define_insn "mve_vstrwq_scatter_base_wb_fv4sf"
output_asm_insn ("vstrw.u32\t%q2, [%q0, %1]!",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_wb_fv4sf"))
+ (set_attr "length" "4")])
;;
;; [vstrwq_scatter_base_wb_p_f]
@@ -5466,7 +5735,8 @@ (define_insn "mve_vstrwq_scatter_base_wb_p_fv4sf"
output_asm_insn ("vpst\;vstrwt.u32\t%q2, [%q0, %1]!",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_wb_fv4sf"))
+ (set_attr "length" "8")])
;;
;; [vstrdq_scatter_base_wb_s vstrdq_scatter_base_wb_u]
@@ -5491,7 +5761,8 @@ (define_insn "mve_vstrdq_scatter_base_wb_<supf>v2di"
output_asm_insn ("vstrd.u64\t%q2, [%q0, %1]!",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_base_wb_<supf>v2di"))
+ (set_attr "length" "4")])
;;
;; [vstrdq_scatter_base_wb_p_s vstrdq_scatter_base_wb_p_u]
@@ -5517,7 +5788,8 @@ (define_insn "mve_vstrdq_scatter_base_wb_p_<supf>v2di"
output_asm_insn ("vpst\;vstrdt.u64\t%q2, [%q0, %1]!",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_base_wb_<supf>v2di"))
+ (set_attr "length" "8")])
(define_expand "mve_vldrwq_gather_base_wb_<supf>v4si"
[(match_operand:V4SI 0 "s_register_operand")
@@ -5569,7 +5841,8 @@ (define_insn "mve_vldrwq_gather_base_wb_<supf>v4si_insn"
output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]!",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_wb_<supf>v4si_insn"))
+ (set_attr "length" "4")])
(define_expand "mve_vldrwq_gather_base_wb_z_<supf>v4si"
[(match_operand:V4SI 0 "s_register_operand")
@@ -5625,7 +5898,8 @@ (define_insn "mve_vldrwq_gather_base_wb_z_<supf>v4si_insn"
output_asm_insn ("vpst\;vldrwt.u32\t%q0, [%q1, %2]!",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_wb_<supf>v4si_insn"))
+ (set_attr "length" "8")])
(define_expand "mve_vldrwq_gather_base_wb_fv4sf"
[(match_operand:V4SI 0 "s_register_operand")
@@ -5677,7 +5951,8 @@ (define_insn "mve_vldrwq_gather_base_wb_fv4sf_insn"
output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]!",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_wb_fv4sf_insn"))
+ (set_attr "length" "4")])
(define_expand "mve_vldrwq_gather_base_wb_z_fv4sf"
[(match_operand:V4SI 0 "s_register_operand")
@@ -5734,7 +6009,8 @@ (define_insn "mve_vldrwq_gather_base_wb_z_fv4sf_insn"
output_asm_insn ("vpst\;vldrwt.u32\t%q0, [%q1, %2]!",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_wb_fv4sf_insn"))
+ (set_attr "length" "8")])
(define_expand "mve_vldrdq_gather_base_wb_<supf>v2di"
[(match_operand:V2DI 0 "s_register_operand")
@@ -5787,7 +6063,8 @@ (define_insn "mve_vldrdq_gather_base_wb_<supf>v2di_insn"
output_asm_insn ("vldrd.64\t%q0, [%q1, %2]!",ops);
return "";
}
- [(set_attr "length" "4")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_base_wb_<supf>v2di_insn"))
+ (set_attr "length" "4")])
(define_expand "mve_vldrdq_gather_base_wb_z_<supf>v2di"
[(match_operand:V2DI 0 "s_register_operand")
@@ -5826,7 +6103,7 @@ (define_insn "get_fpscr_nzcvqc"
(unspec_volatile:SI [(reg:SI VFPCC_REGNUM)] UNSPEC_GET_FPSCR_NZCVQC))]
"TARGET_HAVE_MVE"
"vmrs\\t%0, FPSCR_nzcvqc"
- [(set_attr "type" "mve_move")])
+ [(set_attr "type" "mve_move")])
(define_insn "set_fpscr_nzcvqc"
[(set (reg:SI VFPCC_REGNUM)
@@ -5834,7 +6111,7 @@ (define_insn "set_fpscr_nzcvqc"
VUNSPEC_SET_FPSCR_NZCVQC))]
"TARGET_HAVE_MVE"
"vmsr\\tFPSCR_nzcvqc, %0"
- [(set_attr "type" "mve_move")])
+ [(set_attr "type" "mve_move")])
;;
;; [vldrdq_gather_base_wb_z_s vldrdq_gather_base_wb_z_u]
@@ -5859,7 +6136,8 @@ (define_insn "mve_vldrdq_gather_base_wb_z_<supf>v2di_insn"
output_asm_insn ("vpst\;vldrdt.u64\t%q0, [%q1, %2]!",ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_base_wb_<supf>v2di_insn"))
+ (set_attr "length" "8")])
;;
;; [vadciq_m_s, vadciq_m_u])
;;
@@ -5876,7 +6154,8 @@ (define_insn "mve_vadciq_m_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vpst\;vadcit.i32\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vadciq_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length" "8")])
;;
@@ -5893,7 +6172,8 @@ (define_insn "mve_vadciq_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vadci.i32\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vadciq_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length" "4")])
;;
@@ -5912,7 +6192,8 @@ (define_insn "mve_vadcq_m_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vpst\;vadct.i32\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vadcq_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length" "8")])
;;
@@ -5929,7 +6210,8 @@ (define_insn "mve_vadcq_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vadc.i32\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vadcq_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length" "4")
(set_attr "conds" "set")])
@@ -5949,7 +6231,8 @@ (define_insn "mve_vsbciq_m_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vpst\;vsbcit.i32\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vsbciq_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length" "8")])
;;
@@ -5966,7 +6249,8 @@ (define_insn "mve_vsbciq_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vsbci.i32\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vsbciq_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length" "4")])
;;
@@ -5985,7 +6269,8 @@ (define_insn "mve_vsbcq_m_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vpst\;vsbct.i32\t%q0, %q2, %q3"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vsbcq_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length" "8")])
;;
@@ -6002,7 +6287,8 @@ (define_insn "mve_vsbcq_<supf>v4si"
]
"TARGET_HAVE_MVE"
"vsbc.i32\t%q0, %q1, %q2"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vsbcq_<supf>v4si"))
+ (set_attr "type" "mve_move")
(set_attr "length" "4")])
;;
@@ -6031,7 +6317,7 @@ (define_insn "mve_vst2q<mode>"
"vst21.<V_sz_elem>\t{%q0, %q1}, %3", ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set_attr "length" "8")])
;;
;; [vld2q])
@@ -6059,7 +6345,7 @@ (define_insn "mve_vld2q<mode>"
"vld21.<V_sz_elem>\t{%q0, %q1}, %3", ops);
return "";
}
- [(set_attr "length" "8")])
+ [(set_attr "length" "8")])
;;
;; [vld4q])
@@ -6402,7 +6688,8 @@ (define_insn "mve_vshlcq_m_<supf><mode>"
]
"TARGET_HAVE_MVE"
"vpst\;vshlct\t%q0, %1, %4"
- [(set_attr "type" "mve_move")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vshlcq_<supf><mode>"))
+ (set_attr "type" "mve_move")
(set_attr "length" "8")])
;; CDE instructions on MVE registers.
@@ -6414,7 +6701,8 @@ (define_insn "arm_vcx1qv16qi"
UNSPEC_VCDE))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vcx1\\tp%c1, %q0, #%c2"
- [(set_attr "type" "coproc")]
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_arm_vcx1qv16qi"))
+ (set_attr "type" "coproc")]
)
(define_insn "arm_vcx1qav16qi"
@@ -6425,7 +6713,8 @@ (define_insn "arm_vcx1qav16qi"
UNSPEC_VCDEA))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vcx1a\\tp%c1, %q0, #%c3"
- [(set_attr "type" "coproc")]
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_arm_vcx1qav16qi"))
+ (set_attr "type" "coproc")]
)
(define_insn "arm_vcx2qv16qi"
@@ -6436,7 +6725,8 @@ (define_insn "arm_vcx2qv16qi"
UNSPEC_VCDE))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vcx2\\tp%c1, %q0, %q2, #%c3"
- [(set_attr "type" "coproc")]
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_arm_vcx2qv16qi"))
+ (set_attr "type" "coproc")]
)
(define_insn "arm_vcx2qav16qi"
@@ -6448,7 +6738,8 @@ (define_insn "arm_vcx2qav16qi"
UNSPEC_VCDEA))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vcx2a\\tp%c1, %q0, %q3, #%c4"
- [(set_attr "type" "coproc")]
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_arm_vcx2qav16qi"))
+ (set_attr "type" "coproc")]
)
(define_insn "arm_vcx3qv16qi"
@@ -6460,7 +6751,8 @@ (define_insn "arm_vcx3qv16qi"
UNSPEC_VCDE))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vcx3\\tp%c1, %q0, %q2, %q3, #%c4"
- [(set_attr "type" "coproc")]
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_arm_vcx3qv16qi"))
+ (set_attr "type" "coproc")]
)
(define_insn "arm_vcx3qav16qi"
@@ -6473,7 +6765,8 @@ (define_insn "arm_vcx3qav16qi"
UNSPEC_VCDEA))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vcx3a\\tp%c1, %q0, %q3, %q4, #%c5"
- [(set_attr "type" "coproc")]
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_arm_vcx3qav16qi"))
+ (set_attr "type" "coproc")]
)
(define_insn "arm_vcx1q<a>_p_v16qi"
@@ -6485,7 +6778,8 @@ (define_insn "arm_vcx1q<a>_p_v16qi"
CDE_VCX))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vpst\;vcx1<a>t\\tp%c1, %q0, #%c3"
- [(set_attr "type" "coproc")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_arm_vcx1q<a>v16qi"))
+ (set_attr "type" "coproc")
(set_attr "length" "8")]
)
@@ -6499,7 +6793,8 @@ (define_insn "arm_vcx2q<a>_p_v16qi"
CDE_VCX))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vpst\;vcx2<a>t\\tp%c1, %q0, %q3, #%c4"
- [(set_attr "type" "coproc")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_arm_vcx2q<a>v16qi"))
+ (set_attr "type" "coproc")
(set_attr "length" "8")]
)
@@ -6514,11 +6809,12 @@ (define_insn "arm_vcx3q<a>_p_v16qi"
CDE_VCX))]
"TARGET_CDE && TARGET_HAVE_MVE"
"vpst\;vcx3<a>t\\tp%c1, %q0, %q3, %q4, #%c5"
- [(set_attr "type" "coproc")
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_arm_vcx3q<a>v16qi"))
+ (set_attr "type" "coproc")
(set_attr "length" "8")]
)
-(define_insn "*movmisalign<mode>_mve_store"
+(define_insn "movmisalign<mode>_mve_store"
[(set (match_operand:MVE_VLD_ST 0 "mve_memory_operand" "=Ux")
(unspec:MVE_VLD_ST [(match_operand:MVE_VLD_ST 1 "s_register_operand" " w")]
UNSPEC_MISALIGNED_ACCESS))]
@@ -6526,11 +6822,12 @@ (define_insn "*movmisalign<mode>_mve_store"
|| (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode)))
&& !BYTES_BIG_ENDIAN && unaligned_access"
"vstr<V_sz_elem1>.<V_sz_elem>\t%q1, %E0"
- [(set_attr "type" "mve_store")]
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_movmisalign<mode>_mve_store"))
+ (set_attr "type" "mve_store")]
)
-(define_insn "*movmisalign<mode>_mve_load"
+(define_insn "movmisalign<mode>_mve_load"
[(set (match_operand:MVE_VLD_ST 0 "s_register_operand" "=w")
(unspec:MVE_VLD_ST [(match_operand:MVE_VLD_ST 1 "mve_memory_operand" " Ux")]
UNSPEC_MISALIGNED_ACCESS))]
@@ -6538,7 +6835,8 @@ (define_insn "*movmisalign<mode>_mve_load"
|| (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode)))
&& !BYTES_BIG_ENDIAN && unaligned_access"
"vldr<V_sz_elem1>.<V_sz_elem>\t%q0, %E1"
- [(set_attr "type" "mve_load")]
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_movmisalign<mode>_mve_load"))
+ (set_attr "type" "mve_load")]
)
;; Expander for VxBI moves
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 9af8429968d8662b3c814306c94f033434378e7d..74871cb984b3fe1fb9571841cdcae39631abf8e2 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -366,7 +366,8 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
"@
<mve_insn>.<supf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
* return neon_output_shift_immediate (\"vshl\", 'i', &operands[2], <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode), true);"
- [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
+ (set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
)
(define_expand "vashl<mode>3"
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 2/2] arm: Add support for MVE Tail-Predicated Low Overhead Loops
2023-12-18 11:53 ` [PATCH 2/2] arm: Add support for MVE Tail-Predicated Low Overhead Loops Andre Vieira
@ 2023-12-20 16:54 ` Andre Vieira (lists)
0 siblings, 0 replies; 5+ messages in thread
From: Andre Vieira (lists) @ 2023-12-20 16:54 UTC (permalink / raw)
To: gcc-patches; +Cc: Richard.Earnshaw, Stam Markianos-Wright
[-- Attachment #1: Type: text/plain, Size: 1124 bytes --]
Squashed the definition and changes to predicated_doloop_end_internal
and dlstp*_insn into this patch to make sure the first patch builds
independently
On 18/12/2023 11:53, Andre Vieira wrote:
>
> Reworked Stam's patch after comments in:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-December/640362.html
>
> The original gcc ChangeLog remains unchanged, but I did split up some tests so
> here is the testsuite ChangeLog.
>
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/arm/lob.h: Update framework.
> * gcc.target/arm/lob1.c: Likewise.
> * gcc.target/arm/lob6.c: Likewise.
> * gcc.target/arm/mve/dlstp-compile-asm.c: New test.
> * gcc.target/arm/mve/dlstp-int16x8.c: New test.
> * gcc.target/arm/mve/dlstp-int16x8-run.c: New test.
> * gcc.target/arm/mve/dlstp-int32x4.c: New test.
> * gcc.target/arm/mve/dlstp-int32x4-run.c: New test.
> * gcc.target/arm/mve/dlstp-int64x2.c: New test.
> * gcc.target/arm/mve/dlstp-int64x2-run.c: New test.
> * gcc.target/arm/mve/dlstp-int8x16.c: New test.
> * gcc.target/arm/mve/dlstp-int8x16-run.c: New test.
> * gcc.target/arm/mve/dlstp-invalid-asm.c: New test.
>
[-- Attachment #2: 0002-arm-Add-support-for-MVE-Tail-Predicated-Low-Overhead_v2.patch --]
[-- Type: text/plain, Size: 107215 bytes --]
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 2f5ca79ed8ddd647b212782a0454ee4fefc07257..4f164c547406c43219900c111401540c7ef9d7d1 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -65,8 +65,8 @@ extern void arm_emit_speculation_barrier_function (void);
extern void arm_decompose_di_binop (rtx, rtx, rtx *, rtx *, rtx *, rtx *);
extern bool arm_q_bit_access (void);
extern bool arm_ge_bits_access (void);
-extern bool arm_target_insn_ok_for_lob (rtx);
-
+extern bool arm_target_bb_ok_for_lob (basic_block);
+extern rtx arm_attempt_dlstp_transform (rtx);
#ifdef RTX_CODE
enum reg_class
arm_mode_base_reg_class (machine_mode);
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 0c0cb14a8a4f043357b8acd7042a9f9386af1eb1..1ee72bcb7ec4bea5feea8453ceef7702b0088a73 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -668,6 +668,12 @@ static const scoped_attribute_specs *const arm_attribute_table[] =
#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
+#undef TARGET_LOOP_UNROLL_ADJUST
+#define TARGET_LOOP_UNROLL_ADJUST arm_loop_unroll_adjust
+
+#undef TARGET_PREDICT_DOLOOP_P
+#define TARGET_PREDICT_DOLOOP_P arm_predict_doloop_p
+
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
@@ -34483,19 +34489,1147 @@ arm_invalid_within_doloop (const rtx_insn *insn)
}
bool
-arm_target_insn_ok_for_lob (rtx insn)
+arm_target_bb_ok_for_lob (basic_block bb)
{
- basic_block bb = BLOCK_FOR_INSN (insn);
/* Make sure the basic block of the target insn is a simple latch
having as single predecessor and successor the body of the loop
itself. Only simple loops with a single basic block as body are
supported for 'low over head loop' making sure that LE target is
above LE itself in the generated code. */
-
return single_succ_p (bb)
- && single_pred_p (bb)
- && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
- && contains_no_active_insn_p (bb);
+ && single_pred_p (bb)
+ && single_succ_edge (bb)->dest == single_pred_edge (bb)->src;
+}
+
+/* Utility fuction: Given a VCTP or a VCTP_M insn, return the number of MVE
+ lanes based on the machine mode being used. */
+
+static int
+arm_mve_get_vctp_lanes (rtx_insn *insn)
+{
+ rtx insn_set = single_set (insn);
+ if (insn_set
+ && GET_CODE (SET_SRC (insn_set)) == UNSPEC
+ && (XINT (SET_SRC (insn_set), 1) == VCTP
+ || XINT (SET_SRC (insn_set), 1) == VCTP_M))
+ {
+ machine_mode mode = GET_MODE (SET_SRC (insn_set));
+ return (VECTOR_MODE_P (mode) && VALID_MVE_PRED_MODE (mode))
+ ? GET_MODE_NUNITS (mode) : 0;
+ }
+ return 0;
+}
+
+/* Check if INSN requires the use of the VPR reg, if it does, return the
+ sub-rtx of the VPR reg. The TYPE argument controls whether
+ this function should:
+ * For TYPE == 0, check all operands, including the OUT operands,
+ and return the first occurrence of the VPR reg.
+ * For TYPE == 1, only check the input operands.
+ * For TYPE == 2, only check the output operands.
+ (INOUT operands are considered both as input and output operands)
+*/
+static rtx
+arm_get_required_vpr_reg (rtx_insn *insn, unsigned int type = 0)
+{
+ gcc_assert (type < 3);
+ if (!NONJUMP_INSN_P (insn))
+ return NULL_RTX;
+
+ bool requires_vpr;
+ extract_constrain_insn (insn);
+ int n_operands = recog_data.n_operands;
+ if (recog_data.n_alternatives == 0)
+ return NULL_RTX;
+
+ /* Fill in recog_op_alt with information about the constraints of
+ this insn. */
+ preprocess_constraints (insn);
+
+ for (int op = 0; op < n_operands; op++)
+ {
+ requires_vpr = true;
+ if (type == 1 && recog_data.operand_type[op] == OP_OUT)
+ continue;
+ else if (type == 2 && recog_data.operand_type[op] == OP_IN)
+ continue;
+
+ /* Iterate through alternatives of operand "op" in recog_op_alt and
+ identify if the operand is required to be the VPR. */
+ for (int alt = 0; alt < recog_data.n_alternatives; alt++)
+ {
+ const operand_alternative *op_alt
+ = &recog_op_alt[alt * n_operands];
+ /* Fetch the reg_class for each entry and check it against the
+ VPR_REG reg_class. */
+ if (alternative_class (op_alt, op) != VPR_REG)
+ requires_vpr = false;
+ }
+ /* If all alternatives of the insn require the VPR reg for this operand,
+ it means that either this is VPR-generating instruction, like a vctp,
+ vcmp, etc., or it is a VPT-predicated insruction. Return the subrtx
+ of the VPR reg operand. */
+ if (requires_vpr)
+ return recog_data.operand[op];
+ }
+ return NULL_RTX;
+}
+
+/* Wrapper function of arm_get_required_vpr_reg with TYPE == 1, so return
+ something only if the VPR reg is an input operand to the insn. */
+
+static rtx
+arm_get_required_vpr_reg_param (rtx_insn *insn)
+{
+ return arm_get_required_vpr_reg (insn, 1);
+}
+
+/* Wrapper function of arm_get_required_vpr_reg with TYPE == 2, so return
+ something only if the VPR reg is the return value, an output of, or is
+ clobbered by the insn. */
+
+static rtx
+arm_get_required_vpr_reg_ret_val (rtx_insn *insn)
+{
+ return arm_get_required_vpr_reg (insn, 2);
+}
+
+/* Scan the basic block of a loop body for a vctp instruction. If there is
+ at least vctp instruction, return the first rtx_insn *. */
+
+static rtx_insn *
+arm_mve_get_loop_vctp (basic_block bb)
+{
+ rtx_insn *insn = BB_HEAD (bb);
+
+ /* Now scan through all the instruction patterns and pick out the VCTP
+ instruction. We require arm_get_required_vpr_reg_param to be false
+ to make sure we pick up a VCTP, rather than a VCTP_M. */
+ FOR_BB_INSNS (bb, insn)
+ if (NONDEBUG_INSN_P (insn))
+ if (arm_get_required_vpr_reg_ret_val (insn)
+ && (arm_mve_get_vctp_lanes (insn) != 0)
+ && !arm_get_required_vpr_reg_param (insn))
+ return insn;
+ return NULL;
+}
+
+/* Return true if INSN is a MVE instruction that is VPT-predicable, but in
+ its unpredicated form, or if it is predicated, but on a predicate other
+ than VPR_REG. */
+
+static bool
+arm_mve_vec_insn_is_unpredicated_or_uses_other_predicate (rtx_insn *insn,
+ rtx vpr_reg)
+{
+ rtx insn_vpr_reg_operand;
+ if (MVE_VPT_UNPREDICATED_INSN_P (insn)
+ || (MVE_VPT_PREDICATED_INSN_P (insn)
+ && (insn_vpr_reg_operand = arm_get_required_vpr_reg_param (insn))
+ && !rtx_equal_p (vpr_reg, insn_vpr_reg_operand)))
+ return true;
+ else
+ return false;
+}
+
+/* Return true if INSN is a MVE instruction that is VPT-predicable and is
+ predicated on VPR_REG. */
+
+static bool
+arm_mve_vec_insn_is_predicated_with_this_predicate (rtx_insn *insn,
+ rtx vpr_reg)
+{
+ rtx insn_vpr_reg_operand;
+ if (MVE_VPT_PREDICATED_INSN_P (insn)
+ && (insn_vpr_reg_operand = arm_get_required_vpr_reg_param (insn))
+ && rtx_equal_p (vpr_reg, insn_vpr_reg_operand))
+ return true;
+ else
+ return false;
+}
+
+/* Utility function to identify if INSN is an MVE instruction that performs
+ some across-vector operation (and as a result does not align with normal
+ lane predication rules). All such instructions give one only scalar
+ output, except for vshlcq which gives a PARALLEL of a vector and a scalar
+ (one vector result and one carry output). */
+
+static bool
+arm_is_mve_across_vector_insn (rtx_insn* insn)
+{
+ df_ref insn_defs = NULL;
+ if (!MVE_VPT_PREDICABLE_INSN_P (insn))
+ return false;
+
+ bool is_across_vector = false;
+ FOR_EACH_INSN_DEF (insn_defs, insn)
+ if (!VALID_MVE_MODE (GET_MODE (DF_REF_REG (insn_defs)))
+ && !arm_get_required_vpr_reg_ret_val (insn))
+ is_across_vector = true;
+
+ return is_across_vector;
+}
+
+/* Utility function to identify if INSN is an MVE load or store instruction.
+ * For TYPE == 0, check all operands. If the function returns true,
+ INSN is a load or a store insn.
+ * For TYPE == 1, only check the input operands. If the function returns
+ true, INSN is a load insn.
+ * For TYPE == 2, only check the output operands. If the function returns
+ true, INSN is a store insn. */
+
+static bool
+arm_is_mve_load_store_insn (rtx_insn* insn, int type = 0)
+{
+ int n_operands = recog_data.n_operands;
+ extract_insn (insn);
+
+ for (int op = 0; op < n_operands; op++)
+ {
+ if (type == 1 && recog_data.operand_type[op] == OP_OUT)
+ continue;
+ else if (type == 2 && recog_data.operand_type[op] == OP_IN)
+ continue;
+ if (mve_memory_operand (recog_data.operand[op],
+ GET_MODE (recog_data.operand[op])))
+ return true;
+ }
+ return false;
+}
+
+/* When transforming an MVE intrinsic loop into an MVE Tail Predicated Low
+ Overhead Loop, there are a number of instructions that, if in their
+ unpredicated form, act across vector lanes, but are still safe to include
+ within the loop, despite the implicit predication added to the vector lanes.
+ This list has been compiled by carefully analyzing the instruction
+ pseudocode in the Arm-ARM.
+ All other across-vector instructions aren't allowed, because the addition
+ of implicit predication could influnce the result of the operation.
+ Any new across-vector instructions to the MVE ISA will have to assessed for
+ inclusion to this list. */
+
+static bool
+arm_mve_is_allowed_unpredic_across_vector_insn (rtx_insn* insn)
+{
+ gcc_assert (MVE_VPT_UNPREDICATED_INSN_P (insn)
+ && arm_is_mve_across_vector_insn (insn));
+ rtx insn_set = single_set (insn);
+ if (!insn_set)
+ return false;
+ rtx unspec = SET_SRC (insn_set);
+ if (GET_CODE (unspec) != UNSPEC)
+ return false;
+ switch (XINT (unspec, 1))
+ {
+ case VADDVQ_U:
+ case VADDVQ_S:
+ case VADDVAQ_U:
+ case VADDVAQ_S:
+ case VMLADAVQ_U:
+ case VMLADAVQ_S:
+ case VMLADAVXQ_S:
+ case VMLADAVAQ_U:
+ case VMLADAVAQ_S:
+ case VMLADAVAXQ_S:
+ case VABAVQ_S:
+ case VABAVQ_U:
+ case VADDLVQ_S:
+ case VADDLVQ_U:
+ case VADDLVAQ_S:
+ case VADDLVAQ_U:
+ case VMAXVQ_U:
+ case VMAXAVQ_S:
+ case VMLALDAVQ_U:
+ case VMLALDAVXQ_U:
+ case VMLALDAVXQ_S:
+ case VMLALDAVQ_S:
+ case VMLALDAVAQ_S:
+ case VMLALDAVAQ_U:
+ case VMLALDAVAXQ_S:
+ case VMLALDAVAXQ_U:
+ case VMLSDAVQ_S:
+ case VMLSDAVXQ_S:
+ case VMLSDAVAXQ_S:
+ case VMLSDAVAQ_S:
+ case VMLSLDAVQ_S:
+ case VMLSLDAVXQ_S:
+ case VMLSLDAVAQ_S:
+ case VMLSLDAVAXQ_S:
+ case VRMLALDAVHXQ_S:
+ case VRMLALDAVHQ_U:
+ case VRMLALDAVHQ_S:
+ case VRMLALDAVHAQ_S:
+ case VRMLALDAVHAQ_U:
+ case VRMLALDAVHAXQ_S:
+ case VRMLSLDAVHQ_S:
+ case VRMLSLDAVHXQ_S:
+ case VRMLSLDAVHAQ_S:
+ case VRMLSLDAVHAXQ_S:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+/* Scan through the DF chain backwards within the basic block and
+ determine if any of the USEs of the original insn (or the USEs of the insns
+ where thy were DEF-ed, etc.) were affected by implicit VPT
+ predication of an MVE_VPT_UNPREDICATED_INSN_P in a dlstp/letp loop.
+ This function returns true if the insn is affected implicit predication
+ and false otherwise.
+ Having such implicit predication on an unpredicated insn wouldn't in itself
+ block tail predication, because the output of that insn might then be used
+ in a correctly predicated store insn, where the disabled lanes will be
+ ignored. To verify this we later call:
+ `arm_mve_check_df_chain_fwd_for_implic_predic_impact`, which will check the
+ DF chains forward to see if any implicitly-predicated operand gets used in
+ an improper way. */
+
+static bool
+arm_mve_check_df_chain_back_for_implic_predic
+ (hash_map <rtx_insn *, bool> *safe_insn_map, rtx_insn *insn_in,
+ rtx vctp_vpr_generated)
+{
+
+ auto_vec<rtx_insn *> worklist;
+ worklist.safe_push (insn_in);
+
+ bool *temp = NULL;
+
+ while (worklist.length () > 0)
+ {
+ rtx_insn *insn = worklist.pop ();
+
+ if ((temp = safe_insn_map->get (insn)))
+ return *temp;
+
+ basic_block body = BLOCK_FOR_INSN (insn);
+
+ /* The circumstances under which an instruction is affected by "implicit
+ predication" are as follows:
+ * It is an UNPREDICATED_INSN_P:
+ * That loads/stores from/to memory.
+ * Where any one of its operands is an MVE vector from outside the
+ loop body bb.
+ Or:
+ * Any of it's operands were affected earlier in the insn chain. */
+ if (MVE_VPT_UNPREDICATED_INSN_P (insn)
+ && (arm_is_mve_load_store_insn (insn)
+ || (arm_is_mve_across_vector_insn (insn)
+ && !arm_mve_is_allowed_unpredic_across_vector_insn (insn))))
+ {
+ safe_insn_map->put (insn, true);
+ return true;
+ }
+
+ df_ref insn_uses = NULL;
+ FOR_EACH_INSN_USE (insn_uses, insn)
+ {
+ /* If the operand is in the input reg set to the the basic block,
+ (i.e. it has come from outside the loop!), consider it unsafe if:
+ * It's being used in an unpredicated insn.
+ * It is a predicable MVE vector. */
+ if (MVE_VPT_UNPREDICATED_INSN_P (insn)
+ && VALID_MVE_MODE (GET_MODE (DF_REF_REG (insn_uses)))
+ && REGNO_REG_SET_P (DF_LR_IN (body), DF_REF_REGNO (insn_uses)))
+ {
+ safe_insn_map->put (insn, true);
+ return true;
+ }
+
+ /* Scan backwards from the current INSN through the instruction chain
+ until the start of the basic block. */
+ for (rtx_insn *prev_insn = PREV_INSN (insn);
+ prev_insn && prev_insn != PREV_INSN (BB_HEAD (body));
+ prev_insn = PREV_INSN (prev_insn))
+ {
+ /* If a previous insn defines a register that INSN uses, then
+ add to the worklist to check that insn's USEs. If any of these
+ insns return true as MVE_VPT_UNPREDICATED_INSN_Ps, then the
+ whole chain is affected by the change in behaviour from being
+ placed in dlstp/letp loop. */
+ df_ref prev_insn_defs = NULL;
+ FOR_EACH_INSN_DEF (prev_insn_defs, prev_insn)
+ {
+ if (DF_REF_REGNO (insn_uses) == DF_REF_REGNO (prev_insn_defs)
+ && !arm_mve_vec_insn_is_predicated_with_this_predicate
+ (insn, vctp_vpr_generated))
+ worklist.safe_push (prev_insn);
+ }
+ }
+ }
+ }
+ safe_insn_map->put (insn_in, false);
+ return false;
+}
+
+/* If we have identified that the current DEF will be modified
+ by such implicit predication, scan through all the
+ insns that USE it and bail out if any one is outside the
+ current basic block (i.e. the reg is live after the loop)
+ or if any are store insns that are unpredicated or using a
+ predicate other than the loop VPR.
+ This function returns true if the insn is not suitable for
+ implicit predication and false otherwise.*/
+
+static bool
+arm_mve_check_df_chain_fwd_for_implic_predic_impact (rtx_insn *insn,
+ rtx vctp_vpr_generated)
+{
+
+ /* If this insn is indeed an unpredicated store to memory, bail out. */
+ if (arm_mve_vec_insn_is_unpredicated_or_uses_other_predicate
+ (insn, vctp_vpr_generated)
+ && (arm_is_mve_load_store_insn (insn, 2)
+ || arm_is_mve_across_vector_insn (insn)))
+ return true;
+
+ /* Next, scan forward to the various USEs of the DEFs in this insn. */
+ df_ref insn_def = NULL;
+ FOR_EACH_INSN_DEF (insn_def, insn)
+ {
+ for (df_ref use = DF_REG_USE_CHAIN (DF_REF_REGNO (insn_def)); use;
+ use = DF_REF_NEXT_REG (use))
+ {
+ rtx_insn *next_use_insn = DF_REF_INSN (use);
+ if (next_use_insn != insn
+ && NONDEBUG_INSN_P (next_use_insn))
+ {
+ /* If the USE is outside the loop body bb, or it is inside, but
+ is an differently-predicated store to memory or it is any
+ across-vector instruction. */
+ if (BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (next_use_insn)
+ || (arm_mve_vec_insn_is_unpredicated_or_uses_other_predicate
+ (next_use_insn, vctp_vpr_generated)
+ && (arm_is_mve_load_store_insn (next_use_insn, 2)
+ || arm_is_mve_across_vector_insn (next_use_insn))))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/* Helper function to `arm_mve_dlstp_check_inc_counter` and to
+ `arm_mve_dlstp_check_dec_counter`. In the situations where the loop counter
+ is incrementing by 1 or decrementing by 1 in each iteration, ensure that the
+ target value or the initialisation value, respectively, was a calculation
+ of the number of iterations of the loop, which is expected to be an ASHIFTRT
+ by VCTP_STEP. */
+
+static bool
+arm_mve_check_reg_origin_is_num_elems (basic_block body, rtx reg, rtx vctp_step)
+{
+ /* Ok, we now know the loop starts from zero and increments by one.
+ Now just show that the max value of the counter came from an
+ appropriate ASHIFRT expr of the correct amount. */
+ basic_block pre_loop_bb = body->prev_bb;
+ while (pre_loop_bb && BB_END (pre_loop_bb)
+ && !df_bb_regno_only_def_find (pre_loop_bb, REGNO (reg)))
+ pre_loop_bb = pre_loop_bb->prev_bb;
+
+ df_ref counter_max_last_def = df_bb_regno_only_def_find (pre_loop_bb, REGNO (reg));
+ if (!counter_max_last_def)
+ return false;
+ rtx counter_max_last_set = single_set (DF_REF_INSN (counter_max_last_def));
+ if (!counter_max_last_set)
+ return false;
+
+ /* If we encounter a simple SET from a REG, follow it through. */
+ if (REG_P (SET_SRC (counter_max_last_set)))
+ return arm_mve_check_reg_origin_is_num_elems
+ (pre_loop_bb->next_bb, SET_SRC (counter_max_last_set), vctp_step);
+
+ /* If we encounter a SET from an IF_THEN_ELSE where one of the operands is a
+ constant and the other is a REG, follow through to that REG. */
+ if (GET_CODE (SET_SRC (counter_max_last_set)) == IF_THEN_ELSE
+ && REG_P (XEXP (SET_SRC (counter_max_last_set), 1))
+ && CONST_INT_P (XEXP (SET_SRC (counter_max_last_set), 2)))
+ return arm_mve_check_reg_origin_is_num_elems
+ (pre_loop_bb->next_bb, XEXP (SET_SRC (counter_max_last_set), 1), vctp_step);
+
+ if (GET_CODE (SET_SRC (counter_max_last_set)) == ASHIFTRT
+ && CONST_INT_P (XEXP (SET_SRC (counter_max_last_set), 1))
+ && ((1 << INTVAL (XEXP (SET_SRC (counter_max_last_set), 1)))
+ == abs (INTVAL (vctp_step))))
+ return true;
+
+ return false;
+}
+
+/* If we have identified the loop to have an incrementing counter, we need to
+ make sure that it increments by 1 and that the loop is structured correctly:
+ * The counter starts from 0
+ * The counter terminates at (num_of_elem + num_of_lanes - 1) / num_of_lanes
+ * The vctp insn uses a reg that decrements appropriately in each iteration.
+*/
+
+static rtx_insn*
+arm_mve_dlstp_check_inc_counter (basic_block body, rtx_insn* vctp_insn,
+ rtx condconst, rtx condcount)
+{
+ rtx vctp_reg = XVECEXP (XEXP (PATTERN (vctp_insn), 1), 0, 0);
+ /* The loop latch has to be empty. When compiling all the known MVE LoLs in
+ user applications, none of those with incrementing counters had any real
+ insns in the loop latch. As such, this function has only been tested with
+ an empty latch and may misbehave or ICE if we somehow get here with an
+ increment in the latch, so, for correctness, error out early. */
+ if (!empty_block_p (body->loop_father->latch))
+ return NULL;
+
+ class rtx_iv vctp_reg_iv;
+ /* For loops of type B) the loop counter is independent of the decrement
+ of the reg used in the vctp_insn. So run iv analysis on that reg. This
+ has to succeed for such loops to be supported. */
+ if (!iv_analyze (vctp_insn, as_a<scalar_int_mode> (GET_MODE (vctp_reg)),
+ vctp_reg, &vctp_reg_iv))
+ return NULL;
+
+ /* Extract the decrementnum of the vctp reg from the iv. This decrementnum
+ is the number of lanes/elements it decrements from the remaining number of
+ lanes/elements to process in the loop, for this reason this is always a
+ negative number, but to simplify later checks we use it's absolute value. */
+ int decrementnum = INTVAL (vctp_reg_iv.step);
+ if (decrementnum >= 0)
+ return NULL;
+ decrementnum = abs (decrementnum);
+
+ /* Find where both of those are modified in the loop body bb. */
+ df_ref condcount_reg_set_df = df_bb_regno_only_def_find (body, REGNO (condcount));
+ df_ref vctp_reg_set_df = df_bb_regno_only_def_find (body, REGNO (vctp_reg));
+ if (!condcount_reg_set_df || !vctp_reg_set_df)
+ return NULL;
+ rtx condcount_reg_set = single_set (DF_REF_INSN (condcount_reg_set_df));
+ rtx vctp_reg_set = single_set (DF_REF_INSN (vctp_reg_set_df));
+ if (!condcount_reg_set || !vctp_reg_set)
+ return NULL;
+
+ /* Ensure the modification of the vctp reg from df is consistent with
+ the iv and the number of lanes on the vctp insn. */
+ if (GET_CODE (SET_SRC (vctp_reg_set)) != PLUS
+ || !REG_P (SET_DEST (vctp_reg_set))
+ || !REG_P (XEXP (SET_SRC (vctp_reg_set), 0))
+ || REGNO (SET_DEST (vctp_reg_set))
+ != REGNO (XEXP (SET_SRC (vctp_reg_set), 0))
+ || !CONST_INT_P (XEXP (SET_SRC (vctp_reg_set), 1))
+ || INTVAL (XEXP (SET_SRC (vctp_reg_set), 1)) >= 0
+ || decrementnum != abs (INTVAL (XEXP (SET_SRC (vctp_reg_set), 1)))
+ || decrementnum != arm_mve_get_vctp_lanes (vctp_insn))
+ return NULL;
+
+ if (REG_P (condcount) && REG_P (condconst))
+ {
+ /* First we need to prove that the loop is going 0..condconst with an
+ inc of 1 in each iteration. */
+ if (GET_CODE (SET_SRC (condcount_reg_set)) == PLUS
+ && CONST_INT_P (XEXP (SET_SRC (condcount_reg_set), 1))
+ && INTVAL (XEXP (SET_SRC (condcount_reg_set), 1)) == 1)
+ {
+ rtx counter_reg = SET_DEST (condcount_reg_set);
+ /* Check that the counter did indeed start from zero. */
+ df_ref this_set = DF_REG_DEF_CHAIN (REGNO (counter_reg));
+ if (!this_set)
+ return NULL;
+ df_ref last_set_def = DF_REF_NEXT_REG (this_set);
+ if (!last_set_def)
+ return NULL;
+ rtx_insn* last_set_insn = DF_REF_INSN (last_set_def);
+ rtx last_set = single_set (last_set_insn);
+ if (!last_set)
+ return NULL;
+ rtx counter_orig_set;
+ counter_orig_set = SET_SRC (last_set);
+ if (!CONST_INT_P (counter_orig_set)
+ || (INTVAL (counter_orig_set) != 0))
+ return NULL;
+ /* And finally check that the target value of the counter,
+ condconst, is of the correct shape. */
+ if (!arm_mve_check_reg_origin_is_num_elems (body, condconst,
+ vctp_reg_iv.step))
+ return NULL;
+ }
+ else
+ return NULL;
+ }
+ else
+ return NULL;
+
+ /* Everything looks valid. */
+ return vctp_insn;
+}
+
+/* Helper function to `arm_mve_loop_valid_for_dlstp`. In the case of a
+ counter that is decrementing, ensure that it is decrementing by the
+ right amount in each iteration and that the target condition is what
+ we expect. */
+
+static rtx_insn*
+arm_mve_dlstp_check_dec_counter (basic_block body, rtx_insn* vctp_insn,
+ rtx condconst, rtx condcount)
+{
+ rtx vctp_reg = XVECEXP (XEXP (PATTERN (vctp_insn), 1), 0, 0);
+ class rtx_iv vctp_reg_iv;
+ int decrementnum;
+ /* For decrementing loops of type A), the counter is usually present in the
+ loop latch. Here we simply need to verify that this counter is the same
+ reg that is also used in the vctp_insn and that it is not otherwise
+ modified. */
+ rtx_insn *dec_insn = BB_END (body->loop_father->latch);
+ /* If not in the loop latch, try to find the decrement in the loop body. */
+ if (!NONDEBUG_INSN_P (dec_insn))
+ {
+ df_ref temp = df_bb_regno_only_def_find (body, REGNO (condcount));
+ /* If we haven't been able to find the decrement, bail out. */
+ if (!temp)
+ return NULL;
+ dec_insn = DF_REF_INSN (temp);
+ }
+
+ rtx dec_set = single_set (dec_insn);
+
+ /* Next, ensure that it is a PLUS of the form:
+ (set (reg a) (plus (reg a) (const_int)))
+ where (reg a) is the same as condcount. */
+ if (!dec_set
+ || !REG_P (SET_DEST (dec_set))
+ || !REG_P (XEXP (SET_SRC (dec_set), 0))
+ || !CONST_INT_P (XEXP (SET_SRC (dec_set), 1))
+ || REGNO (SET_DEST (dec_set))
+ != REGNO (XEXP (SET_SRC (dec_set), 0))
+ || REGNO (SET_DEST (dec_set)) != REGNO (condcount))
+ return NULL;
+
+ decrementnum = INTVAL (XEXP (SET_SRC (dec_set), 1));
+
+ /* This decrementnum is the number of lanes/elements it decrements from the
+ remaining number of lanes/elements to process in the loop, for this reason
+ this is always a negative number, but to simplify later checks we use it's
+ absolute value. */
+ if (decrementnum >= 0)
+ return NULL;
+ decrementnum = abs (decrementnum);
+
+ /* Ok, so we now know the loop decrement. If it is a 1, then we need to
+ look at the loop vctp_reg and verify that it also decrements correctly.
+ Then, we need to establish that the starting value of the loop decrement
+ originates from the starting value of the vctp decrement. */
+ if (decrementnum == 1)
+ {
+ class rtx_iv vctp_reg_iv;
+ /* The loop counter is found to be independent of the decrement
+ of the reg used in the vctp_insn, again. Ensure that IV analysis
+ succeeds and check the step. */
+ if (!iv_analyze (vctp_insn, as_a<scalar_int_mode> (GET_MODE (vctp_reg)),
+ vctp_reg, &vctp_reg_iv))
+ return NULL;
+ /* Ensure it matches the number of lanes of the vctp instruction. */
+ if (abs (INTVAL (vctp_reg_iv.step))
+ != arm_mve_get_vctp_lanes (vctp_insn))
+ return NULL;
+ if (!arm_mve_check_reg_origin_is_num_elems (body, condcount, vctp_reg_iv.step))
+ return NULL;
+ }
+ /* If the decrements are the same, then the situation is simple: either they
+ are also the same reg, which is safe, or they are different registers, in
+ which case makse sure that there is a only simple SET from one to the
+ other inside the loop.*/
+ else if (decrementnum == arm_mve_get_vctp_lanes (vctp_insn))
+ {
+ if (REGNO (condcount) != REGNO (vctp_reg))
+ {
+ /* It wasn't the same reg, but it could be behild a
+ (set (vctp_reg) (condcount)), so instead find where
+ the VCTP insn is DEF'd inside the loop. */
+ rtx_insn *vctp_reg_insn
+ = DF_REF_INSN (df_bb_regno_only_def_find (body, REGNO (vctp_reg)));
+ rtx vctp_reg_set = single_set (vctp_reg_insn);
+ /* This must just be a simple SET from the condcount. */
+ if (!vctp_reg_set
+ || !REG_P (SET_DEST (vctp_reg_set))
+ || !REG_P (SET_SRC (vctp_reg_set))
+ || REGNO (SET_SRC (vctp_reg_set)) != REGNO (condcount))
+ return NULL;
+ }
+ }
+ else
+ return NULL;
+
+ /* We now only need to find out that the loop terminates with a LE
+ zero condition. If condconst is a const_int, then this is easy.
+ If its a REG, look at the last condition+jump in a bb before
+ the loop, because that usually will have a branch jumping over
+ the loop body. */
+ if (CONST_INT_P (condconst)
+ && !(INTVAL (condconst) == 0 && JUMP_P (BB_END (body))
+ && GET_CODE (XEXP (PATTERN (BB_END (body)), 1)) == IF_THEN_ELSE
+ && (GET_CODE (XEXP (XEXP (PATTERN (BB_END (body)), 1), 0)) == NE
+ ||GET_CODE (XEXP (XEXP (PATTERN (BB_END (body)), 1), 0)) == GT)))
+ return NULL;
+ else if (REG_P (condconst))
+ {
+ basic_block pre_loop_bb = body;
+ while (pre_loop_bb->prev_bb && BB_END (pre_loop_bb->prev_bb)
+ && !JUMP_P (BB_END (pre_loop_bb->prev_bb)))
+ pre_loop_bb = pre_loop_bb->prev_bb;
+ if (pre_loop_bb && BB_END (pre_loop_bb))
+ pre_loop_bb = pre_loop_bb->prev_bb;
+ else
+ return NULL;
+ rtx initial_compare = NULL_RTX;
+ if (!(prev_nonnote_nondebug_insn_bb (BB_END (pre_loop_bb))
+ && INSN_P (prev_nonnote_nondebug_insn_bb (BB_END (pre_loop_bb)))))
+ return NULL;
+ else
+ initial_compare
+ = single_set (prev_nonnote_nondebug_insn_bb (BB_END (pre_loop_bb)));
+ if (!(initial_compare
+ && cc_register (SET_DEST (initial_compare), VOIDmode)
+ && GET_CODE (SET_SRC (initial_compare)) == COMPARE
+ && CONST_INT_P (XEXP (SET_SRC (initial_compare), 1))
+ && INTVAL (XEXP (SET_SRC (initial_compare), 1)) == 0))
+ return NULL;
+
+ /* Usually this is a LE condition, but it can also just be a GT or an EQ
+ condition (if the value is unsigned or the compiler knows its not negative) */
+ rtx_insn *loop_jumpover = BB_END (pre_loop_bb);
+ if (!(JUMP_P (loop_jumpover)
+ && GET_CODE (XEXP (PATTERN (loop_jumpover), 1)) == IF_THEN_ELSE
+ && (GET_CODE (XEXP (XEXP (PATTERN (loop_jumpover), 1), 0)) == LE
+ || GET_CODE (XEXP (XEXP (PATTERN (loop_jumpover), 1), 0)) == GT
+ || GET_CODE (XEXP (XEXP (PATTERN (loop_jumpover), 1), 0)) == EQ)))
+ return NULL;
+ }
+
+ /* Everything looks valid. */
+ return vctp_insn;
+}
+
+/* Function to check a loop's structure to see if it is a valid candidate for
+ an MVE Tail Predicated Low-Overhead Loop. Returns the loop's VCTP_INSN if
+ it is valid, or NULL if it isn't. */
+
+static rtx_insn*
+arm_mve_loop_valid_for_dlstp (basic_block body)
+{
+ /* Doloop can only be done "elementwise" with predicated dlstp/letp if it
+ contains a VCTP on the number of elements processed by the loop.
+ Find the VCTP predicate generation inside the loop body BB. */
+ rtx_insn *vctp_insn = arm_mve_get_loop_vctp (body);
+ if (!vctp_insn)
+ return NULL;
+
+ /* There are only two types of loops that can be turned into dlstp/letp
+ loops:
+ A) Loops of the form:
+ while (num_of_elem > 0)
+ {
+ p = vctp<size> (num_of_elem)
+ n -= num_of_lanes;
+ }
+ B) Loops of the form:
+ int num_of_iters = (num_of_elem + num_of_lanes - 1) / num_of_lanes
+ for (i = 0; i < num_of_iters; i++)
+ {
+ p = vctp<size> (num_of_elem)
+ n -= num_of_lanes;
+ }
+
+ Then, depending on the type of loop above we need will need to do
+ different sets of checks. */
+ iv_analysis_loop_init (body->loop_father);
+
+ /* In order to find out if the loop is of type A or B above look for the
+ loop counter: it will either be incrementing by one per iteration or
+ it will be decrementing by num_of_lanes. We can find the loop counter
+ in the condition at the end of the loop. */
+ rtx_insn *loop_cond = prev_nonnote_nondebug_insn_bb (BB_END (body));
+ if (!(cc_register (XEXP (PATTERN (loop_cond), 0), VOIDmode)
+ && GET_CODE (XEXP (PATTERN (loop_cond), 1)) == COMPARE))
+ return NULL;
+
+ /* The operands in the condition: Try to identify which one is the
+ constant and which is the counter and run IV analysis on the latter. */
+ rtx cond_arg_1 = XEXP (XEXP (PATTERN (loop_cond), 1), 0);
+ rtx cond_arg_2 = XEXP (XEXP (PATTERN (loop_cond), 1), 1);
+
+ rtx loop_cond_constant;
+ rtx loop_counter;
+ class rtx_iv cond_counter_iv, cond_temp_iv;
+
+ if (CONST_INT_P (cond_arg_1))
+ {
+ /* cond_arg_1 is the constant and cond_arg_2 is the counter. */
+ loop_cond_constant = cond_arg_1;
+ loop_counter = cond_arg_2;
+ iv_analyze (loop_cond, as_a<scalar_int_mode> (GET_MODE (cond_arg_2)),
+ cond_arg_2, &cond_counter_iv);
+ }
+ else if (CONST_INT_P (cond_arg_2))
+ {
+ /* cond_arg_2 is the constant and cond_arg_1 is the counter. */
+ loop_cond_constant = cond_arg_2;
+ loop_counter = cond_arg_1;
+ iv_analyze (loop_cond, as_a<scalar_int_mode> (GET_MODE (cond_arg_1)),
+ cond_arg_1, &cond_counter_iv);
+ }
+ else if (REG_P (cond_arg_1) && REG_P (cond_arg_2))
+ {
+ /* If both operands to the compare are REGs, we can safely
+ run IV analysis on both and then determine which is the
+ constant by looking at the step.
+ First assume cond_arg_1 is the counter. */
+ loop_counter = cond_arg_1;
+ loop_cond_constant = cond_arg_2;
+ iv_analyze (loop_cond, as_a<scalar_int_mode> (GET_MODE (cond_arg_1)),
+ cond_arg_1, &cond_counter_iv);
+ iv_analyze (loop_cond, as_a<scalar_int_mode> (GET_MODE (cond_arg_2)),
+ cond_arg_2, &cond_temp_iv);
+
+ /* Look at the steps and swap around the rtx's if needed. Error out if
+ one of them cannot be identified as constant. */
+ if (!CONST_INT_P (cond_counter_iv.step) || !CONST_INT_P (cond_temp_iv.step))
+ return NULL;
+ if (INTVAL (cond_counter_iv.step) != 0 && INTVAL (cond_temp_iv.step) != 0)
+ return NULL;
+ if (INTVAL (cond_counter_iv.step) == 0 && INTVAL (cond_temp_iv.step) != 0)
+ {
+ loop_counter = cond_arg_2;
+ loop_cond_constant = cond_arg_1;
+ cond_counter_iv = cond_temp_iv;
+ }
+ }
+ else
+ return NULL;
+
+ if (!REG_P (loop_counter))
+ return NULL;
+ if (!(REG_P (loop_cond_constant) || CONST_INT_P (loop_cond_constant)))
+ return NULL;
+
+ /* Now we have extracted the IV step of the loop counter, call the
+ appropriate checking function. */
+ if (INTVAL (cond_counter_iv.step) > 0)
+ return arm_mve_dlstp_check_inc_counter (body, vctp_insn,
+ loop_cond_constant, loop_counter);
+ else if (INTVAL (cond_counter_iv.step) < 0)
+ return arm_mve_dlstp_check_dec_counter (body, vctp_insn,
+ loop_cond_constant, loop_counter);
+ else
+ return NULL;
+}
+
+/* Predict whether the given loop in gimple will be transformed in the RTL
+ doloop_optimize pass. It could be argued that turning large enough loops
+ into low-overhead loops would not show a signficant performance boost.
+ Howeer, in the case of tail predication we would still avoid using VPT/VPST
+ instructions inside the loop, and in either case using low-overhead loops
+ would not be detrimental, so we decided to not consider size, avoiding the
+ need of a heuristic to determine what an appropriate size boundary is. */
+
+static bool
+arm_predict_doloop_p (struct loop *loop)
+{
+ gcc_assert (loop);
+ /* On arm, targetm.can_use_doloop_p is actually
+ can_use_doloop_if_innermost. Ensure the loop is innermost,
+ it is valid and as per arm_target_bb_ok_for_lob and the
+ correct architecture flags are enabled. */
+ if (!(TARGET_HAVE_LOB && optimize > 0))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Predict doloop failure due to"
+ " target architecture or optimisation flags.\n");
+ return false;
+ }
+ else if (loop->inner != NULL)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Predict doloop failure due to"
+ " loop nesting.\n");
+ return false;
+ }
+ else if (!arm_target_bb_ok_for_lob (loop->header->next_bb))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Predict doloop failure due to"
+ " loop bb complexity.\n");
+ return false;
+ }
+
+ return true;
+}
+
+/* Implement targetm.loop_unroll_adjust. Use this to block unrolling of loops
+ that may later be turned into MVE Tail Predicated Low Overhead Loops. The
+ performance benefit of an MVE LoL is likely to be much higher than that of
+ the unrolling. */
+
+unsigned
+arm_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
+{
+ if (TARGET_HAVE_MVE
+ && arm_target_bb_ok_for_lob (loop->latch)
+ && arm_mve_loop_valid_for_dlstp (loop->header))
+ return 0;
+ else
+ return nunroll;
+}
+
+/* Function to hadle emitting a VPT-unpredicated version of a VPT-predicated
+ insn to a sequence. */
+
+static bool
+arm_emit_mve_unpredicated_insn_to_seq (rtx_insn* insn)
+{
+ rtx insn_vpr_reg_operand = arm_get_required_vpr_reg_param (insn);
+ int new_icode = get_attr_mve_unpredicated_insn (insn);
+ if (!in_sequence_p ()
+ || !MVE_VPT_PREDICATED_INSN_P (insn)
+ || (!insn_vpr_reg_operand)
+ || (!new_icode))
+ return false;
+
+ extract_insn (insn);
+ rtx arr[8];
+ int j = 0;
+
+ /* When transforming a VPT-predicated instruction
+ into its unpredicated equivalent we need to drop
+ the VPR operand and we may need to also drop a
+ merge "vuninit" input operand, depending on the
+ instruction pattern. Here ensure that we have at
+ most a two-operand difference between the two
+ instrunctions. */
+ int n_operands_diff
+ = recog_data.n_operands - insn_data[new_icode].n_operands;
+ if (!(n_operands_diff > 0 && n_operands_diff <= 2))
+ return false;
+
+ /* Then, loop through the operands of the predicated
+ instruction, and retain the ones that map to the
+ unpredicated instruction. */
+ for (int i = 0; i < recog_data.n_operands; i++)
+ {
+ /* Ignore the VPR and, if needed, the vuninit
+ operand. */
+ if (insn_vpr_reg_operand == recog_data.operand[i]
+ || (n_operands_diff == 2
+ && !strcmp (recog_data.constraints[i], "0")))
+ continue;
+ else
+ {
+ arr[j] = recog_data.operand[i];
+ j++;
+ }
+ }
+
+ /* Finally, emit the upredicated instruction. */
+ rtx_insn *new_insn;
+ switch (j)
+ {
+ case 1:
+ new_insn = emit_insn (GEN_FCN (new_icode) (arr[0]));
+ break;
+ case 2:
+ new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1]));
+ break;
+ case 3:
+ new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2]));
+ break;
+ case 4:
+ new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2],
+ arr[3]));
+ break;
+ case 5:
+ new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2],
+ arr[3], arr[4]));
+ break;
+ case 6:
+ new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2],
+ arr[3], arr[4], arr[5]));
+ break;
+ case 7:
+ new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2],
+ arr[3], arr[4], arr[5],
+ arr[6]));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
+ return true;
+}
+
+/* When a vctp insn is used, its out is often followed by
+ a zero-extend insn to SImode, which is then SUBREG'd into a
+ vector form of mode VALID_MVE_PRED_MODE: this vector form is
+ what is then used as an input to the instructions within the
+ loop. Hence, store that vector form of the VPR reg into
+ vctp_vpr_generated, so that we can match it with instructions
+ in the loop to determine if they are predicated on this same
+ VPR. If there is no zero-extend and subreg or it is otherwise
+ invalid, then return NULL to cancel the dlstp transform. */
+
+static rtx
+arm_mve_get_vctp_vec_form (rtx_insn *insn)
+{
+ rtx vctp_vpr_generated = NULL_RTX;
+ rtx_insn *next_use1 = NULL;
+ df_ref use;
+ for (use
+ = DF_REG_USE_CHAIN
+ (DF_REF_REGNO (DF_INSN_INFO_DEFS (DF_INSN_INFO_GET (insn))));
+ use; use = DF_REF_NEXT_REG (use))
+ if (!next_use1 && NONDEBUG_INSN_P (DF_REF_INSN (use)))
+ next_use1 = DF_REF_INSN (use);
+
+ rtx next_use1_set = single_set (next_use1);
+ if (next_use1_set
+ && GET_CODE (SET_SRC (next_use1_set)) == ZERO_EXTEND)
+ {
+ rtx_insn *next_use2 = NULL;
+ for (use
+ = DF_REG_USE_CHAIN
+ (DF_REF_REGNO
+ (DF_INSN_INFO_DEFS (DF_INSN_INFO_GET (next_use1))));
+ use; use = DF_REF_NEXT_REG (use))
+ if (!next_use2 && NONDEBUG_INSN_P (DF_REF_INSN (use)))
+ next_use2 = DF_REF_INSN (use);
+
+ rtx next_use2_set = single_set (next_use2);
+ if (next_use2_set
+ && GET_CODE (SET_SRC (next_use2_set)) == SUBREG)
+ vctp_vpr_generated = SET_DEST (next_use2_set);
+ }
+
+ if (!vctp_vpr_generated || !REG_P (vctp_vpr_generated)
+ || !VALID_MVE_PRED_MODE (GET_MODE (vctp_vpr_generated)))
+ return NULL_RTX;
+
+ return vctp_vpr_generated;
+}
+
+/* Attempt to transform the loop contents of loop basic block from VPT
+ predicated insns into unpredicated insns for a dlstp/letp loop. Returns
+ rtx constant value to decrement from the total number of elements. Return
+ (const_int 1) if we can't use tail predication and fallback to scalar
+ low-overhead loops. */
+
+rtx
+arm_attempt_dlstp_transform (rtx label)
+{
+ basic_block body = BLOCK_FOR_INSN (label)->prev_bb;
+
+ /* Ensure that the bb is within a loop that has all required metadata. */
+ if (!body->loop_father || !body->loop_father->header
+ || !body->loop_father->simple_loop_desc)
+ return const1_rtx;
+
+ rtx_insn *vctp_insn = arm_mve_loop_valid_for_dlstp (body);
+ if (!vctp_insn)
+ return const1_rtx;
+
+ gcc_assert (single_set (vctp_insn));
+
+ rtx vctp_vpr_generated = arm_mve_get_vctp_vec_form (vctp_insn);
+ if (!vctp_vpr_generated)
+ return const1_rtx;
+
+ /* decrementunum is already known to be valid at this point. */
+ int decrementnum = arm_mve_get_vctp_lanes (vctp_insn);
+
+ rtx_insn *insn = 0;
+ rtx_insn *cur_insn = 0;
+ rtx_insn *seq;
+ hash_map <rtx_insn *, bool> *safe_insn_map
+ = new hash_map <rtx_insn *, bool>;
+
+ /* Scan through the insns in the loop bb and emit the transformed bb
+ insns to a sequence. */
+ start_sequence ();
+ FOR_BB_INSNS (body, insn)
+ {
+ if (GET_CODE (insn) == CODE_LABEL || NOTE_INSN_BASIC_BLOCK_P (insn))
+ continue;
+ else if (NOTE_P (insn))
+ emit_note ((enum insn_note)NOTE_KIND (insn));
+ else if (DEBUG_INSN_P (insn))
+ emit_debug_insn (PATTERN (insn));
+ else if (!INSN_P (insn))
+ {
+ end_sequence ();
+ return const1_rtx;
+ }
+ /* When we find the vctp instruction: continue. */
+ else if (insn == vctp_insn)
+ continue;
+ /* If the insn pattern requires the use of the VPR value from the
+ vctp as an input parameter for predication. */
+ else if (arm_mve_vec_insn_is_predicated_with_this_predicate
+ (insn, vctp_vpr_generated))
+ {
+ bool success = arm_emit_mve_unpredicated_insn_to_seq (insn);
+ if (!success)
+ {
+ end_sequence ();
+ return const1_rtx;
+ }
+ }
+ /* If the insn isn't VPT predicated on vctp_vpr_generated, we need to
+ make sure that it is still valid within the dlstp/letp loop. */
+ else
+ {
+ /* If this instruction USE-s the vctp_vpr_generated other than for
+ predication, this blocks the transformation as we are not allowed
+ to optimise the VPR value away. */
+ df_ref insn_uses = NULL;
+ FOR_EACH_INSN_USE (insn_uses, insn)
+ {
+ if (rtx_equal_p (vctp_vpr_generated, DF_REF_REG (insn_uses)))
+ {
+ end_sequence ();
+ return const1_rtx;
+ }
+ }
+ /* If within the loop we have an MVE vector instruction that is
+ unpredicated, the dlstp/letp looping will add implicit
+ predication to it. This will result in a change in behaviour
+ of the instruction, so we need to find out if any instructions
+ that feed into the current instruction were implicitly
+ predicated. */
+ if (arm_mve_check_df_chain_back_for_implic_predic
+ (safe_insn_map, insn, vctp_vpr_generated))
+ {
+ if (arm_mve_check_df_chain_fwd_for_implic_predic_impact
+ (insn, vctp_vpr_generated))
+ {
+ end_sequence ();
+ return const1_rtx;
+ }
+ }
+ emit_insn (PATTERN (insn));
+ }
+ }
+ seq = get_insns ();
+ end_sequence ();
+
+ /* Re-write the entire BB contents with the transformed
+ sequence. */
+ FOR_BB_INSNS_SAFE (body, insn, cur_insn)
+ if (!(GET_CODE (insn) == CODE_LABEL || NOTE_INSN_BASIC_BLOCK_P (insn)))
+ delete_insn (insn);
+
+ emit_insn_after (seq, BB_END (body));
+
+ /* The transformation has succeeded, so now modify the "count"
+ (a.k.a. niter_expr) for the middle-end. Also set noloop_assumptions
+ to NULL to stop the middle-end from making assumptions about the
+ number of iterations. */
+ simple_loop_desc (body->loop_father)->niter_expr
+ = XVECEXP (SET_SRC (PATTERN (vctp_insn)), 0, 0);
+ simple_loop_desc (body->loop_father)->noloop_assumptions = NULL_RTX;
+ return GEN_INT (decrementnum);
}
#if CHECKING_P
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 5ea2d9e866891bdb3dc73fcf6cbd6cdd2f989951..9398702cddd076a7eacf1ca6eac6c5a1fbd9a3d0 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -2673,6 +2673,17 @@ (define_int_iterator MRRCI [VUNSPEC_MRRC VUNSPEC_MRRC2])
(define_int_attr mrrc [(VUNSPEC_MRRC "mrrc") (VUNSPEC_MRRC2 "mrrc2")])
(define_int_attr MRRC [(VUNSPEC_MRRC "MRRC") (VUNSPEC_MRRC2 "MRRC2")])
+(define_int_attr dlstp_elemsize [(DLSTP8 "8") (DLSTP16 "16") (DLSTP32 "32")
+ (DLSTP64 "64")])
+
+(define_int_attr letp_num_lanes [(LETP8 "16") (LETP16 "8") (LETP32 "4")
+ (LETP64 "2")])
+(define_int_attr letp_num_lanes_neg [(LETP8 "-16") (LETP16 "-8") (LETP32 "-4")
+ (LETP64 "-2")])
+
+(define_int_attr letp_num_lanes_minus_1 [(LETP8 "15") (LETP16 "7") (LETP32 "3")
+ (LETP64 "1")])
+
(define_int_attr opsuffix [(UNSPEC_DOT_S "s8")
(UNSPEC_DOT_U "u8")
(UNSPEC_DOT_US "s8")
@@ -2916,6 +2927,10 @@ (define_int_iterator SQRSHRLQ [SQRSHRL_64 SQRSHRL_48])
(define_int_iterator VSHLCQ_M [VSHLCQ_M_S VSHLCQ_M_U])
(define_int_iterator VQSHLUQ_M_N [VQSHLUQ_M_N_S])
(define_int_iterator VQSHLUQ_N [VQSHLUQ_N_S])
+(define_int_iterator DLSTP [DLSTP8 DLSTP16 DLSTP32
+ DLSTP64])
+(define_int_iterator LETP [LETP8 LETP16 LETP32
+ LETP64])
;; Define iterators for VCMLA operations
(define_int_iterator VCMLA_OP [UNSPEC_VCMLA
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index b1862d7977e91605cd971e634105bed3fa6e75cb..5748e2333eb3a88d659892f2bcc72849bcf388b5 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -6918,3 +6918,41 @@ (define_expand "@arm_mve_reinterpret<mode>"
}
}
)
+
+;; Originally expanded by 'predicated_doloop_end'.
+;; In the rare situation where the branch is too far, we do also need to
+;; revert FPSCR.LTPSIZE back to 0x100 after the last iteration.
+(define_insn "predicated_doloop_end_internal<letp_num_lanes>"
+ [(set (pc)
+ (if_then_else
+ (gtu (unspec:SI [(plus:SI (match_operand:SI 0 "s_register_operand" "=r")
+ (const_int <letp_num_lanes_neg>))]
+ LETP)
+ (const_int <letp_num_lanes_minus_1>))
+ (match_operand 1 "" "")
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0) (const_int <letp_num_lanes_neg>)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_HAVE_MVE"
+ {
+ if (get_attr_length (insn) == 4)
+ return "letp\t%|lr, %l1";
+ else
+ return "subs\t%|lr, #<letp_num_lanes>\n\tbhi\t%l1\n\tlctp";
+ }
+ [(set (attr "length")
+ (if_then_else
+ (ltu (minus (pc) (match_dup 1)) (const_int 1024))
+ (const_int 4)
+ (const_int 6)))
+ (set_attr "type" "branch")])
+
+(define_insn "dlstp<dlstp_elemsize>_insn"
+ [
+ (set (reg:SI LR_REGNUM)
+ (unspec:SI [(match_operand:SI 0 "s_register_operand" "r")]
+ DLSTP))
+ ]
+ "TARGET_HAVE_MVE"
+ "dlstp.<dlstp_elemsize>\t%|lr, %0")
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index e1e013befa7a67ddbf517bf22797bdaeeb96b94f..f2801cea36a34d326fd6f3a213e0e149c3e0784f 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -1613,7 +1613,7 @@ (define_expand "doloop_end"
(use (match_operand 1 "" ""))] ; label
"TARGET_32BIT"
"
- {
+{
/* Currently SMS relies on the do-loop pattern to recognize loops
where (1) the control part consists of all insns defining and/or
using a certain 'count' register and (2) the loop count can be
@@ -1623,41 +1623,77 @@ (define_expand "doloop_end"
Also used to implement the low over head loops feature, which is part of
the Armv8.1-M Mainline Low Overhead Branch (LOB) extension. */
- if (optimize > 0 && (flag_modulo_sched || TARGET_HAVE_LOB))
- {
- rtx s0;
- rtx bcomp;
- rtx loc_ref;
- rtx cc_reg;
- rtx insn;
- rtx cmp;
-
- if (GET_MODE (operands[0]) != SImode)
- FAIL;
-
- s0 = operands [0];
-
- /* Low over head loop instructions require the first operand to be LR. */
- if (TARGET_HAVE_LOB && arm_target_insn_ok_for_lob (operands [1]))
- s0 = gen_rtx_REG (SImode, LR_REGNUM);
-
- if (TARGET_THUMB2)
- insn = emit_insn (gen_thumb2_addsi3_compare0 (s0, s0, GEN_INT (-1)));
- else
- insn = emit_insn (gen_addsi3_compare0 (s0, s0, GEN_INT (-1)));
-
- cmp = XVECEXP (PATTERN (insn), 0, 0);
- cc_reg = SET_DEST (cmp);
- bcomp = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx);
- loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [1]);
- emit_jump_insn (gen_rtx_SET (pc_rtx,
- gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
- loc_ref, pc_rtx)));
- DONE;
- }
- else
- FAIL;
- }")
+ if (optimize > 0 && (flag_modulo_sched || TARGET_HAVE_LOB))
+ {
+ rtx s0;
+ rtx bcomp;
+ rtx loc_ref;
+ rtx cc_reg;
+ rtx insn;
+ rtx cmp;
+ rtx decrement_num;
+
+ if (GET_MODE (operands[0]) != SImode)
+ FAIL;
+
+ s0 = operands[0];
+
+ if (TARGET_HAVE_LOB && arm_target_bb_ok_for_lob (BLOCK_FOR_INSN (operands[1])))
+ {
+ s0 = gen_rtx_REG (SImode, LR_REGNUM);
+
+ /* If we have a compatible MVE target, try and analyse the loop
+ contents to determine if we can use predicated dlstp/letp
+ looping. */
+ if (TARGET_HAVE_MVE
+ && (decrement_num = arm_attempt_dlstp_transform (operands[1]))
+ && (INTVAL (decrement_num) != 1))
+ {
+ loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[1]);
+ switch (INTVAL (decrement_num))
+ {
+ case 2:
+ insn = emit_jump_insn (gen_predicated_doloop_end_internal2
+ (s0, loc_ref));
+ break;
+ case 4:
+ insn = emit_jump_insn (gen_predicated_doloop_end_internal4
+ (s0, loc_ref));
+ break;
+ case 8:
+ insn = emit_jump_insn (gen_predicated_doloop_end_internal8
+ (s0, loc_ref));
+ break;
+ case 16:
+ insn = emit_jump_insn (gen_predicated_doloop_end_internal16
+ (s0, loc_ref));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ DONE;
+ }
+ }
+
+ /* Otherwise, try standard decrement-by-one dls/le looping. */
+ if (TARGET_THUMB2)
+ insn = emit_insn (gen_thumb2_addsi3_compare0 (s0, s0,
+ GEN_INT (-1)));
+ else
+ insn = emit_insn (gen_addsi3_compare0 (s0, s0, GEN_INT (-1)));
+
+ cmp = XVECEXP (PATTERN (insn), 0, 0);
+ cc_reg = SET_DEST (cmp);
+ bcomp = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx);
+ loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[1]);
+ emit_jump_insn (gen_rtx_SET (pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
+ loc_ref, pc_rtx)));
+ DONE;
+ }
+ else
+ FAIL;
+}")
(define_insn "*clear_apsr"
[(unspec_volatile:SI [(const_int 0)] VUNSPEC_CLRM_APSR)
@@ -1755,7 +1791,37 @@ (define_expand "doloop_begin"
{
if (REGNO (operands[0]) == LR_REGNUM)
{
- emit_insn (gen_dls_insn (operands[0]));
+ /* Pick out the number by which we are decrementing the loop counter
+ in every iteration. If it's > 1, then use dlstp. */
+ int const_int_dec_num
+ = abs (INTVAL (XEXP (XEXP (XVECEXP (PATTERN (operands[1]), 0, 1),
+ 1),
+ 1)));
+ switch (const_int_dec_num)
+ {
+ case 16:
+ emit_insn (gen_dlstp8_insn (operands[0]));
+ break;
+
+ case 8:
+ emit_insn (gen_dlstp16_insn (operands[0]));
+ break;
+
+ case 4:
+ emit_insn (gen_dlstp32_insn (operands[0]));
+ break;
+
+ case 2:
+ emit_insn (gen_dlstp64_insn (operands[0]));
+ break;
+
+ case 1:
+ emit_insn (gen_dls_insn (operands[0]));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
DONE;
}
else
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index 4713ec840abae48ca70f418dbc0d4028ad4ad527..2d6f27c14f4a1e7db05b9684a8958a76a1c79ef2 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -583,6 +583,14 @@ (define_c_enum "unspec" [
VADDLVQ_U
VCTP
VCTP_M
+ DLSTP8
+ DLSTP16
+ DLSTP32
+ DLSTP64
+ LETP8
+ LETP16
+ LETP32
+ LETP64
VPNOT
VCREATEQ_F
VCVTQ_N_TO_F_S
diff --git a/gcc/df-core.cc b/gcc/df-core.cc
index d4812b04a7cb97ea1606082e26e910472da5bcc1..4fcc14bf790d43e792b3c926fe1f80073d908c17 100644
--- a/gcc/df-core.cc
+++ b/gcc/df-core.cc
@@ -1964,6 +1964,21 @@ df_bb_regno_last_def_find (basic_block bb, unsigned int regno)
return NULL;
}
+/* Return the one and only def of REGNO within BB. If there is no def or
+ there are multiple defs, return NULL. */
+
+df_ref
+df_bb_regno_only_def_find (basic_block bb, unsigned int regno)
+{
+ df_ref temp = df_bb_regno_first_def_find (bb, regno);
+ if (!temp)
+ return NULL;
+ else if (temp == df_bb_regno_last_def_find (bb, regno))
+ return temp;
+ else
+ return NULL;
+}
+
/* Finds the reference corresponding to the definition of REG in INSN.
DF is the dataflow object. */
diff --git a/gcc/df.h b/gcc/df.h
index 402657a7076f1bcad24e9c50682e033e57f432f9..98623637f9c839c799222e99df2a7173a770b2ac 100644
--- a/gcc/df.h
+++ b/gcc/df.h
@@ -987,6 +987,7 @@ extern void df_check_cfg_clean (void);
#endif
extern df_ref df_bb_regno_first_def_find (basic_block, unsigned int);
extern df_ref df_bb_regno_last_def_find (basic_block, unsigned int);
+extern df_ref df_bb_regno_only_def_find (basic_block, unsigned int);
extern df_ref df_find_def (rtx_insn *, rtx);
extern bool df_reg_defined (rtx_insn *, rtx);
extern df_ref df_find_use (rtx_insn *, rtx);
diff --git a/gcc/loop-doloop.cc b/gcc/loop-doloop.cc
index 4feb0a25ab9331b7124df900f73c9fc6fb3eb10b..d919207505c472c8a54a2c9c982a09061584177b 100644
--- a/gcc/loop-doloop.cc
+++ b/gcc/loop-doloop.cc
@@ -85,10 +85,10 @@ doloop_condition_get (rtx_insn *doloop_pat)
forms:
1) (parallel [(set (pc) (if_then_else (condition)
- (label_ref (label))
- (pc)))
- (set (reg) (plus (reg) (const_int -1)))
- (additional clobbers and uses)])
+ (label_ref (label))
+ (pc)))
+ (set (reg) (plus (reg) (const_int -1)))
+ (additional clobbers and uses)])
The branch must be the first entry of the parallel (also required
by jump.cc), and the second entry of the parallel must be a set of
@@ -96,19 +96,34 @@ doloop_condition_get (rtx_insn *doloop_pat)
the loop counter in an if_then_else too.
2) (set (reg) (plus (reg) (const_int -1))
- (set (pc) (if_then_else (reg != 0)
- (label_ref (label))
- (pc))).
+ (set (pc) (if_then_else (reg != 0)
+ (label_ref (label))
+ (pc))).
Some targets (ARM) do the comparison before the branch, as in the
following form:
- 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0)))
- (set (reg) (plus (reg) (const_int -1)))])
- (set (pc) (if_then_else (cc == NE)
- (label_ref (label))
- (pc))) */
-
+ 3) (parallel [(set (cc) (compare (plus (reg) (const_int -1)) 0))
+ (set (reg) (plus (reg) (const_int -1)))])
+ (set (pc) (if_then_else (cc == NE)
+ (label_ref (label))
+ (pc)))
+
+ The ARM target also supports a special case of a counter that decrements
+ by `n` and terminating in a GTU condition. In that case, the compare and
+ branch are all part of one insn, containing an UNSPEC:
+
+ 4) (parallel [
+ (set (pc)
+ (if_then_else (gtu (unspec:SI [(plus:SI (reg:SI 14 lr)
+ (const_int -n))])
+ (const_int n-1]))
+ (label_ref)
+ (pc)))
+ (set (reg:SI 14 lr)
+ (plus:SI (reg:SI 14 lr)
+ (const_int -n)))
+ */
pattern = PATTERN (doloop_pat);
if (GET_CODE (pattern) != PARALLEL)
@@ -143,7 +158,7 @@ doloop_condition_get (rtx_insn *doloop_pat)
|| GET_CODE (cmp_arg1) != PLUS)
return 0;
reg_orig = XEXP (cmp_arg1, 0);
- if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
+ if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
|| !REG_P (reg_orig))
return 0;
cc_reg = SET_DEST (cmp_orig);
@@ -173,15 +188,17 @@ doloop_condition_get (rtx_insn *doloop_pat)
if (! REG_P (reg))
return 0;
- /* Check if something = (plus (reg) (const_int -1)).
+ /* Check if something = (plus (reg) (const_int -n)).
On IA-64, this decrement is wrapped in an if_then_else. */
inc_src = SET_SRC (inc);
if (GET_CODE (inc_src) == IF_THEN_ELSE)
inc_src = XEXP (inc_src, 1);
if (GET_CODE (inc_src) != PLUS
|| XEXP (inc_src, 0) != reg
- || XEXP (inc_src, 1) != constm1_rtx)
+ || !CONST_INT_P (XEXP (inc_src, 1))
+ || INTVAL (XEXP (inc_src, 1)) >= 0)
return 0;
+ int dec_num = abs (INTVAL (XEXP (inc_src, 1)));
/* Check for (set (pc) (if_then_else (condition)
(label_ref (label))
@@ -196,60 +213,71 @@ doloop_condition_get (rtx_insn *doloop_pat)
/* Extract loop termination condition. */
condition = XEXP (SET_SRC (cmp), 0);
- /* We expect a GE or NE comparison with 0 or 1. */
- if ((GET_CODE (condition) != GE
- && GET_CODE (condition) != NE)
- || (XEXP (condition, 1) != const0_rtx
- && XEXP (condition, 1) != const1_rtx))
+ /* We expect a GE or NE comparison with 0 or 1, or a GTU comparison with
+ dec_num - 1. */
+ if (!((GET_CODE (condition) == GE
+ || GET_CODE (condition) == NE)
+ && (XEXP (condition, 1) == const0_rtx
+ || XEXP (condition, 1) == const1_rtx ))
+ &&!(GET_CODE (condition) == GTU
+ && ((INTVAL (XEXP (condition, 1))) == (dec_num - 1))))
return 0;
- if ((XEXP (condition, 0) == reg)
+ /* For the ARM special case of having a GTU: re-form the condition without
+ the unspec for the benefit of the middle-end. */
+ if (GET_CODE (condition) == GTU)
+ {
+ condition = gen_rtx_fmt_ee (GTU, VOIDmode, inc_src,
+ GEN_INT (dec_num - 1));
+ return condition;
+ }
+ else if ((XEXP (condition, 0) == reg)
/* For the third case: */
|| ((cc_reg != NULL_RTX)
&& (XEXP (condition, 0) == cc_reg)
&& (reg_orig == reg))
|| (GET_CODE (XEXP (condition, 0)) == PLUS
&& XEXP (XEXP (condition, 0), 0) == reg))
- {
+ {
if (GET_CODE (pattern) != PARALLEL)
/* For the second form we expect:
- (set (reg) (plus (reg) (const_int -1))
- (set (pc) (if_then_else (reg != 0)
- (label_ref (label))
- (pc))).
+ (set (reg) (plus (reg) (const_int -1))
+ (set (pc) (if_then_else (reg != 0)
+ (label_ref (label))
+ (pc))).
- is equivalent to the following:
+ is equivalent to the following:
- (parallel [(set (pc) (if_then_else (reg != 1)
- (label_ref (label))
- (pc)))
- (set (reg) (plus (reg) (const_int -1)))
- (additional clobbers and uses)])
+ (parallel [(set (pc) (if_then_else (reg != 1)
+ (label_ref (label))
+ (pc)))
+ (set (reg) (plus (reg) (const_int -1)))
+ (additional clobbers and uses)])
- For the third form we expect:
+ For the third form we expect:
- (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
- (set (reg) (plus (reg) (const_int -1)))])
- (set (pc) (if_then_else (cc == NE)
- (label_ref (label))
- (pc)))
+ (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
+ (set (reg) (plus (reg) (const_int -1)))])
+ (set (pc) (if_then_else (cc == NE)
+ (label_ref (label))
+ (pc)))
- which is equivalent to the following:
+ which is equivalent to the following:
- (parallel [(set (cc) (compare (reg, 1))
- (set (reg) (plus (reg) (const_int -1)))
- (set (pc) (if_then_else (NE == cc)
- (label_ref (label))
- (pc))))])
+ (parallel [(set (cc) (compare (reg, 1))
+ (set (reg) (plus (reg) (const_int -1)))
+ (set (pc) (if_then_else (NE == cc)
+ (label_ref (label))
+ (pc))))])
- So we return the second form instead for the two cases.
+ So we return the second form instead for the two cases.
*/
- condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
+ condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
return condition;
- }
+ }
/* ??? If a machine uses a funny comparison, we could return a
canonicalized form here. */
@@ -507,6 +535,11 @@ doloop_modify (class loop *loop, class niter_desc *desc,
nonneg = 1;
break;
+ case GTU:
+ /* The iteration count does not need incrementing for a GTU test. */
+ increment_count = false;
+ break;
+
/* Abort if an invalid doloop pattern has been generated. */
default:
gcc_unreachable ();
@@ -529,6 +562,10 @@ doloop_modify (class loop *loop, class niter_desc *desc,
if (desc->noloop_assumptions)
{
+ /* The GTU case has only been implemented for the ARM target, where
+ noloop_assumptions gets explicitly set to NULL for that case, so
+ assert here for safety. */
+ gcc_assert (GET_CODE (condition) != GTU);
rtx ass = copy_rtx (desc->noloop_assumptions);
basic_block preheader = loop_preheader_edge (loop)->src;
basic_block set_zero = split_edge (loop_preheader_edge (loop));
@@ -642,7 +679,7 @@ doloop_optimize (class loop *loop)
{
scalar_int_mode mode;
rtx doloop_reg;
- rtx count;
+ rtx count = NULL_RTX;
widest_int iterations, iterations_max;
rtx_code_label *start_label;
rtx condition;
@@ -685,17 +722,6 @@ doloop_optimize (class loop *loop)
return false;
}
- max_cost
- = COSTS_N_INSNS (param_max_iterations_computation_cost);
- if (set_src_cost (desc->niter_expr, mode, optimize_loop_for_speed_p (loop))
- > max_cost)
- {
- if (dump_file)
- fprintf (dump_file,
- "Doloop: number of iterations too costly to compute.\n");
- return false;
- }
-
if (desc->const_iter)
iterations = widest_int::from (rtx_mode_t (desc->niter_expr, mode),
UNSIGNED);
@@ -716,12 +742,25 @@ doloop_optimize (class loop *loop)
/* Generate looping insn. If the pattern FAILs then give up trying
to modify the loop since there is some aspect the back-end does
- not like. */
- count = copy_rtx (desc->niter_expr);
+ not like. If this succeeds, there is a chance that the loop
+ desc->niter_expr has been altered by the backend, so only extract
+ that data after the gen_doloop_end. */
start_label = block_label (desc->in_edge->dest);
doloop_reg = gen_reg_rtx (mode);
rtx_insn *doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label);
+ max_cost
+ = COSTS_N_INSNS (param_max_iterations_computation_cost);
+ if (set_src_cost (desc->niter_expr, mode, optimize_loop_for_speed_p (loop))
+ > max_cost)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "Doloop: number of iterations too costly to compute.\n");
+ return false;
+ }
+
+ count = copy_rtx (desc->niter_expr);
word_mode_size = GET_MODE_PRECISION (word_mode);
word_mode_max = (HOST_WIDE_INT_1U << (word_mode_size - 1) << 1) - 1;
if (! doloop_seq
diff --git a/gcc/testsuite/gcc.target/arm/lob.h b/gcc/testsuite/gcc.target/arm/lob.h
index feaae7cc89959b3147368980120700bbc3e85ecb..3941fe7a8b620e62a5f742722be1ba2d031f5a8d 100644
--- a/gcc/testsuite/gcc.target/arm/lob.h
+++ b/gcc/testsuite/gcc.target/arm/lob.h
@@ -1,15 +1,131 @@
#include <string.h>
-
+#include <stdint.h>
/* Common code for lob tests. */
#define NO_LOB asm volatile ("@ clobber lr" : : : "lr" )
-#define N 10000
+#define N 100
+
+static void
+reset_data (int *a, int *b, int *c, int x)
+{
+ memset (a, -1, x * sizeof (*a));
+ memset (b, -1, x * sizeof (*b));
+ memset (c, 0, x * sizeof (*c));
+}
+
+static void
+reset_data8 (int8_t *a, int8_t *b, int8_t *c, int x)
+{
+ memset (a, -1, x * sizeof (*a));
+ memset (b, -1, x * sizeof (*b));
+ memset (c, 0, x * sizeof (*c));
+}
+
+static void
+reset_data16 (int16_t *a, int16_t *b, int16_t *c, int x)
+{
+ memset (a, -1, x * sizeof (*a));
+ memset (b, -1, x * sizeof (*b));
+ memset (c, 0, x * sizeof (*c));
+}
+
+static void
+reset_data32 (int32_t *a, int32_t *b, int32_t *c, int x)
+{
+ memset (a, -1, x * sizeof (*a));
+ memset (b, -1, x * sizeof (*b));
+ memset (c, 0, x * sizeof (*c));
+}
+
+static void
+reset_data64 (int64_t *a, int64_t *c, int x)
+{
+ memset (a, -1, x * sizeof (*a));
+ memset (c, 0, x * sizeof (*c));
+}
+
+static void
+check_plus (int *a, int *b, int *c, int x)
+{
+ for (int i = 0; i < N; i++)
+ {
+ NO_LOB;
+ if (i < x)
+ {
+ if (c[i] != (a[i] + b[i])) abort ();
+ }
+ else
+ {
+ if (c[i] != 0) abort ();
+ }
+ }
+}
+
+static void
+check_plus8 (int8_t *a, int8_t *b, int8_t *c, int x)
+{
+ for (int i = 0; i < N; i++)
+ {
+ NO_LOB;
+ if (i < x)
+ {
+ if (c[i] != (a[i] + b[i])) abort ();
+ }
+ else
+ {
+ if (c[i] != 0) abort ();
+ }
+ }
+}
+
+static void
+check_plus16 (int16_t *a, int16_t *b, int16_t *c, int x)
+{
+ for (int i = 0; i < N; i++)
+ {
+ NO_LOB;
+ if (i < x)
+ {
+ if (c[i] != (a[i] + b[i])) abort ();
+ }
+ else
+ {
+ if (c[i] != 0) abort ();
+ }
+ }
+}
+
+static void
+check_plus32 (int32_t *a, int32_t *b, int32_t *c, int x)
+{
+ for (int i = 0; i < N; i++)
+ {
+ NO_LOB;
+ if (i < x)
+ {
+ if (c[i] != (a[i] + b[i])) abort ();
+ }
+ else
+ {
+ if (c[i] != 0) abort ();
+ }
+ }
+}
static void
-reset_data (int *a, int *b, int *c)
+check_memcpy64 (int64_t *a, int64_t *c, int x)
{
- memset (a, -1, N * sizeof (*a));
- memset (b, -1, N * sizeof (*b));
- memset (c, -1, N * sizeof (*c));
+ for (int i = 0; i < N; i++)
+ {
+ NO_LOB;
+ if (i < x)
+ {
+ if (c[i] != a[i]) abort ();
+ }
+ else
+ {
+ if (c[i] != 0) abort ();
+ }
+ }
}
diff --git a/gcc/testsuite/gcc.target/arm/lob1.c b/gcc/testsuite/gcc.target/arm/lob1.c
index ba5c82cd55c582c96a18ad417a3041e43d843613..c8ce653a5c39fb1ffcf82a6e584d9a0467a130c0 100644
--- a/gcc/testsuite/gcc.target/arm/lob1.c
+++ b/gcc/testsuite/gcc.target/arm/lob1.c
@@ -54,29 +54,18 @@ loop3 (int *a, int *b, int *c)
} while (i < N);
}
-void
-check (int *a, int *b, int *c)
-{
- for (int i = 0; i < N; i++)
- {
- NO_LOB;
- if (c[i] != a[i] + b[i])
- abort ();
- }
-}
-
int
main (void)
{
- reset_data (a, b, c);
+ reset_data (a, b, c, N);
loop1 (a, b ,c);
- check (a, b ,c);
- reset_data (a, b, c);
+ check_plus (a, b, c, N);
+ reset_data (a, b, c, N);
loop2 (a, b ,c);
- check (a, b ,c);
- reset_data (a, b, c);
+ check_plus (a, b, c, N);
+ reset_data (a, b, c, N);
loop3 (a, b ,c);
- check (a, b ,c);
+ check_plus (a, b, c, N);
return 0;
}
diff --git a/gcc/testsuite/gcc.target/arm/lob6.c b/gcc/testsuite/gcc.target/arm/lob6.c
index 17b6124295e8ae9e1cb57e41fa43a954b3390eec..4fe116e2c2be3748d1bb6da7bb9092db8f962abc 100644
--- a/gcc/testsuite/gcc.target/arm/lob6.c
+++ b/gcc/testsuite/gcc.target/arm/lob6.c
@@ -79,14 +79,14 @@ check (void)
int
main (void)
{
- reset_data (a1, b1, c1);
- reset_data (a2, b2, c2);
+ reset_data (a1, b1, c1, N);
+ reset_data (a2, b2, c2, N);
loop1 (a1, b1, c1);
ref1 (a2, b2, c2);
check ();
- reset_data (a1, b1, c1);
- reset_data (a2, b2, c2);
+ reset_data (a1, b1, c1, N);
+ reset_data (a2, b2, c2, N);
loop2 (a1, b1, c1);
ref2 (a2, b2, c2);
check ();
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm.c
new file mode 100644
index 0000000000000000000000000000000000000000..5ddd994e53d55c7b4d05bfb858e6078ce7da4ce4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm.c
@@ -0,0 +1,561 @@
+/* { dg-do compile { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-options "-O3 -save-temps" } */
+/* { dg-add-options arm_v8_1m_mve } */
+
+#include <arm_mve.h>
+
+#define IMM 5
+
+#define TEST_COMPILE_IN_DLSTP_TERNARY(BITS, LANES, LDRSTRYTPE, TYPE, SIGN, NAME, PRED) \
+void test_##NAME##PRED##_##SIGN##BITS (TYPE##BITS##_t *a, TYPE##BITS##_t *b, TYPE##BITS##_t *c, int n) \
+{ \
+ while (n > 0) \
+ { \
+ mve_pred16_t p = vctp##BITS##q (n); \
+ TYPE##BITS##x##LANES##_t va = vldr##LDRSTRYTPE##q_z_##SIGN##BITS (a, p); \
+ TYPE##BITS##x##LANES##_t vb = vldr##LDRSTRYTPE##q_z_##SIGN##BITS (b, p); \
+ TYPE##BITS##x##LANES##_t vc = NAME##PRED##_##SIGN##BITS (va, vb, p); \
+ vstr##LDRSTRYTPE##q_p_##SIGN##BITS (c, vc, p); \
+ c += LANES; \
+ a += LANES; \
+ b += LANES; \
+ n -= LANES; \
+ } \
+}
+
+#define TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY(BITS, LANES, LDRSTRYTPE, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_TERNARY (BITS, LANES, LDRSTRYTPE, int, s, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_TERNARY (BITS, LANES, LDRSTRYTPE, uint, u, NAME, PRED)
+
+#define TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY(NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY (8, 16, b, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY (16, 8, h, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY (32, 4, w, NAME, PRED)
+
+
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY (vaddq, _x)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY (vmulq, _x)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY (vsubq, _x)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY (vhaddq, _x)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY (vorrq, _x)
+
+
+#define TEST_COMPILE_IN_DLSTP_TERNARY_M(BITS, LANES, LDRSTRYTPE, TYPE, SIGN, NAME, PRED) \
+void test_##NAME##PRED##_##SIGN##BITS (TYPE##BITS##x##LANES##_t __inactive, TYPE##BITS##_t *a, TYPE##BITS##_t *b, TYPE##BITS##_t *c, int n) \
+{ \
+ while (n > 0) \
+ { \
+ mve_pred16_t p = vctp##BITS##q (n); \
+ TYPE##BITS##x##LANES##_t va = vldr##LDRSTRYTPE##q_z_##SIGN##BITS (a, p); \
+ TYPE##BITS##x##LANES##_t vb = vldr##LDRSTRYTPE##q_z_##SIGN##BITS (b, p); \
+ TYPE##BITS##x##LANES##_t vc = NAME##PRED##_##SIGN##BITS (__inactive, va, vb, p); \
+ vstr##LDRSTRYTPE##q_p_##SIGN##BITS (c, vc, p); \
+ c += LANES; \
+ a += LANES; \
+ b += LANES; \
+ n -= LANES; \
+ } \
+}
+
+#define TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_M(BITS, LANES, LDRSTRYTPE, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_TERNARY_M (BITS, LANES, LDRSTRYTPE, int, s, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_TERNARY_M (BITS, LANES, LDRSTRYTPE, uint, u, NAME, PRED)
+
+#define TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M(NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_M (8, 16, b, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_M (16, 8, h, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_M (32, 4, w, NAME, PRED)
+
+
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M (vaddq, _m)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M (vmulq, _m)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M (vsubq, _m)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M (vhaddq, _m)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M (vorrq, _m)
+
+#define TEST_COMPILE_IN_DLSTP_TERNARY_N(BITS, LANES, LDRSTRYTPE, TYPE, SIGN, NAME, PRED) \
+void test_##NAME##PRED##_n_##SIGN##BITS (TYPE##BITS##_t *a, TYPE##BITS##_t *c, int n) \
+{ \
+ while (n > 0) \
+ { \
+ mve_pred16_t p = vctp##BITS##q (n); \
+ TYPE##BITS##x##LANES##_t va = vldr##LDRSTRYTPE##q_z_##SIGN##BITS (a, p); \
+ TYPE##BITS##x##LANES##_t vc = NAME##PRED##_n_##SIGN##BITS (va, IMM, p); \
+ vstr##LDRSTRYTPE##q_p_##SIGN##BITS (c, vc, p); \
+ c += LANES; \
+ a += LANES; \
+ n -= LANES; \
+ } \
+}
+
+#define TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_N(BITS, LANES, LDRSTRYTPE, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_TERNARY_N (BITS, LANES, LDRSTRYTPE, int, s, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_TERNARY_N (BITS, LANES, LDRSTRYTPE, uint, u, NAME, PRED)
+
+#define TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_N(NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_N (8, 16, b, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_N (16, 8, h, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_N (32, 4, w, NAME, PRED)
+
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_N (vaddq, _x)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_N (vmulq, _x)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_N (vsubq, _x)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_N (vhaddq, _x)
+
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_N (vbrsrq, _x)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_N (vshlq, _x)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_N (vshrq, _x)
+
+#define TEST_COMPILE_IN_DLSTP_TERNARY_M_N(BITS, LANES, LDRSTRYTPE, TYPE, SIGN, NAME, PRED) \
+void test_##NAME##PRED##_n_##SIGN##BITS (TYPE##BITS##x##LANES##_t __inactive, TYPE##BITS##_t *a, TYPE##BITS##_t *c, int n) \
+{ \
+ while (n > 0) \
+ { \
+ mve_pred16_t p = vctp##BITS##q (n); \
+ TYPE##BITS##x##LANES##_t va = vldr##LDRSTRYTPE##q_z_##SIGN##BITS (a, p); \
+ TYPE##BITS##x##LANES##_t vc = NAME##PRED##_n_##SIGN##BITS (__inactive, va, IMM, p); \
+ vstr##LDRSTRYTPE##q_p_##SIGN##BITS (c, vc, p); \
+ c += LANES; \
+ a += LANES; \
+ n -= LANES; \
+ } \
+}
+
+#define TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_M_N(BITS, LANES, LDRSTRYTPE, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_TERNARY_M_N (BITS, LANES, LDRSTRYTPE, int, s, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_TERNARY_M_N (BITS, LANES, LDRSTRYTPE, uint, u, NAME, PRED)
+
+#define TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M_N(NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_M_N (8, 16, b, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_M_N (16, 8, h, NAME, PRED) \
+TEST_COMPILE_IN_DLSTP_SIGNED_UNSIGNED_TERNARY_M_N (32, 4, w, NAME, PRED)
+
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M_N (vaddq, _m)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M_N (vmulq, _m)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M_N (vsubq, _m)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M_N (vhaddq, _m)
+
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M_N (vbrsrq, _m)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M_N (vshlq, _m)
+TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY_M_N (vshrq, _m)
+
+/* Now test some more configurations. */
+
+/* Using a >=1 condition. */
+void test1 (int32_t *a, int32_t *b, int32_t *c, int n)
+{
+ while (n >= 1)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p);
+ vstrwq_p_s32 (c, vc, p);
+ c+=4;
+ a+=4;
+ b+=4;
+ n-=4;
+ }
+}
+
+/* Test a for loop format of decrementing to zero */
+int32_t a[] = {0, 1, 2, 3, 4, 5, 6, 7};
+void test2 (int32_t *b, int num_elems)
+{
+ for (int i = num_elems; i > 0; i-= 4)
+ {
+ mve_pred16_t p = vctp32q (i);
+ int32x4_t va = vldrwq_z_s32 (&(a[i]), p);
+ vstrwq_p_s32 (b + i, va, p);
+ }
+}
+
+/* Iteration counter counting up to num_iter. */
+void test3 (uint8_t *a, uint8_t *b, uint8_t *c, int n)
+{
+ int num_iter = (n + 15)/16;
+ for (int i = 0; i < num_iter; i++)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_x_u8 (va, vb, p);
+ vstrbq_p_u8 (c, vc, p);
+ n-=16;
+ }
+}
+
+/* Iteration counter counting down from num_iter. */
+void test4 (uint8_t *a, uint8_t *b, uint8_t *c, int n)
+{
+ int num_iter = (n + 15)/16;
+ for (int i = num_iter; i > 0; i--)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_x_u8 (va, vb, p);
+ vstrbq_p_u8 (c, vc, p);
+ n-=16;
+ }
+}
+
+/* Using an unpredicated arithmetic instruction within the loop. */
+void test5 (uint8_t *a, uint8_t *b, uint8_t *c, uint8_t *d, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_u8 (b);
+ /* Is affected by implicit predication, because vb also
+ came from an unpredicated load, but there is no functional
+ problem, because the result is used in a predicated store. */
+ uint8x16_t vc = vaddq_u8 (va, vb);
+ uint8x16_t vd = vaddq_x_u8 (va, vb, p);
+ vstrbq_p_u8 (c, vc, p);
+ vstrbq_p_u8 (d, vd, p);
+ n-=16;
+ }
+}
+
+/* Using a different VPR value for one instruction in the loop. */
+void test6 (int32_t *a, int32_t *b, int32_t *c, int n, mve_pred16_t p1)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ int32x4_t vb = vldrwq_z_s32 (b, p1);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Generating and using another VPR value in the loop, with a vctp.
+ The doloop logic will always try to do the transform on the first
+ vctp it encounters, so this is still expected to work. */
+void test7 (int32_t *a, int32_t *b, int32_t *c, int n, int g)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ mve_pred16_t p1 = vctp32q (g);
+ int32x4_t vb = vldrwq_z_s32 (b, p1);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Generating and using a different VPR value in the loop, with a vctp,
+ but this time the p1 will also change in every loop (still fine) */
+void test8 (int32_t *a, int32_t *b, int32_t *c, int n, int g)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ mve_pred16_t p1 = vctp32q (g);
+ int32x4_t vb = vldrwq_z_s32 (b, p1);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ g++;
+ }
+}
+
+/* Generating and using a different VPR value in the loop, with a vctp_m
+ that is independent of the loop vctp VPR. */
+void test9 (int32_t *a, int32_t *b, int32_t *c, int n, mve_pred16_t p1)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ mve_pred16_t p2 = vctp32q_m (n, p1);
+ int32x4_t vb = vldrwq_z_s32 (b, p1);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p2);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Generating and using a different VPR value in the loop,
+ with a vctp_m that is tied to the base vctp VPR. This
+ is still fine, because the vctp_m will be transformed
+ into a vctp and be implicitly predicated. */
+void test10 (int32_t *a, int32_t *b, int32_t *c, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ mve_pred16_t p1 = vctp32q_m (n, p);
+ int32x4_t vb = vldrwq_z_s32 (b, p1);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p1);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Generating and using a different VPR value in the loop, with a vcmp. */
+void test11 (int32_t *a, int32_t *b, int32_t *c, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ mve_pred16_t p1 = vcmpeqq_s32 (va, vb);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p1);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Generating and using a different VPR value in the loop, with a vcmp_m. */
+void test12 (int32_t *a, int32_t *b, int32_t *c, int n, mve_pred16_t p1)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ mve_pred16_t p2 = vcmpeqq_m_s32 (va, vb, p1);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p2);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Generating and using a different VPR value in the loop, with a vcmp_m
+ that is tied to the base vctp VPR (same as above, this will be turned
+ into a vcmp and be implicitly predicated). */
+void test13 (int32_t *a, int32_t *b, int32_t *c, int n, mve_pred16_t p1)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ mve_pred16_t p2 = vcmpeqq_m_s32 (va, vb, p);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p2);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Using an unpredicated op with a scalar output, where the result is valid
+ outside the bb. This is valid, because all the inputs to the unpredicated
+ op are correctly predicated. */
+uint8_t test14 (uint8_t *a, uint8_t *b, uint8_t *c, int n, uint8x16_t vx)
+{
+ uint8_t sum = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_m_u8 (vx, va, vb, p);
+ sum += vaddvq_u8 (vc);
+ a += 16;
+ b += 16;
+ n -= 16;
+ }
+ return sum;
+}
+
+/* Same as above, but with another scalar op between the unpredicated op and
+ the scalar op outside the loop. */
+uint8_t test15 (uint8_t *a, uint8_t *b, uint8_t *c, int n, uint8x16_t vx, int g)
+{
+ uint8_t sum = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_m_u8 (vx, va, vb, p);
+ sum += vaddvq_u8 (vc);
+ sum += g;
+ a += 16;
+ b += 16;
+ n -= 16;
+ }
+ return sum;
+}
+
+/* Using an unpredicated vcmp to generate a new predicate value in the
+ loop and then using it in a predicated store insn. */
+void test16 (int32_t *a, int32_t *b, int32_t *c, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ int32x4_t vb = vldrwq_s32 (b);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p);
+ mve_pred16_t p1 = vcmpeqq_s32 (va, vc);
+ vstrwq_p_s32 (c, vc, p1);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Using a predicated vcmp to generate a new predicate value in the
+ loop and then using it in a predicated store insn. */
+void test17 (int32_t *a, int32_t *b, int32_t *c, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ int32x4_t vc = vaddq_s32 (va, vb);
+ mve_pred16_t p1 = vcmpeqq_m_s32 (va, vc, p);
+ vstrwq_p_s32 (c, vc, p1);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Using an across-vector unpredicated instruction in a valid way.
+ This tests that "vc" has correctly masked the risky "vb". */
+uint16_t test18 (uint16_t *a, uint16_t *b, uint16_t *c, int n)
+{
+ uint16x8_t vb = vldrhq_u16 (b);
+ uint16_t res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp16q (n);
+ uint16x8_t va = vldrhq_z_u16 (a, p);
+ uint16x8_t vc = vaddq_x_u16 (va, vb, p);
+ res = vaddvq_u16 (vc);
+ c += 8;
+ a += 8;
+ b += 8;
+ n -= 8;
+ }
+ return res;
+}
+
+/* Using an across-vector unpredicated instruction with a scalar from outside the loop. */
+uint16_t test19 (uint16_t *a, uint16_t *b, uint16_t *c, int n)
+{
+ uint16x8_t vb = vldrhq_u16 (b);
+ uint16_t res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp16q (n);
+ uint16x8_t va = vldrhq_z_u16 (a, p);
+ uint16x8_t vc = vaddq_x_u16 (va, vb, p);
+ res = vaddvaq_u16 (res, vc);
+ c += 8;
+ a += 8;
+ b += 8;
+ n -= 8;
+ }
+ return res;
+}
+
+/* Using an across-vector predicated instruction in a valid way. */
+uint16_t test20 (uint16_t *a, uint16_t *b, uint16_t *c, int n)
+{
+ uint16_t res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp16q (n);
+ uint16x8_t vb = vldrhq_u16 (b);
+ uint16x8_t va = vldrhq_z_u16 (a, p);
+ res = vaddvaq_p_u16 (res, vb, p);
+ c += 8;
+ a += 8;
+ b += 8;
+ n -= 8;
+ }
+ return res;
+}
+
+/* Using an across-vector predicated instruction in a valid way. */
+uint16_t test21 (uint16_t *a, uint16_t *b, uint16_t *c, int n)
+{
+ uint16_t res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp16q (n);
+ uint16x8_t vb = vldrhq_u16 (b);
+ uint16x8_t va = vldrhq_z_u16 (a, p);
+ res++;
+ res = vaddvaq_p_u16 (res, vb, p);
+ c += 8;
+ a += 8;
+ b += 8;
+ n -= 8;
+ }
+ return res;
+}
+
+int test22 (uint8_t *a, uint8_t *b, uint8_t *c, int n)
+{
+ int res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ res = vmaxvq (res, va);
+ n-=16;
+ a+=16;
+ }
+ return res;
+}
+
+int test23 (int8_t *a, int8_t *b, int8_t *c, int n)
+{
+ int res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ int8x16_t va = vldrbq_z_s8 (a, p);
+ res = vmaxavq (res, va);
+ n-=16;
+ a+=16;
+ }
+ return res;
+}
+
+/* The final number of DLSTPs currently is calculated by the number of
+ `TEST_COMPILE_IN_DLSTP_INTBITS_SIGNED_UNSIGNED_TERNARY.*` macros * 6 + 23. */
+/* { dg-final { scan-assembler-times {\tdlstp} 167 } } */
+/* { dg-final { scan-assembler-times {\tletp} 167 } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-int16x8-run.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-int16x8-run.c
new file mode 100644
index 0000000000000000000000000000000000000000..6966a3966046fce59bdabda639c048ed398cac20
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-int16x8-run.c
@@ -0,0 +1,44 @@
+/* { dg-do run { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-require-effective-target arm_mve_hw } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_v8_1m_mve } */
+#include "dlstp-int16x8.c"
+
+int main ()
+{
+ int i;
+ int16_t temp1[N];
+ int16_t temp2[N];
+ int16_t temp3[N];
+ reset_data16 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 0);
+ check_plus16 (temp1, temp2, temp3, 0);
+
+ reset_data16 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 1);
+ check_plus16 (temp1, temp2, temp3, 1);
+
+ reset_data16 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 7);
+ check_plus16 (temp1, temp2, temp3, 7);
+
+ reset_data16 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 8);
+ check_plus16 (temp1, temp2, temp3, 8);
+
+ reset_data16 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 9);
+ check_plus16 (temp1, temp2, temp3, 9);
+
+ reset_data16 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 16);
+ check_plus16 (temp1, temp2, temp3, 16);
+
+ reset_data16 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 17);
+ check_plus16 (temp1, temp2, temp3, 17);
+
+ reset_data16 (temp1, temp2, temp3, N);
+}
+
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-int16x8.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-int16x8.c
new file mode 100644
index 0000000000000000000000000000000000000000..33632c5f14dc6603d56934dfdd0072a980fbd01e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-int16x8.c
@@ -0,0 +1,31 @@
+/* { dg-do compile { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_v8_1m_mve } */
+
+#include <arm_mve.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "../lob.h"
+
+void __attribute__ ((noinline)) test (int16_t *a, int16_t *b, int16_t *c, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp16q (n);
+ int16x8_t va = vldrhq_z_s16 (a, p);
+ int16x8_t vb = vldrhq_z_s16 (b, p);
+ int16x8_t vc = vaddq_x_s16 (va, vb, p);
+ vstrhq_p_s16 (c, vc, p);
+ c+=8;
+ a+=8;
+ b+=8;
+ n-=8;
+ }
+}
+
+/* { dg-final { scan-assembler-times {\tdlstp.16} 1 } } */
+/* { dg-final { scan-assembler-times {\tletp} 1 } } */
+/* { dg-final { scan-assembler-not "\tvctp" } } */
+/* { dg-final { scan-assembler-not "\tvpst" } } */
+/* { dg-final { scan-assembler-not "p0" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-int32x4-run.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-int32x4-run.c
new file mode 100644
index 0000000000000000000000000000000000000000..6833dddde92b7cf16a18d42c003ee5bd2b9da847
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-int32x4-run.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-require-effective-target arm_mve_hw } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_v8_1m_mve } */
+
+#include "dlstp-int32x4.c"
+
+int main ()
+{
+ int i;
+ int32_t temp1[N];
+ int32_t temp2[N];
+ int32_t temp3[N];
+ reset_data32 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 0);
+ check_plus32 (temp1, temp2, temp3, 0);
+
+ reset_data32 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 1);
+ check_plus32 (temp1, temp2, temp3, 1);
+
+ reset_data32 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 3);
+ check_plus32 (temp1, temp2, temp3, 3);
+
+ reset_data32 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 4);
+ check_plus32 (temp1, temp2, temp3, 4);
+
+ reset_data32 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 5);
+ check_plus32 (temp1, temp2, temp3, 5);
+
+ reset_data32 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 8);
+ check_plus32 (temp1, temp2, temp3, 8);
+
+ reset_data32 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 9);
+ check_plus32 (temp1, temp2, temp3, 9);
+
+ reset_data32 (temp1, temp2, temp3, N);
+}
+
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-int32x4.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-int32x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..5d09f784b7716c14e56086b7e66eb12b31772a45
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-int32x4.c
@@ -0,0 +1,31 @@
+/* { dg-do compile { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_v8_1m_mve } */
+
+#include <arm_mve.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "../lob.h"
+
+void __attribute__ ((noinline)) test (int32_t *a, int32_t *b, int32_t *c, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p);
+ vstrwq_p_s32 (c, vc, p);
+ c+=4;
+ a+=4;
+ b+=4;
+ n-=4;
+ }
+}
+
+/* { dg-final { scan-assembler-times {\tdlstp.32} 1 } } */
+/* { dg-final { scan-assembler-times {\tletp} 1 } } */
+/* { dg-final { scan-assembler-not "\tvctp" } } */
+/* { dg-final { scan-assembler-not "\tvpst" } } */
+/* { dg-final { scan-assembler-not "p0" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-int64x2-run.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-int64x2-run.c
new file mode 100644
index 0000000000000000000000000000000000000000..cc0b9ce7ee9a5a8400b18f539ff96b8e675414cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-int64x2-run.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-require-effective-target arm_mve_hw } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_v8_1m_mve } */
+
+#include "dlstp-int64x2.c"
+
+int main ()
+{
+ int i;
+ int64_t temp1[N];
+ int64_t temp3[N];
+ reset_data64 (temp1, temp3, N);
+ test (temp1, temp3, 0);
+ check_memcpy64 (temp1, temp3, 0);
+
+ reset_data64 (temp1, temp3, N);
+ test (temp1, temp3, 1);
+ check_memcpy64 (temp1, temp3, 1);
+
+ reset_data64 (temp1, temp3, N);
+ test (temp1, temp3, 2);
+ check_memcpy64 (temp1, temp3, 2);
+
+ reset_data64 (temp1, temp3, N);
+ test (temp1, temp3, 3);
+ check_memcpy64 (temp1, temp3, 3);
+
+ reset_data64 (temp1, temp3, N);
+ test (temp1, temp3, 4);
+ check_memcpy64 (temp1, temp3, 4);
+
+ reset_data64 (temp1, temp3, N);
+ test (temp1, temp3, 5);
+ check_memcpy64 (temp1, temp3, 5);
+
+ reset_data64 (temp1, temp3, N);
+ test (temp1, temp3, 6);
+ check_memcpy64 (temp1, temp3, 6);
+
+ reset_data64 (temp1, temp3, N);
+ test (temp1, temp3, 7);
+ check_memcpy64 (temp1, temp3, 7);
+
+ reset_data64 (temp1, temp3, N);
+}
+
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-int64x2.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-int64x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..21e882424ec3ba4e7a141eadb0f4e593146e81ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-int64x2.c
@@ -0,0 +1,28 @@
+/* { dg-do compile { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_v8_1m_mve } */
+
+#include <arm_mve.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "../lob.h"
+
+void __attribute__ ((noinline)) test (int64_t *a, int64_t *c, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp64q (n);
+ int64x2_t va = vldrdq_gather_offset_z_s64 (a, vcreateq_u64 (0, 8), p);
+ vstrdq_scatter_offset_p_s64 (c, vcreateq_u64 (0, 8), va, p);
+ c+=2;
+ a+=2;
+ n-=2;
+ }
+}
+
+/* { dg-final { scan-assembler-times {\tdlstp.64} 1 } } */
+/* { dg-final { scan-assembler-times {\tletp} 1 } } */
+/* { dg-final { scan-assembler-not "\tvctp" } } */
+/* { dg-final { scan-assembler-not "\tvpst" } } */
+/* { dg-final { scan-assembler-not "p0" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-int8x16.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-int8x16.c
new file mode 100644
index 0000000000000000000000000000000000000000..8ea181c82d45a008d60a66c1f9e9b289c5f05611
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-int8x16.c
@@ -0,0 +1,69 @@
+/* { dg-do run { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-require-effective-target arm_mve_hw } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_v8_1m_mve } */
+
+#include <arm_mve.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "../lob.h"
+
+void __attribute__ ((noinline)) test (int8_t *a, int8_t *b, int8_t *c, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ int8x16_t va = vldrbq_z_s8 (a, p);
+ int8x16_t vb = vldrbq_z_s8 (b, p);
+ int8x16_t vc = vaddq_x_s8 (va, vb, p);
+ vstrbq_p_s8 (c, vc, p);
+ c+=16;
+ a+=16;
+ b+=16;
+ n-=16;
+ }
+}
+
+int main ()
+{
+ int i;
+ int8_t temp1[N];
+ int8_t temp2[N];
+ int8_t temp3[N];
+ reset_data8 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 0);
+ check_plus8 (temp1, temp2, temp3, 0);
+
+ reset_data8 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 1);
+ check_plus8 (temp1, temp2, temp3, 1);
+
+ reset_data8 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 15);
+ check_plus8 (temp1, temp2, temp3, 15);
+
+ reset_data8 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 16);
+ check_plus8 (temp1, temp2, temp3, 16);
+
+ reset_data8 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 17);
+ check_plus8 (temp1, temp2, temp3, 17);
+
+ reset_data8 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 32);
+ check_plus8 (temp1, temp2, temp3, 32);
+
+ reset_data8 (temp1, temp2, temp3, N);
+ test (temp1, temp2, temp3, 33);
+ check_plus8 (temp1, temp2, temp3, 33);
+
+ reset_data8 (temp1, temp2, temp3, N);
+}
+
+/* { dg-final { scan-assembler-times {\tdlstp.8} 1 } } */
+/* { dg-final { scan-assembler-times {\tletp} 1 } } */
+/* { dg-final { scan-assembler-not "\tvctp" } } */
+/* { dg-final { scan-assembler-not "\tvpst" } } */
+/* { dg-final { scan-assembler-not "p0" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-invalid-asm.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-invalid-asm.c
new file mode 100644
index 0000000000000000000000000000000000000000..f7c3e04f8831e6b6eb709c8f3b0a0a896313ca64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-invalid-asm.c
@@ -0,0 +1,391 @@
+/* { dg-do compile { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-options "-O3 -save-temps" } */
+/* { dg-add-options arm_v8_1m_mve } */
+
+#include <limits.h>
+#include <arm_mve.h>
+
+/* Terminating on a non-zero number of elements. */
+void test0 (uint8_t *a, uint8_t *b, uint8_t *c, int n)
+{
+ while (n > 1)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_x_u8 (va, vb, p);
+ vstrbq_p_u8 (c, vc, p);
+ n -= 16;
+ }
+}
+
+/* Terminating on n >= 0. */
+void test1 (uint8_t *a, uint8_t *b, uint8_t *c, int n)
+{
+ while (n >= 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_x_u8 (va, vb, p);
+ vstrbq_p_u8 (c, vc, p);
+ n -= 16;
+ }
+}
+
+/* Similar, terminating on a non-zero number of elements, but in a for loop
+ format. */
+int32_t a[] = {0, 1, 2, 3, 4, 5, 6, 7};
+void test2 (int32_t *b, int num_elems)
+{
+ for (int i = num_elems; i >= 2; i-= 4)
+ {
+ mve_pred16_t p = vctp32q (i);
+ int32x4_t va = vldrwq_z_s32 (&(a[i]), p);
+ vstrwq_p_s32 (b + i, va, p);
+ }
+}
+
+/* Iteration counter counting up to num_iter, with a non-zero starting num. */
+void test3 (uint8_t *a, uint8_t *b, uint8_t *c, int n)
+{
+ int num_iter = (n + 15)/16;
+ for (int i = 1; i < num_iter; i++)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_x_u8 (va, vb, p);
+ vstrbq_p_u8 (c, vc, p);
+ n -= 16;
+ }
+}
+
+/* Iteration counter counting up to num_iter, with a larger increment */
+void test4 (uint8_t *a, uint8_t *b, uint8_t *c, int n)
+{
+ int num_iter = (n + 15)/16;
+ for (int i = 0; i < num_iter; i+=2)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_x_u8 (va, vb, p);
+ vstrbq_p_u8 (c, vc, p);
+ n -= 16;
+ }
+}
+
+/* Using an unpredicated store instruction within the loop. */
+void test5 (uint8_t *a, uint8_t *b, uint8_t *c, uint8_t *d, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_u8 (va, vb);
+ uint8x16_t vd = vaddq_x_u8 (va, vb, p);
+ vstrbq_u8 (d, vd);
+ n -= 16;
+ }
+}
+
+/* Using an unpredicated store outside the loop. */
+void test6 (uint8_t *a, uint8_t *b, uint8_t *c, int n, uint8x16_t vx)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_m_u8 (vx, va, vb, p);
+ vx = vaddq_u8 (vx, vc);
+ a += 16;
+ b += 16;
+ n -= 16;
+ }
+ vstrbq_u8 (c, vx);
+}
+
+/* Using a VPR that gets modified within the loop. */
+void test9 (int32_t *a, int32_t *b, int32_t *c, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ p++;
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Using a VPR that gets re-generated within the loop. */
+void test10 (int32_t *a, int32_t *b, int32_t *c, int n)
+{
+ mve_pred16_t p = vctp32q (n);
+ while (n > 0)
+ {
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ p = vctp32q (n);
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Using vctp32q_m instead of vctp32q. */
+void test11 (int32_t *a, int32_t *b, int32_t *c, int n, mve_pred16_t p0)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q_m (n, p0);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p);
+ vstrwq_p_s32 (c, vc, p);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Using an unpredicated op with a scalar output, where the result is valid
+ outside the bb. This is invalid, because one of the inputs to the
+ unpredicated op is also unpredicated. */
+uint8_t test12 (uint8_t *a, uint8_t *b, uint8_t *c, int n, uint8x16_t vx)
+{
+ uint8_t sum = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_u8 (b);
+ uint8x16_t vc = vaddq_u8 (va, vb);
+ sum += vaddvq_u8 (vc);
+ a += 16;
+ b += 16;
+ n -= 16;
+ }
+ return sum;
+}
+
+/* Using an unpredicated vcmp to generate a new predicate value in the
+ loop and then using that VPR to predicate a store insn. */
+void test13 (int32_t *a, int32_t *b, int32x4_t vc, int32_t *c, int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_s32 (a);
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ int32x4_t vc = vaddq_s32 (va, vb);
+ mve_pred16_t p1 = vcmpeqq_s32 (va, vc);
+ vstrwq_p_s32 (c, vc, p1);
+ c += 4;
+ a += 4;
+ b += 4;
+ n -= 4;
+ }
+}
+
+/* Using an across-vector unpredicated instruction. "vb" is the risk. */
+uint16_t test14 (uint16_t *a, uint16_t *b, uint16_t *c, int n)
+{
+ uint16x8_t vb = vldrhq_u16 (b);
+ uint16_t res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp16q (n);
+ uint16x8_t va = vldrhq_z_u16 (a, p);
+ vb = vaddq_u16 (va, vb);
+ res = vaddvq_u16 (vb);
+ c += 8;
+ a += 8;
+ b += 8;
+ n -= 8;
+ }
+ return res;
+}
+
+/* Using an across-vector unpredicated instruction. "vc" is the risk. */
+uint16_t test15 (uint16_t *a, uint16_t *b, uint16_t *c, int n)
+{
+ uint16x8_t vb = vldrhq_u16 (b);
+ uint16_t res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp16q (n);
+ uint16x8_t va = vldrhq_z_u16 (a, p);
+ uint16x8_t vc = vaddq_u16 (va, vb);
+ res = vaddvaq_u16 (res, vc);
+ c += 8;
+ a += 8;
+ b += 8;
+ n -= 8;
+ }
+ return res;
+}
+
+uint16_t test16 (uint16_t *a, uint16_t *b, uint16_t *c, int n)
+{
+ uint16_t res =0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp16q (n);
+ uint16x8_t vb = vldrhq_u16 (b);
+ uint16x8_t va = vldrhq_z_u16 (a, p);
+ res = vaddvaq_u16 (res, vb);
+ res = vaddvaq_p_u16 (res, va, p);
+ c += 8;
+ a += 8;
+ b += 8;
+ n -= 8;
+ }
+ return res;
+}
+
+int test17 (int8_t *a, int8_t *b, int8_t *c, int n)
+{
+ int res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ int8x16_t va = vldrbq_z_s8 (a, p);
+ res = vmaxvq (res, va);
+ n-=16;
+ a+=16;
+ }
+ return res;
+}
+
+
+
+int test18 (int8_t *a, int8_t *b, int8_t *c, int n)
+{
+ int res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ int8x16_t va = vldrbq_z_s8 (a, p);
+ res = vminvq (res, va);
+ n-=16;
+ a+=16;
+ }
+ return res;
+}
+
+int test19 (int8_t *a, int8_t *b, int8_t *c, int n)
+{
+ int res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ int8x16_t va = vldrbq_z_s8 (a, p);
+ res = vminavq (res, va);
+ n-=16;
+ a+=16;
+ }
+ return res;
+}
+
+int test20 (uint8_t *a, uint8_t *b, uint8_t *c, int n)
+{
+ int res = 0;
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ res = vminvq (res, va);
+ n-=16;
+ a+=16;
+ }
+ return res;
+}
+
+uint8x16_t test21 (uint8_t *a, uint32_t *b, int n, uint8x16_t res)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ res = vshlcq_u8 (va, b, 1);
+ n-=16;
+ a+=16;
+ }
+ return res;
+}
+
+int8x16_t test22 (int8_t *a, int32_t *b, int n, int8x16_t res)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp8q (n);
+ int8x16_t va = vldrbq_z_s8 (a, p);
+ res = vshlcq_s8 (va, b, 1);
+ n-=16;
+ a+=16;
+ }
+ return res;
+}
+
+/* Using an unsigned number of elements to count down from, with a >0*/
+void test23 (int32_t *a, int32_t *b, int32_t *c, unsigned int n)
+{
+ while (n > 0)
+ {
+ mve_pred16_t p = vctp32q (n);
+ int32x4_t va = vldrwq_z_s32 (a, p);
+ int32x4_t vb = vldrwq_z_s32 (b, p);
+ int32x4_t vc = vaddq_x_s32 (va, vb, p);
+ vstrwq_p_s32 (c, vc, p);
+ c+=4;
+ a+=4;
+ b+=4;
+ n-=4;
+ }
+}
+
+/* Using an unsigned number of elements to count up to, with a <n*/
+void test24 (uint8_t *a, uint8_t *b, uint8_t *c, unsigned int n)
+{
+ for (int i = 0; i < n; i+=16)
+ {
+ mve_pred16_t p = vctp8q (n-i);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_x_u8 (va, vb, p);
+ vstrbq_p_u8 (c, vc, p);
+ n-=16;
+ }
+}
+
+
+/* Using an unsigned number of elements to count up to, with a <=n*/
+void test25 (uint8_t *a, uint8_t *b, uint8_t *c, unsigned int n)
+{
+ for (int i = 1; i <= n; i+=16)
+ {
+ mve_pred16_t p = vctp8q (n-i+1);
+ uint8x16_t va = vldrbq_z_u8 (a, p);
+ uint8x16_t vb = vldrbq_z_u8 (b, p);
+ uint8x16_t vc = vaddq_x_u8 (va, vb, p);
+ vstrbq_p_u8 (c, vc, p);
+ n-=16;
+ }
+}
+
+/* { dg-final { scan-assembler-not "\tdlstp" } } */
+/* { dg-final { scan-assembler-not "\tletp" } } */
\ No newline at end of file
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2023-12-20 16:54 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-18 11:53 [PATCH 0/2] arm: Add support for MVE Tail-Predicated Low Overhead Loops Andre Vieira
2023-12-18 11:53 ` [PATCH 1/2] arm: Add define_attr to to create a mapping between MVE predicated and unpredicated insns Andre Vieira
2023-12-20 16:54 ` Andre Vieira (lists)
2023-12-18 11:53 ` [PATCH 2/2] arm: Add support for MVE Tail-Predicated Low Overhead Loops Andre Vieira
2023-12-20 16:54 ` Andre Vieira (lists)
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).