Hi All, This refactors the complex number bits of MVE to go through the same unspecs as the NEON variant. This is pre-work to allow code to be shared between NEON and MVE for the complex vectorization patches. Bootstrapped and regtested on arm-none-linux-gnueabihf with no issues. Codegen tested for -march=armv8.1-m.main+mve.fp -mfloat-abi=hard -mfpu=auto with no issues. This is just a split of a previously approved patch, because the original had a dependency on the AArch64 bits, which have been requested to be reworked. Will commit under the previous approval. Thanks, Tamar gcc/ChangeLog: * config/arm/arm_mve.h (__arm_vcmulq_rot90_f16, __arm_vcmulq_rot270_f16, __arm_vcmulq_rot180_f16, __arm_vcmulq_f16, __arm_vcmulq_rot90_f32, __arm_vcmulq_rot270_f32, __arm_vcmulq_rot180_f32, __arm_vcmulq_f32, __arm_vcmlaq_f16, __arm_vcmlaq_rot180_f16, __arm_vcmlaq_rot270_f16, __arm_vcmlaq_rot90_f16, __arm_vcmlaq_f32, __arm_vcmlaq_rot180_f32, __arm_vcmlaq_rot270_f32, __arm_vcmlaq_rot90_f32): Update builtin calls. * config/arm/arm_mve_builtins.def (vcmulq_f, vcmulq_rot90_f, vcmulq_rot180_f, vcmulq_rot270_f, vcmlaq_f, vcmlaq_rot90_f, vcmlaq_rot180_f, vcmlaq_rot270_f): Removed. (vcmulq, vcmulq_rot90, vcmulq_rot180, vcmulq_rot270, vcmlaq, vcmlaq_rot90, vcmlaq_rot180, vcmlaq_rot270): New. * config/arm/iterators.md (mve_rot): Add UNSPEC_VCMLA, UNSPEC_VCMLA90, UNSPEC_VCMLA180, UNSPEC_VCMLA270, UNSPEC_VCMUL, UNSPEC_VCMUL90, UNSPEC_VCMUL180, UNSPEC_VCMUL270. (VCMUL): New. * config/arm/mve.md (mve_vcmulq_f<mode>, mve_vcmulq_rot180_f<mode>, mve_vcmulq_rot270_f<mode>, mve_vcmulq_rot90_f<mode>, mve_vcmlaq_f<mode>, mve_vcmlaq_rot180_f<mode>, mve_vcmlaq_rot270_f<mode>, mve_vcmlaq_rot90_f<mode>): Removed. (mve_vcmlaq<mve_rot><mode>, mve_vcmulq<mve_rot><mode>, mve_vcaddq<mve_rot><mode>, cadd<rot><mode>3, mve_vcaddq<mve_rot><mode>): New. * config/arm/unspecs.md (UNSPEC_VCMUL90, UNSPEC_VCMUL270, UNSPEC_VCMUL, UNSPEC_VCMUL180): New. (VCMULQ_F, VCMULQ_ROT180_F, VCMULQ_ROT270_F, VCMULQ_ROT90_F, VCMLAQ_F, VCMLAQ_ROT180_F, VCMLAQ_ROT90_F, VCMLAQ_ROT270_F): Removed. 
--- inline copy of patch -- diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 987495dd234ad96ba1163a1f482fe183a46ff437..45014621f2533497e90ddf5257fb04e1fd9325b4 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -17348,28 +17348,28 @@ __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmulq_rot90_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_mve_vcmulq_rot90_fv8hf (__a, __b); + return __builtin_mve_vcmulq_rot90v8hf (__a, __b); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmulq_rot270_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_mve_vcmulq_rot270_fv8hf (__a, __b); + return __builtin_mve_vcmulq_rot270v8hf (__a, __b); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmulq_rot180_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_mve_vcmulq_rot180_fv8hf (__a, __b); + return __builtin_mve_vcmulq_rot180v8hf (__a, __b); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmulq_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_mve_vcmulq_fv8hf (__a, __b); + return __builtin_mve_vcmulqv8hf (__a, __b); } __extension__ extern __inline float16x8_t @@ -17600,28 +17600,28 @@ __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmulq_rot90_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_mve_vcmulq_rot90_fv4sf (__a, __b); + return __builtin_mve_vcmulq_rot90v4sf (__a, __b); } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmulq_rot270_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_mve_vcmulq_rot270_fv4sf (__a, __b); + return __builtin_mve_vcmulq_rot270v4sf (__a, 
__b); } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmulq_rot180_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_mve_vcmulq_rot180_fv4sf (__a, __b); + return __builtin_mve_vcmulq_rot180v4sf (__a, __b); } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmulq_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_mve_vcmulq_fv4sf (__a, __b); + return __builtin_mve_vcmulqv4sf (__a, __b); } __extension__ extern __inline float32x4_t @@ -17790,28 +17790,28 @@ __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmlaq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) { - return __builtin_mve_vcmlaq_fv8hf (__a, __b, __c); + return __builtin_mve_vcmlaqv8hf (__a, __b, __c); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmlaq_rot180_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) { - return __builtin_mve_vcmlaq_rot180_fv8hf (__a, __b, __c); + return __builtin_mve_vcmlaq_rot180v8hf (__a, __b, __c); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmlaq_rot270_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) { - return __builtin_mve_vcmlaq_rot270_fv8hf (__a, __b, __c); + return __builtin_mve_vcmlaq_rot270v8hf (__a, __b, __c); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmlaq_rot90_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) { - return __builtin_mve_vcmlaq_rot90_fv8hf (__a, __b, __c); + return __builtin_mve_vcmlaq_rot90v8hf (__a, __b, __c); } __extension__ extern __inline float16x8_t @@ -18098,28 +18098,28 @@ __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) __arm_vcmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) { - return __builtin_mve_vcmlaq_fv4sf (__a, __b, __c); + return __builtin_mve_vcmlaqv4sf (__a, __b, __c); } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmlaq_rot180_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) { - return __builtin_mve_vcmlaq_rot180_fv4sf (__a, __b, __c); + return __builtin_mve_vcmlaq_rot180v4sf (__a, __b, __c); } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmlaq_rot270_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) { - return __builtin_mve_vcmlaq_rot270_fv4sf (__a, __b, __c); + return __builtin_mve_vcmlaq_rot270v4sf (__a, __b, __c); } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmlaq_rot90_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) { - return __builtin_mve_vcmlaq_rot90_fv4sf (__a, __b, __c); + return __builtin_mve_vcmlaq_rot90v4sf (__a, __b, __c); } __extension__ extern __inline float32x4_t diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index b86e1793e686b07367732c4ffd0603deb78830bd..56b652fff0a6729d04982cc13a479587180b0208 100644 --- a/gcc/config/arm/arm_mve_builtins.def +++ b/gcc/config/arm/arm_mve_builtins.def @@ -260,10 +260,6 @@ VAR2 (BINOP_NONE_NONE_NONE, vmaxnmq_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vmaxnmavq_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vmaxnmaq_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, veorq_f, v8hf, v4sf) -VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot90_f, v8hf, v4sf) -VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot270_f, v8hf, v4sf) -VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot180_f, v8hf, v4sf) -VAR2 (BINOP_NONE_NONE_NONE, vcmulq_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vbicq_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vandq_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, 
vaddq_n_f, v8hf, v4sf) @@ -464,10 +460,6 @@ VAR2 (TERNOP_NONE_NONE_NONE_NONE, vfmsq_f, v8hf, v4sf) VAR2 (TERNOP_NONE_NONE_NONE_NONE, vfmasq_n_f, v8hf, v4sf) VAR2 (TERNOP_NONE_NONE_NONE_NONE, vfmaq_n_f, v8hf, v4sf) VAR2 (TERNOP_NONE_NONE_NONE_NONE, vfmaq_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot90_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot270_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot180_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_f, v8hf, v4sf) VAR2 (TERNOP_NONE_NONE_NONE_IMM, vshrntq_n_s, v8hi, v4si) VAR2 (TERNOP_NONE_NONE_NONE_IMM, vshrnbq_n_s, v8hi, v4si) VAR2 (TERNOP_NONE_NONE_NONE_IMM, vrshrntq_n_s, v8hi, v4si) @@ -890,3 +882,11 @@ VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlcq_m_carry_u, v16qi, v8hi, v4si) /* optabs without any suffixes. */ VAR5 (BINOP_NONE_NONE_NONE, vcaddq_rot90, v16qi, v8hi, v4si, v8hf, v4sf) VAR5 (BINOP_NONE_NONE_NONE, vcaddq_rot270, v16qi, v8hi, v4si, v8hf, v4sf) +VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot90, v8hf, v4sf) +VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot270, v8hf, v4sf) +VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot180, v8hf, v4sf) +VAR2 (BINOP_NONE_NONE_NONE, vcmulq, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot90, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot270, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot180, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq, v8hf, v4sf) diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index adf4c017735812fdb318f615ede1407298377519..c6b1b28c896f9fb3081021ea4efcae8abaaceb55 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -1183,7 +1183,18 @@ (define_int_attr rot [(UNSPEC_VCADD90 "90") (UNSPEC_VCMLA270 "270")]) (define_int_attr mve_rot [(UNSPEC_VCADD90 "_rot90") - (UNSPEC_VCADD270 "_rot270")]) + (UNSPEC_VCADD270 "_rot270") + (UNSPEC_VCMLA "") + (UNSPEC_VCMLA90 "_rot90") + (UNSPEC_VCMLA180 "_rot180") + (UNSPEC_VCMLA270 "_rot270") + (UNSPEC_VCMUL "") 
+ (UNSPEC_VCMUL90 "_rot90") + (UNSPEC_VCMUL180 "_rot180") + (UNSPEC_VCMUL270 "_rot270")]) + +(define_int_iterator VCMUL [UNSPEC_VCMUL UNSPEC_VCMUL90 + UNSPEC_VCMUL180 UNSPEC_VCMUL270]) (define_int_attr simd32_op [(UNSPEC_QADD8 "qadd8") (UNSPEC_QSUB8 "qsub8") (UNSPEC_SHADD8 "shadd8") (UNSPEC_SHSUB8 "shsub8") diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 516d0a3172e9cbb9a7fe0e9a1cd45ba5d935344b..d3f1cc1bbc6171c5d510b7ee3c7df178004d8c5b 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -2291,62 +2291,17 @@ (define_insn "mve_vcmpneq_n_f<mode>" ]) ;; -;; [vcmulq_f]) +;; [vcmulq, vcmulq_rot90, vcmulq_rot180, vcmulq_rot270]) ;; -(define_insn "mve_vcmulq_f<mode>" +(define_insn "mve_vcmulq<mve_rot><mode>" [ (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>") (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") (match_operand:MVE_0 2 "s_register_operand" "w")] - VCMULQ_F)) + VCMUL)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #0" - [(set_attr "type" "mve_move") -]) - -;; -;; [vcmulq_rot180_f]) -;; -(define_insn "mve_vcmulq_rot180_f<mode>" - [ - (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") - (match_operand:MVE_0 2 "s_register_operand" "w")] - VCMULQ_ROT180_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #180" - [(set_attr "type" "mve_move") -]) - -;; -;; [vcmulq_rot270_f]) -;; -(define_insn "mve_vcmulq_rot270_f<mode>" - [ - (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") - (match_operand:MVE_0 2 "s_register_operand" "w")] - VCMULQ_ROT270_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #270" - [(set_attr "type" "mve_move") -]) - -;; -;; [vcmulq_rot90_f]) -;; -(define_insn "mve_vcmulq_rot90_f<mode>" - [ - (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") - 
(match_operand:MVE_0 2 "s_register_operand" "w")] - VCMULQ_ROT90_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #90" + "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #<rot>" [(set_attr "type" "mve_move") ]) @@ -4118,66 +4073,20 @@ (define_insn "mve_vaddlvaq_p_<supf>v4si" [(set_attr "type" "mve_move") (set_attr "length""8")]) ;; -;; [vcmlaq_f]) -;; -(define_insn "mve_vcmlaq_f<mode>" - [ - (set (match_operand:MVE_0 0 "s_register_operand" "=w") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") - (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:MVE_0 3 "s_register_operand" "w")] - VCMLAQ_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #0" - [(set_attr "type" "mve_move") -]) - -;; -;; [vcmlaq_rot180_f]) +;; [vcmlaq, vcmlaq_rot90, vcmlaq_rot180, vcmlaq_rot270]) ;; -(define_insn "mve_vcmlaq_rot180_f<mode>" +(define_insn "mve_vcmlaq<mve_rot><mode>" [ - (set (match_operand:MVE_0 0 "s_register_operand" "=w") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") - (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:MVE_0 3 "s_register_operand" "w")] - VCMLAQ_ROT180_F)) + (set (match_operand:MVE_0 0 "s_register_operand" "=w,w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0,Dz") + (match_operand:MVE_0 2 "s_register_operand" "w,w") + (match_operand:MVE_0 3 "s_register_operand" "w,w")] + VCMLA)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #180" - [(set_attr "type" "mve_move") -]) - -;; -;; [vcmlaq_rot270_f]) -;; -(define_insn "mve_vcmlaq_rot270_f<mode>" - [ - (set (match_operand:MVE_0 0 "s_register_operand" "=w") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") - (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:MVE_0 3 "s_register_operand" "w")] - VCMLAQ_ROT270_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #270" - [(set_attr "type" "mve_move") -]) - -;; -;; [vcmlaq_rot90_f]) -;; -(define_insn 
"mve_vcmlaq_rot90_f<mode>" - [ - (set (match_operand:MVE_0 0 "s_register_operand" "=w") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") - (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:MVE_0 3 "s_register_operand" "w")] - VCMLAQ_ROT90_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #90" + "@ + vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #<rot> + vcmul.f%#<V_sz_elem> %q0, %q2, %q3, #<rot>" [(set_attr "type" "mve_move") ]) diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index 3f9ebe7b1753045e53044324cc7302f51d0eed21..6ed23a97c0b2940a608b443ffd04328fa1b649b7 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -510,6 +510,10 @@ (define_c_enum "unspec" [ UNSPEC_VCMLA90 UNSPEC_VCMLA180 UNSPEC_VCMLA270 + UNSPEC_VCMUL + UNSPEC_VCMUL90 + UNSPEC_VCMUL180 + UNSPEC_VCMUL270 UNSPEC_MATMUL_S UNSPEC_MATMUL_U UNSPEC_MATMUL_US @@ -717,10 +721,6 @@ (define_c_enum "unspec" [ VCMPLTQ_N_F VCMPNEQ_F VCMPNEQ_N_F - VCMULQ_F - VCMULQ_ROT180_F - VCMULQ_ROT270_F - VCMULQ_ROT90_F VMAXNMAQ_F VMAXNMAVQ_F VMAXNMQ_F @@ -892,7 +892,6 @@ (define_c_enum "unspec" [ VMLSLDAVAQ_S VQSHRUNBQ_N_S VQRSHRUNTQ_N_S - VCMLAQ_F VMINNMAQ_M_F VFMASQ_N_F VDUPQ_M_N_F @@ -914,14 +913,12 @@ (define_c_enum "unspec" [ VADDLVAQ_P_S VQMOVUNBQ_M_S VCMPLEQ_M_F - VCMLAQ_ROT180_F VMLSLDAVAXQ_S VRNDXQ_M_F VFMSQ_F VMINNMVQ_P_F VMAXNMVQ_P_F VPSELQ_F - VCMLAQ_ROT90_F VQMOVUNTQ_M_S VREV64Q_M_F VNEGQ_M_F @@ -934,7 +931,6 @@ (define_c_enum "unspec" [ VRMLALDAVHQ_P_S VRMLALDAVHXQ_P_S VCMPEQQ_M_N_F - VCMLAQ_ROT270_F VMAXNMAQ_M_F VRNDQ_M_F VMLALDAVQ_P_U -- 