Hello,

This patch is part of the MVE ACLE intrinsics framework.

This patch adds support to update (read/write) the APSR (Application Program
Status Register) and the FPSCR (Floating-point Status and Control Register)
for MVE.  This patch also enables the thumb2 mov RTL patterns for MVE.

Please refer to the Arm reference manual [1] for more details.
[1] https://static.docs.arm.com/ddi0553/bh/DDI0553B_h_armv8m_arm.pdf?_ga=2.102521798.659307368.1572453718-1501600630.1548848914

Regression tested on arm-none-eabi and found no regressions.

Ok for trunk?

Thanks,
Srinath

gcc/ChangeLog:

2019-11-11  Andre Vieira
	    Mihail Ionescu
	    Srinath Parvathaneni

	* config/arm/thumb2.md (thumb2_movsfcc_soft_insn): Add check to not
	allow TARGET_HAVE_MVE for this pattern.
	(thumb2_cmse_entry_return): Add TARGET_HAVE_MVE check to update the
	APSR register.
	* config/arm/unspecs.md (UNSPEC_GET_FPSCR): Define.
	(VUNSPEC_GET_FPSCR): Remove.
	* config/arm/vfp.md (thumb2_movhi_vfp): Add TARGET_HAVE_MVE check.
	(thumb2_movhi_fp16): Add TARGET_HAVE_MVE check.
	(thumb2_movsi_vfp): Add TARGET_HAVE_MVE check.
	(movdi_vfp): Add TARGET_HAVE_MVE check.
	(thumb2_movdf_vfp): Add TARGET_HAVE_MVE check.
	(thumb2_movsfcc_vfp): Add TARGET_HAVE_MVE check.
	(thumb2_movdfcc_vfp): Add TARGET_HAVE_MVE check.
	(push_multi_vfp): Add TARGET_HAVE_MVE check.
	(set_fpscr): Add TARGET_HAVE_MVE check.
	(get_fpscr): Add TARGET_HAVE_MVE check.


###############     Attachment also inlined for ease of reply    ###############


diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 809461a25da5a8058a8afce972dea0d3131effc0..81afd8fcdc1b0a82493dc0758bce16fa9e5fde20 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -435,10 +435,10 @@
 (define_insn "*cmovsi_insn"
   [(set (match_operand:SI 0 "arm_general_register_operand" "=r,r,r,r,r,r,r")
 	(if_then_else:SI
-	 (match_operator 1 "arm_comparison_operator"
-	  [(match_operand 2 "cc_register" "") (const_int 0)])
-	 (match_operand:SI 3 "arm_reg_or_m1_or_1" "r, r,UM, r,U1,UM,U1")
-	 (match_operand:SI 4 "arm_reg_or_m1_or_1" "r,UM, r,U1, r,UM,U1")))]
+	 (match_operator 1 "arm_comparison_operator"
+	  [(match_operand 2 "cc_register" "") (const_int 0)])
+	 (match_operand:SI 3 "arm_reg_or_m1_or_1" "r, r,UM, r,U1,UM,U1")
+	 (match_operand:SI 4 "arm_reg_or_m1_or_1" "r,UM, r,U1, r,UM,U1")))]
   "TARGET_THUMB2 && TARGET_COND_ARITH
    && (!((operands[3] == const1_rtx && operands[4] == constm1_rtx)
 	 || (operands[3] == constm1_rtx && operands[4] == const1_rtx)))"
@@ -540,7 +540,7 @@
 	  [(match_operand 4 "cc_register" "") (const_int 0)])
 	 (match_operand:SF 1 "s_register_operand" "0,r")
 	 (match_operand:SF 2 "s_register_operand" "r,0")))]
-  "TARGET_THUMB2 && TARGET_SOFT_FLOAT"
+  "TARGET_THUMB2 && TARGET_SOFT_FLOAT && !TARGET_HAVE_MVE"
   "@
    it\\t%D3\;mov%D3\\t%0, %2
    it\\t%d3\;mov%d3\\t%0, %1"
@@ -1226,7 +1226,7 @@
 ; added to clear the APSR and potentially the FPSCR if VFP is available, so
 ; we adapt the length accordingly.
   (set (attr "length")
-     (if_then_else (match_test "TARGET_HARD_FLOAT")
+     (if_then_else (match_test "TARGET_HARD_FLOAT || TARGET_HAVE_MVE")
		   (const_int 34)
		   (const_int 8)))
 ; We do not support predicate execution of returns from cmse_nonsecure_entry
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index b3b4f8ee3e2d1bdad968a9dd8ccbc72ded274f48..ac7fe7d0af19f1965356d47d8327e24d410b99bd 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -170,6 +170,7 @@
   UNSPEC_TORC		; Used by the intrinsic form of the iWMMXt TORC instruction.
   UNSPEC_TORVSC		; Used by the intrinsic form of the iWMMXt TORVSC instruction.
   UNSPEC_TEXTRC		; Used by the intrinsic form of the iWMMXt TEXTRC instruction.
+  UNSPEC_GET_FPSCR	; Represent fetch of FPSCR content.
 ])

@@ -216,7 +217,6 @@
   VUNSPEC_SLX		; Represent a store-register-release-exclusive.
   VUNSPEC_LDA		; Represent a store-register-acquire.
   VUNSPEC_STL		; Represent a store-register-release.
-  VUNSPEC_GET_FPSCR	; Represent fetch of FPSCR content.
   VUNSPEC_SET_FPSCR	; Represent assign of FPSCR content.
   VUNSPEC_PROBE_STACK_RANGE ; Represent stack range probing.
   VUNSPEC_CDP		; Represent the coprocessor cdp instruction.
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 6349c0570540ec25a599166f5d427fcbdbf2af68..461a5d71ca8548cfc61c83f9716249425633ad21 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -74,10 +74,10 @@
 (define_insn "*thumb2_movhi_vfp"
   [(set (match_operand:HI 0 "nonimmediate_operand"
-	 "=rk, r, l, r, m, r, *t, r, *t")
+	 "=rk, r, l, r, m, r, *t, r, *t, Up, r")
	(match_operand:HI 1 "general_operand"
-	 "rk, I, Py, n, r, m, r, *t, *t"))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT
+	 "rk, I, Py, n, r, m, r, *t, *t, r, Up"))]
+  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)
    && !TARGET_VFP_FP16INST
    && (register_operand (operands[0], HImode)
        || register_operand (operands[1], HImode))"
@@ -99,20 +99,24 @@
       return "vmov%?\t%0, %1\t%@ int";
     case 8:
       return "vmov%?.f32\t%0, %1\t%@ int";
+    case 9:
+      return "vmsr%?\t P0, %1\t@ movhi";
+    case 10:
+      return "vmrs%?\t %0, P0\t@ movhi";
     default:
       gcc_unreachable ();
     }
 }
  [(set_attr "predicable" "yes")
   (set_attr "predicable_short_it"
-	"yes, no, yes, no, no, no, no, no, no")
+	"yes, no, yes, no, no, no, no, no, no, no, no")
   (set_attr "type" "mov_reg, mov_imm, mov_imm, mov_imm, store_4, load_4,\
-	 f_mcr, f_mrc, fmov")
-  (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *")
-  (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *")
-  (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *")
-  (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4")]
+	 f_mcr, f_mrc, fmov, mve_move, mve_move")
+  (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *, *, *")
+  (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *, *, *")
+  (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *, *, *")
+  (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4")]
 )

 ;; Patterns for HI moves which provide more data transfer instructions when FP16
@@ -170,10 +174,10 @@
 (define_insn "*thumb2_movhi_fp16"
   [(set (match_operand:HI 0 "nonimmediate_operand"
-	 "=rk, r, l, r, m, r, *t, r, *t")
+	 "=rk, r, l, r, m, r, *t, r, *t, Up, r")
	(match_operand:HI 1 "general_operand"
-	 "rk, I, Py, n, r, m, r, *t, *t"))]
-  "TARGET_THUMB2 && TARGET_VFP_FP16INST
+	 "rk, I, Py, n, r, m, r, *t, *t, r, Up"))]
+  "TARGET_THUMB2 && (TARGET_VFP_FP16INST || TARGET_HAVE_MVE)
    && (register_operand (operands[0], HImode)
        || register_operand (operands[1], HImode))"
 {
@@ -194,21 +198,25 @@
       return "vmov.f16\t%0, %1\t%@ int";
     case 8:
       return "vmov%?.f32\t%0, %1\t%@ int";
+    case 9:
+      return "vmsr%?\tP0, %1\t%@ movhi";
+    case 10:
+      return "vmrs%?\t%0, P0\t%@ movhi";
     default:
       gcc_unreachable ();
     }
 }
  [(set_attr "predicable"
-	"yes, yes, yes, yes, yes, yes, no, no, yes")
+	"yes, yes, yes, yes, yes, yes, no, no, yes, yes, yes")
   (set_attr "predicable_short_it"
-	"yes, no, yes, no, no, no, no, no, no")
+	"yes, no, yes, no, no, no, no, no, no, no, no")
   (set_attr "type" "mov_reg, mov_imm, mov_imm, mov_imm, store_4, load_4,\
-	 f_mcr, f_mrc, fmov")
-  (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *")
-  (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *")
-  (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *")
-  (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4")]
+	 f_mcr, f_mrc, fmov, mve_move, mve_move")
+  (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *, *, *")
+  (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *, *, *")
+  (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *, *, *")
+  (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4")]
 )

 ;; SImode moves
@@ -258,9 +266,11 @@
 ;; is chosen with length 2 when the instruction is predicated for
 ;; arm_restrict_it.
 (define_insn "*thumb2_movsi_vfp"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,lk*r,m,*t, r,*t,*t, *Uv")
-	(match_operand:SI 1 "general_operand"	   "rk,I,Py,K,j,mi,lk*r, r,*t,*t,*UvTu,*t"))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,l,*hk,m,*m,*t,\
+	 r,*t,*t,*Uv, Up, r")
+	(match_operand:SI 1 "general_operand"	   "rk,I,Py,K,j,mi,*mi,l,*hk,\
+	 r,*t,*t,*UvTu,*t, r, Up"))]
+  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)
    && (   s_register_operand (operands[0], SImode)
        || s_register_operand (operands[1], SImode))"
   "*
@@ -275,43 +285,53 @@
     case 4:
       return \"movw%?\\t%0, %1\";
     case 5:
+    case 6:
       /* Cannot load it directly, split to load it via MOV / MOVT.  */
       if (!MEM_P (operands[1]) && arm_disable_literal_pool)
	return \"#\";
       return \"ldr%?\\t%0, %1\";
-    case 6:
-      return \"str%?\\t%1, %0\";
     case 7:
-      return \"vmov%?\\t%0, %1\\t%@ int\";
     case 8:
-      return \"vmov%?\\t%0, %1\\t%@ int\";
+      return \"str%?\\t%1, %0\";
     case 9:
+      return \"vmov%?\\t%0, %1\\t%@ int\";
+    case 10:
+      return \"vmov%?\\t%0, %1\\t%@ int\";
+    case 11:
       return \"vmov%?.f32\\t%0, %1\\t%@ int\";
-    case 10: case 11:
+    case 12: case 13:
       return output_move_vfp (operands);
+    case 14:
+      return \"vmsr\\t P0, %1\";
+    case 15:
+      return \"vmrs\\t %0, P0\";
     default:
       gcc_unreachable ();
     }
   "
  [(set_attr "predicable" "yes")
-  (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no")
-  (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load_4,store_4,f_mcr,f_mrc,fmov,f_loads,f_stores")
-  (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4")
-  (set_attr "pool_range" "*,*,*,*,*,1018,*,*,*,*,1018,*")
-  (set_attr "neg_pool_range" "*,*,*,*,*, 0,*,*,*,*,1008,*")]
+  (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,\
+   no,no,no")
+  (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load_4,load_4,\
+   store_4,store_4,f_mcr,f_mrc,fmov,f_loads,f_stores,mve_move,\
+   mve_move")
+  (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4,4,4")
+  (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*,*,*")
+  (set_attr "neg_pool_range" "*,*,*,*,*, 0, 0,*,*,*,*,*,1008,*,*,*")]
 )

 ;; DImode moves

 (define_insn "*movdi_vfp"
-  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,!r,w,w, Uv")
-	(match_operand:DI 1 "di_operand"	      "r,rDa,Db,Dc,mi,mi,r,r,w,w,UvTu,w"))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT
-   && (   register_operand (operands[0], DImode)
-       || register_operand (operands[1], DImode))
-   && !(TARGET_NEON && CONST_INT_P (operands[1])
-	&& simd_immediate_valid_for_move (operands[1], DImode, NULL, NULL))"
+  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,!r,w,\
+	 w, Uv")
+	(match_operand:DI 1 "di_operand"	      "r,rDa,Db,Dc,mi,mi,r,r,w,w,UvTu,w"))]
+  "TARGET_32BIT && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)
+   && (register_operand (operands[0], DImode)
+       || register_operand (operands[1], DImode))
+   && !((TARGET_NEON || TARGET_HAVE_MVE) && CONST_INT_P (operands[1])
+	&& simd_immediate_valid_for_move (operands[1], DImode, NULL, NULL))"
   "*
   switch (which_alternative)
     {
@@ -390,9 +410,15 @@
     case 6: /* S register from immediate.  */
       return \"vmov.f16\\t%0, %1\t%@ __fp16\";
     case 7: /* S register from memory.  */
-      return \"vld1.16\\t{%z0}, %A1\";
+      if (TARGET_HAVE_MVE)
+	return \"vldr.16\\t%0, %A1\";
+      else
+	return \"vld1.16\\t{%z0}, %A1\";
     case 8: /* Memory from S register.  */
-      return \"vst1.16\\t{%z1}, %A0\";
+      if (TARGET_HAVE_MVE)
+	return \"vstr.16\\t%1, %A0\";
+      else
+	return \"vst1.16\\t{%z1}, %A0\";
     case 9: /* ARM register from constant.  */
       {
	long bits;
@@ -593,7 +619,7 @@
 (define_insn "*thumb2_movsf_vfp"
   [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t, t  ,Uv,r ,m,t,r")
	(match_operand:SF 1 "hard_sf_operand"	   " ?r,t,Dv,UvHa,t, mHa,r,t,r"))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT
+  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)
    && (   s_register_operand (operands[0], SFmode)
       || s_register_operand (operands[1], SFmode))"
   "*
@@ -682,7 +708,7 @@
 (define_insn "*thumb2_movdf_vfp"
   [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w  ,Uv,r ,m,w,r")
	(match_operand:DF 1 "hard_df_operand"		    " ?r,w,Dy,G,UvHa,w, mHa,r, w,r"))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT
+  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)
    && (   register_operand (operands[0], DFmode)
       || register_operand (operands[1], DFmode))"
   "*
@@ -760,7 +786,7 @@
	  [(match_operand 4 "cc_register" "") (const_int 0)])
	 (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t")
	 (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT && !arm_restrict_it"
+  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE) && !arm_restrict_it"
   "@
    it\\t%D3\;vmov%D3.f32\\t%0, %2
    it\\t%d3\;vmov%d3.f32\\t%0, %1
@@ -806,7 +832,8 @@
	  [(match_operand 4 "cc_register" "") (const_int 0)])
	 (match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w")
	 (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && !arm_restrict_it"
+  "TARGET_THUMB2 && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE) && TARGET_VFP_DOUBLE
+   && !arm_restrict_it"
   "@
    it\\t%D3\;vmov%D3.f64\\t%P0, %P2
    it\\t%d3\;vmov%d3.f64\\t%P0, %P1
@@ -1982,7 +2009,7 @@
   [(set (match_operand:BLK 0 "memory_operand" "=m")
	(unspec:BLK [(match_operand:DF 1 "vfp_register_operand" "")]
		    UNSPEC_PUSH_MULT))])]
-  "TARGET_32BIT && TARGET_HARD_FLOAT"
+  "TARGET_32BIT && (TARGET_HARD_FLOAT || TARGET_HAVE_MVE)"
   "* return vfp_output_vstmd (operands);"
   [(set_attr "type" "f_stored")]
 )
@@ -2070,16 +2097,18 @@
 ;; Write Floating-point Status and Control Register.
 (define_insn "set_fpscr"
-  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] VUNSPEC_SET_FPSCR)]
-  "TARGET_HARD_FLOAT"
+  [(set (reg:SI VFPCC_REGNUM)
+	(unspec_volatile:SI
+	 [(match_operand:SI 0 "register_operand" "r")] VUNSPEC_SET_FPSCR))]
+  "TARGET_HARD_FLOAT || TARGET_HAVE_MVE"
   "mcr\\tp10, 7, %0, cr1, cr0, 0\\t @SET_FPSCR"
   [(set_attr "type" "mrs")])

 ;; Read Floating-point Status and Control Register.
 (define_insn "get_fpscr"
   [(set (match_operand:SI 0 "register_operand" "=r")
-	(unspec_volatile:SI [(const_int 0)] VUNSPEC_GET_FPSCR))]
-  "TARGET_HARD_FLOAT"
+	(unspec:SI [(reg:SI VFPCC_REGNUM)] UNSPEC_GET_FPSCR))]
+  "TARGET_HARD_FLOAT || TARGET_HAVE_MVE"
   "mrc\\tp10, 7, %0, cr1, cr0, 0\\t @GET_FPSCR"
   [(set_attr "type" "mrs")])
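
For reference only (not part of the patch), a minimal sketch of how the
updated FPSCR patterns are reached from C: GCC's existing
__builtin_arm_get_fpscr and __builtin_arm_set_fpscr builtins expand to the
"get_fpscr"/"set_fpscr" patterns above, so with this change they should also
be usable when building for MVE.  The command line is only an assumed
example, e.g.
arm-none-eabi-gcc -mthumb -march=armv8.1-m.main+mve -mfloat-abi=hard -O2.

/* Illustration only: read-modify-write the FPSCR through the builtins
   that expand to the get_fpscr/set_fpscr patterns.  */
unsigned int
read_fpscr (void)
{
  /* Expands to the "get_fpscr" pattern (MRC p10, ...).  */
  return __builtin_arm_get_fpscr ();
}

void
clear_cumulative_exception_flags (void)
{
  unsigned int fpscr = __builtin_arm_get_fpscr ();
  /* Clear the cumulative exception flag bits (IOC..IXC, IDC);
     expands to the "set_fpscr" pattern (MCR p10, ...).  */
  __builtin_arm_set_fpscr (fpscr & ~0x9fu);
}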