From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1873) id 1D54138708D0; Sun, 14 Mar 2021 22:01:42 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 1D54138708D0 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Iain Buclaw To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/ibuclaw/heads/darwin)] D, Darwin, PPC : Implement fibre_switchContext. X-Act-Checkin: gcc X-Git-Author: Iain Sandoe X-Git-Refname: refs/users/ibuclaw/heads/darwin X-Git-Oldrev: afe6c872ff094cb917bfef2e51020f5b8ab46b75 X-Git-Newrev: 9cc0a00ce7cac481c7d27e2c3cbf2b32f1410ce2 Message-Id: <20210314220142.1D54138708D0@sourceware.org> Date: Sun, 14 Mar 2021 22:01:42 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 14 Mar 2021 22:01:42 -0000 https://gcc.gnu.org/g:9cc0a00ce7cac481c7d27e2c3cbf2b32f1410ce2 commit 9cc0a00ce7cac481c7d27e2c3cbf2b32f1410ce2 Author: Iain Sandoe Date: Fri Dec 11 00:48:15 2020 +0000 D, Darwin, PPC : Implement fibre_switchContext. For Darwin PPC, the callee-saves include the FPRs and 12 of the VRs. This implements the callee-saves and a rudimentary FDE that will assemble using assemblers without support for .cfi_xxxx. Diff: --- .../libdruntime/config/powerpc/switchcontext.S | 278 ++++++++++++++++++++- 1 file changed, 276 insertions(+), 2 deletions(-) diff --git a/libphobos/libdruntime/config/powerpc/switchcontext.S b/libphobos/libdruntime/config/powerpc/switchcontext.S index d4ea577c463..74395b04014 100644 --- a/libphobos/libdruntime/config/powerpc/switchcontext.S +++ b/libphobos/libdruntime/config/powerpc/switchcontext.S @@ -24,7 +24,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #include "../common/threadasm.S" -#if !defined(__PPC64__) +#if !defined(__PPC64__) && !defined(__MACH__) /** * Performs a context switch. @@ -151,4 +151,278 @@ CSYM(fiber_switchContext): .cfi_endproc .size CSYM(fiber_switchContext),.-CSYM(fiber_switchContext) -#endif /* !defined(__PPC64__) */ +#elif defined(__MACH__) + +/* Implementation for Darwin/macOS preserving callee-saved regs. + + FIXME : There is no unwind frame. + FIXME : not sure if we should save the vsave reg (perhaps using the slot we have + r11 in at present). */ + +/* Darwin has a red zone (220 bytes for PPC 288 for PPC64) which we can write + to before the stack is updated without worrying about it being clobbered by + signals or hardware interrupts. + + The stack will be 16byte aligned on entry with: + PPC PPC64 + SP-> +---------------------------------------+ + | back chain to caller | 0 0 + +---------------------------------------+ + | slot to save CR | 4 8 + +---------------------------------------+ + | slot to save LR | 8 16 + +---------------------------------------+ + | etc.. etc.. as per C calling conv. | */ + +# if __PPC64__ +# define LD ld +# define ST std +# define STU stdu +# define SZ 8 +# define MACHINE ppc64 +# define RED_ZONE 288 +# else +# define LD lwz +# define ST stw +# define STU stwu +# define SZ 4 +# define MACHINE ppc7400 +# define RED_ZONE 220 +# endif + +# define SAVE_VECTORS 0 +/** + * Performs a context switch. + * + * r3 - old context pointer + * r4 - new context pointer + * + */ + .machine MACHINE + .text + .globl CSYM(fiber_switchContext) + .align 2 +CSYM(fiber_switchContext): +LFB0: + /* Get the link reg. */ + mflr r0 + /* Get the callee-saved crs (well all of them, actually). */ + mfcr r12 + + /* Save GPRs, we save the static chain here too although it is not clear if we need to. */ + ST r31, ( -1 * SZ)(r1) + ST r30, ( -2 * SZ)(r1) + ST r29, ( -3 * SZ)(r1) + ST r28, ( -4 * SZ)(r1) + ST r27, ( -5 * SZ)(r1) + ST r26, ( -6 * SZ)(r1) + ST r25, ( -7 * SZ)(r1) + ST r24, ( -8 * SZ)(r1) + ST r23, ( -9 * SZ)(r1) + ST r22, (-10 * SZ)(r1) + ST r21, (-11 * SZ)(r1) + ST r20, (-12 * SZ)(r1) + ST r19, (-13 * SZ)(r1) + ST r18, (-14 * SZ)(r1) + ST r17, (-15 * SZ)(r1) + ST r16, (-16 * SZ)(r1) + ST r15, (-17 * SZ)(r1) + ST r14, (-18 * SZ)(r1) + ST r13, (-19 * SZ)(r1) + + /* Save the lr and cr into the normal function linkage area. */ + ST r0, 2*SZ(r1) + ST r12, SZ(r1) + + /* We update the stack pointer here, since we do not want the GC to + scan the floating point registers. We are still 16-byte aligned. */ + STU r11, (-20 * SZ)(r1) + + /* Update the stack pointer in the old context as per comment above. */ + ST r1, 0(r3) + + /* Save FPRs - same for PPC and PPC64 */ + stfd f14, (-18 * 8)(r1) + stfd f15, (-17 * 8)(r1) + stfd f16, (-16 * 8)(r1) + stfd f17, (-15 * 8)(r1) + stfd f18, (-14 * 8)(r1) + stfd f19, (-13 * 8)(r1) + stfd f20, (-12 * 8)(r1) + stfd f21, (-11 * 8)(r1) + stfd f22, (-10 * 8)(r1) + stfd f23, ( -9 * 8)(r1) + stfd f24, ( -8 * 8)(r1) + stfd f25, ( -7 * 8)(r1) + stfd f26, ( -6 * 8)(r1) + stfd f27, ( -5 * 8)(r1) + stfd f28, ( -4 * 8)(r1) + stfd f29, ( -3 * 8)(r1) + stfd f30, ( -2 * 8)(r1) + stfd f31, ( -1 * 8)(r1) + +#if SAVE_VECTORS + /* We are still 16byte aligned - so we are ok for vector saves. + but the combined size of the vectors (12 x 16) + the FPRs (144) exceeds the + red zone size so we need to adjust the stack again - note this means careful + ordering is needed on the restore. */ + + addi r1, r1, -(12*16+18*8) + li r11, 0 + stvx v20,r11,r1 + addi r11, r11, 16 + stvx v21,r11,r1 + addi r11, r11, 16 + stvx v22,r11,r1 + addi r11, r11, 16 + stvx v23,r11,r1 + addi r11, r11, 16 + stvx v24,r11,r1 + addi r11, r11, 16 + stvx v25,r11,r1 + addi r11, r11, 16 + stvx v26,r11,r1 + addi r11, r11, 16 + stvx v27,r11,r1 + addi r11, r11, 16 + stvx v28,r11,r1 + addi r11, r11, 16 + stvx v29,r11,r1 + addi r11, r11, 16 + stvx v30,r11,r1 + addi r11, r11, 16 + stvx v31,r11,r1 + + /* Now do the same thing in reverse - starting with r4 pointing to + the block of GPRs - stage 1 point to the saved vectors and fprs. */ + + addi r1, r4, -(12*16+18*8) + li r11, 0 + lvx v20,r11,r1 + addi r11, r11, 16 + lvx v21,r11,r1 + addi r11, r11, 16 + lvx v22,r11,r1 + addi r11, r11, 16 + lvx v23,r11,r1 + addi r11, r11, 16 + lvx v24,r11,r1 + addi r11, r11, 16 + lvx v25,r11,r1 + addi r11, r11, 16 + lvx v26,r11,r1 + addi r11, r11, 16 + lvx v27,r11,r1 + addi r11, r11, 16 + lvx v28,r11,r1 + addi r11, r11, 16 + lvx v29,r11,r1 + addi r11, r11, 16 + lvx v30,r11,r1 + addi r11, r11, 16 + lvx v31,r11,r1 +#endif + + /* Now it is safe to update the stack pointer since the combined + size of the GPRs and FPRs will not exceed the red zone. */ + + addi r1, r4, 20 * SZ + + /* Restore FPRs */ + lfd f14, (-18 * 8)(r4) + lfd f15, (-17 * 8)(r4) + lfd f16, (-16 * 8)(r4) + lfd f17, (-15 * 8)(r4) + lfd f18, (-14 * 8)(r4) + lfd f19, (-13 * 8)(r4) + lfd f20, (-12 * 8)(r4) + lfd f21, (-11 * 8)(r4) + lfd f22, (-10 * 8)(r4) + lfd f23, ( -9 * 8)(r4) + lfd f24, ( -8 * 8)(r4) + lfd f25, ( -7 * 8)(r4) + lfd f26, ( -6 * 8)(r4) + lfd f27, ( -5 * 8)(r4) + lfd f28, ( -4 * 8)(r4) + lfd f29, ( -3 * 8)(r4) + lfd f30, ( -2 * 8)(r4) + lfd f31, ( -1 * 8)(r4) + + /* Pick up lr and cr */ + LD r0, 2*SZ(r1) + LD r12, SZ(r1) + + /* Restore GPRs */ + LD r11, (-20 * SZ)(r1) + LD r13, (-19 * SZ)(r1) + LD r14, (-18 * SZ)(r1) + LD r15, (-17 * SZ)(r1) + LD r16, (-16 * SZ)(r1) + LD r17, (-15 * SZ)(r1) + LD r18, (-14 * SZ)(r1) + LD r19, (-13 * SZ)(r1) + LD r20, (-12 * SZ)(r1) + LD r21, (-11 * SZ)(r1) + LD r22, (-10 * SZ)(r1) + LD r23, ( -9 * SZ)(r1) + LD r24, ( -8 * SZ)(r1) + LD r25, ( -7 * SZ)(r1) + LD r26, ( -6 * SZ)(r1) + LD r27, ( -5 * SZ)(r1) + LD r28, ( -4 * SZ)(r1) + LD r29, ( -3 * SZ)(r1) + LD r30, ( -2 * SZ)(r1) + LD r31, ( -1 * SZ)(r1) + + /* Set cr and lr */ + mtcr r12 + mtlr r0 + + /* Return and switch context */ + blr +LFE0: + +/* Minimal CFI / FDE which does not describe the stacking of the GPRs - but only that + the routine has been entered/exited. */ + +# if __PPC64__ +# define DATA_ALIGN 0x78 +# define ALIGN_SIZE 3 +# define ADDRD .quad +# else +# define DATA_ALIGN 0x7c +# define ALIGN_SIZE 3 +# define ADDRD .long +# endif + + .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EH_frame1: + .set L$set$0,LECIE1-LSCIE1 + .long L$set$0 ; Length of Common Information Entry +LSCIE1: + .long 0 ; CIE Identifier Tag + .byte 0x3 ; CIE Version + .ascii "zR\0" ; CIE Augmentation + .byte 0x1 ; uleb128 0x1; CIE Code Alignment Factor + .byte DATA_ALIGN ; sleb128 -4/-8; CIE Data Alignment Factor + .byte 0x41 ; uleb128 0x41; CIE RA Column + .byte 0x1 ; uleb128 0x1; Augmentation size + .byte 0x10 ; FDE Encoding (pcrel) + .byte 0xc ; DW_CFA_def_cfa + .byte 0x1 ; uleb128 0x1 + .byte 0 ; uleb128 0 + .p2align ALIGN_SIZE,0 +LECIE1: +LSFDE1: + .set L$set$1,LEFDE1-LASFDE1 + .long L$set$1 ; FDE Length +LASFDE1: + .long LASFDE1-EH_frame1 ; FDE CIE offset + ADDRD LFB0-. ; FDE initial location + .set L$set$2,LFE0-LFB0 + ADDRD L$set$2 ; FDE address range + .byte 0 ; uleb128 0; Augmentation size + .p2align ALIGN_SIZE,0 +LEFDE1: + +#endif /* defined(__MACH__) */