public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/iains/heads/d-for-darwin)] D, Darwin, PPC : Implement fibre_switchContext.
@ 2020-12-21 20:37 Iain D Sandoe
  0 siblings, 0 replies; 3+ messages in thread
From: Iain D Sandoe @ 2020-12-21 20:37 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:df0835240c37ff74615181ba571c5ed0f417d966

commit df0835240c37ff74615181ba571c5ed0f417d966
Author: Iain Sandoe <iain@sandoe.co.uk>
Date:   Fri Dec 11 00:48:15 2020 +0000

    D, Darwin, PPC : Implement fibre_switchContext.
    
    For Darwin PPC, the callee-saves include the FPRs and
    12 of the VRs.
    
    This implements the callee-saves and a rudimentary FDE that
    will assemble using assemblers without support for .cfi_xxxx.

Diff:
---
 .../libdruntime/config/powerpc/switchcontext.S     | 278 ++++++++++++++++++++-
 1 file changed, 276 insertions(+), 2 deletions(-)

diff --git a/libphobos/libdruntime/config/powerpc/switchcontext.S b/libphobos/libdruntime/config/powerpc/switchcontext.S
index 6689217ca07..ddc65c23ed7 100644
--- a/libphobos/libdruntime/config/powerpc/switchcontext.S
+++ b/libphobos/libdruntime/config/powerpc/switchcontext.S
@@ -24,7 +24,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 
 #include "../common/threadasm.S"
 
-#if !defined(__PPC64__)
+#if !defined(__PPC64__) && !defined(__MACH__)
 
 /**
  * Performs a context switch.
@@ -151,4 +151,278 @@ CSYM(fiber_switchContext):
     .cfi_endproc
     .size CSYM(fiber_switchContext),.-CSYM(fiber_switchContext)
 
-#endif /* !defined(__PPC64__) */
+#elif defined(__MACH__)
+
+/* Implementation for Darwin/macOS preserving callee-saved regs.
+
+   FIXME : The unwind frame is minimal; it does not describe the saved registers.
+   FIXME : not sure if we should save the vsave reg (perhaps using the slot we have
+           r11 in at present).  */
+
+/* Darwin has a red zone (220 bytes for PPC 288 for PPC64) which we can write
+   to before the stack is updated without worrying about it being clobbered by
+   signals or hardware interrupts.
+
+   The stack will be 16byte aligned on entry with:
+						  PPC	PPC64
+   SP-> +---------------------------------------+
+	| back chain to caller			| 0	  0
+	+---------------------------------------+
+	| slot to save CR			| 4       8
+	+---------------------------------------+
+	| slot to save LR			| 8       16
+	+---------------------------------------+
+	| etc.. etc.. as per C calling conv.    |  */
+
+# if __PPC64__
+#  define LD ld /* GPR-sized load */
+#  define ST std /* GPR-sized store */
+#  define STU stdu /* GPR-sized store that also updates the base register */
+#  define SZ 8 /* bytes per GPR save slot */
+#  define MACHINE ppc64 /* operand of the .machine directive */
+#  define RED_ZONE 288 /* red zone size in bytes (see comment above) */
+# else
+#  define LD lwz /* GPR-sized load */
+#  define ST stw /* GPR-sized store */
+#  define STU stwu /* GPR-sized store that also updates the base register */
+#  define SZ 4 /* bytes per GPR save slot */
+#  define MACHINE ppc7400 /* operand of the .machine directive */
+#  define RED_ZONE 220 /* red zone size in bytes (see comment above) */
+# endif
+
+# define SAVE_VECTORS 0 /* nonzero also saves/restores v20-v31 */
+/**
+ * Performs a context switch.
+ *
+ * r3 - old context pointer: where the outgoing stack pointer is stored
+ * r4 - new context pointer: saved stack pointer of the incoming context
+ *
+ */
+    .machine MACHINE
+    .text
+    .globl CSYM(fiber_switchContext)
+    .align 2
+CSYM(fiber_switchContext):
+LFB0:
+    /* Get the link reg. */
+    mflr  r0
+    /* Get the callee-saved crs (well all of them, actually). */
+    mfcr  r12
+
+    /* Save GPRs, we save the static chain here too although it is not clear if we need to.  */
+    ST    r31, ( -1 * SZ)(r1)
+    ST    r30, ( -2 * SZ)(r1)
+    ST    r29, ( -3 * SZ)(r1)
+    ST    r28, ( -4 * SZ)(r1)
+    ST    r27, ( -5 * SZ)(r1)
+    ST    r26, ( -6 * SZ)(r1)
+    ST    r25, ( -7 * SZ)(r1)
+    ST    r24, ( -8 * SZ)(r1)
+    ST    r23, ( -9 * SZ)(r1)
+    ST    r22, (-10 * SZ)(r1)
+    ST    r21, (-11 * SZ)(r1)
+    ST    r20, (-12 * SZ)(r1)
+    ST    r19, (-13 * SZ)(r1)
+    ST    r18, (-14 * SZ)(r1)
+    ST    r17, (-15 * SZ)(r1)
+    ST    r16, (-16 * SZ)(r1)
+    ST    r15, (-17 * SZ)(r1)
+    ST    r14, (-18 * SZ)(r1)
+    ST    r13, (-19 * SZ)(r1)
+
+    /* Save the lr and cr into the normal function linkage area.  */
+    ST    r0, 2*SZ(r1)
+    ST    r12, SZ(r1)
+
+    /* We update the stack pointer here, since we do not want the GC to
+       scan the floating point registers. We are still 16-byte aligned. */
+    STU   r11, (-20 * SZ)(r1)
+
+    /* Update the stack pointer in the old context as per comment above. */
+    ST    r1, 0(r3)
+
+    /* Save FPRs - same for PPC and PPC64 */
+    stfd  f14, (-18 * 8)(r1)
+    stfd  f15, (-17 * 8)(r1)
+    stfd  f16, (-16 * 8)(r1)
+    stfd  f17, (-15 * 8)(r1)
+    stfd  f18, (-14 * 8)(r1)
+    stfd  f19, (-13 * 8)(r1)
+    stfd  f20, (-12 * 8)(r1)
+    stfd  f21, (-11 * 8)(r1)
+    stfd  f22, (-10 * 8)(r1)
+    stfd  f23, ( -9 * 8)(r1)
+    stfd  f24, ( -8 * 8)(r1)
+    stfd  f25, ( -7 * 8)(r1)
+    stfd  f26, ( -6 * 8)(r1)
+    stfd  f27, ( -5 * 8)(r1)
+    stfd  f28, ( -4 * 8)(r1)
+    stfd  f29, ( -3 * 8)(r1)
+    stfd  f30, ( -2 * 8)(r1)
+    stfd  f31, ( -1 * 8)(r1)
+
+#if SAVE_VECTORS
+    /* We are still 16byte aligned - so we are ok for vector saves.
+       but the combined size of the vectors (12 x 16) + the FPRs (144) exceeds the
+       red zone size so we need to adjust the stack again - note this means careful
+       ordering is needed on the restore.  */
+
+    addi  r1, r1, -(12*16+18*8)
+    li    r11, 0
+    stvx  v20,r11,r1
+    addi  r11, r11, 16
+    stvx  v21,r11,r1
+    addi  r11, r11, 16
+    stvx  v22,r11,r1
+    addi  r11, r11, 16
+    stvx  v23,r11,r1
+    addi  r11, r11, 16
+    stvx  v24,r11,r1
+    addi  r11, r11, 16
+    stvx  v25,r11,r1
+    addi  r11, r11, 16
+    stvx  v26,r11,r1
+    addi  r11, r11, 16
+    stvx  v27,r11,r1
+    addi  r11, r11, 16
+    stvx  v28,r11,r1
+    addi  r11, r11, 16
+    stvx  v29,r11,r1
+    addi  r11, r11, 16
+    stvx  v30,r11,r1
+    addi  r11, r11, 16
+    stvx  v31,r11,r1
+
+    /* Now do the same thing in reverse - starting with r4 pointing to
+       the block of GPRs - stage 1 point to the saved vectors and fprs. */
+
+    addi  r1, r4, -(12*16+18*8)
+    li    r11, 0
+    lvx   v20,r11,r1
+    addi  r11, r11, 16
+    lvx   v21,r11,r1
+    addi  r11, r11, 16
+    lvx   v22,r11,r1
+    addi  r11, r11, 16
+    lvx   v23,r11,r1
+    addi  r11, r11, 16
+    lvx   v24,r11,r1
+    addi  r11, r11, 16
+    lvx   v25,r11,r1
+    addi  r11, r11, 16
+    lvx   v26,r11,r1
+    addi  r11, r11, 16
+    lvx   v27,r11,r1
+    addi  r11, r11, 16
+    lvx   v28,r11,r1
+    addi  r11, r11, 16
+    lvx   v29,r11,r1
+    addi  r11, r11, 16
+    lvx   v30,r11,r1
+    addi  r11, r11, 16
+    lvx   v31,r11,r1
+#endif
+
+    /* Restore the FPRs through r4 before r1 is switched: 20 * SZ + 18 * 8
+       bytes is more than RED_ZONE, so they are not safe below the new sp.  */
+
+    lfd  f14, (-18 * 8)(r4)
+    lfd  f15, (-17 * 8)(r4)
+    lfd  f16, (-16 * 8)(r4)
+    lfd  f17, (-15 * 8)(r4)
+    lfd  f18, (-14 * 8)(r4)
+    lfd  f19, (-13 * 8)(r4)
+    lfd  f20, (-12 * 8)(r4)
+    lfd  f21, (-11 * 8)(r4)
+    lfd  f22, (-10 * 8)(r4)
+    lfd  f23, ( -9 * 8)(r4)
+    lfd  f24, ( -8 * 8)(r4)
+    lfd  f25, ( -7 * 8)(r4)
+    lfd  f26, ( -6 * 8)(r4)
+    lfd  f27, ( -5 * 8)(r4)
+    lfd  f28, ( -4 * 8)(r4)
+    lfd  f29, ( -3 * 8)(r4)
+    lfd  f30, ( -2 * 8)(r4)
+    lfd  f31, ( -1 * 8)(r4)
+
+    /* Only the 20 * SZ byte GPR block is still needed below the new sp.  */
+    addi  r1, r4, 20 * SZ
+
+    /* Pick up lr and cr */
+    LD    r0, 2*SZ(r1)
+    LD    r12, SZ(r1)
+
+    /* Restore GPRs */
+    LD     r11, (-20 * SZ)(r1)
+    LD     r13, (-19 * SZ)(r1)
+    LD     r14, (-18 * SZ)(r1)
+    LD     r15, (-17 * SZ)(r1)
+    LD     r16, (-16 * SZ)(r1)
+    LD     r17, (-15 * SZ)(r1)
+    LD     r18, (-14 * SZ)(r1)
+    LD     r19, (-13 * SZ)(r1)
+    LD     r20, (-12 * SZ)(r1)
+    LD     r21, (-11 * SZ)(r1)
+    LD     r22, (-10 * SZ)(r1)
+    LD     r23, ( -9 * SZ)(r1)
+    LD     r24, ( -8 * SZ)(r1)
+    LD     r25, ( -7 * SZ)(r1)
+    LD     r26, ( -6 * SZ)(r1)
+    LD     r27, ( -5 * SZ)(r1)
+    LD     r28, ( -4 * SZ)(r1)
+    LD     r29, ( -3 * SZ)(r1)
+    LD     r30, ( -2 * SZ)(r1)
+    LD     r31, ( -1 * SZ)(r1)
+
+    /* Set cr and lr */
+    mtcr  r12
+    mtlr  r0
+
+    /* Return and switch context */
+    blr
+LFE0:
+
+/* Minimal CFI / FDE which does not describe the stacking of the GPRs - but only that
+   the routine has been entered/exited.  */
+
+# if __PPC64__
+#  define DATA_ALIGN 0x78 /* sleb128 -8 */
+#  define ALIGN_SIZE 3 /* log2 alignment used to pad the CIE and the FDE */
+#  define ADDRD .quad /* pointer-sized data directive */
+# else
+#  define DATA_ALIGN 0x7c /* sleb128 -4 */
+#  define ALIGN_SIZE 3 /* log2 alignment used to pad the CIE and the FDE */
+#  define ADDRD .long /* pointer-sized data directive */
+# endif
+
+	.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EH_frame1:
+	.set L$set$0,LECIE1-LSCIE1
+	.long L$set$0	; Length of Common Information Entry
+LSCIE1:
+	.long	0	; CIE Identifier Tag
+	.byte	0x3	; CIE Version
+	.ascii "zR\0"	; CIE Augmentation
+	.byte	0x1	; uleb128 0x1; CIE Code Alignment Factor
+	.byte	DATA_ALIGN	; sleb128 -4/-8; CIE Data Alignment Factor
+	.byte	0x41	; uleb128 0x41; CIE RA Column
+	.byte	0x1	; uleb128 0x1; Augmentation size
+	.byte	0x10	; FDE Encoding (pcrel)
+	.byte	0xc	; DW_CFA_def_cfa
+	.byte	0x1	; uleb128 0x1; CFA register (r1)
+	.byte	0	; uleb128 0; CFA offset
+	.p2align ALIGN_SIZE,0	; pad the CIE with zero bytes (DW_CFA_nop)
+LECIE1:
+LSFDE1:
+	.set L$set$1,LEFDE1-LASFDE1
+	.long L$set$1	; FDE Length
+LASFDE1:
+	.long	LASFDE1-EH_frame1	; FDE CIE offset
+	ADDRD	LFB0-.	; FDE initial location
+	.set L$set$2,LFE0-LFB0
+	ADDRD L$set$2	; FDE address range
+	.byte	0	; uleb128 0; Augmentation size
+	.p2align ALIGN_SIZE,0	; pad the FDE with zero bytes (DW_CFA_nop)
+LEFDE1:
+
+#endif /* defined(__MACH__) */


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [gcc(refs/users/iains/heads/d-for-darwin)] D, Darwin, PPC : Implement fibre_switchContext.
@ 2021-01-11 21:27 Iain D Sandoe
  0 siblings, 0 replies; 3+ messages in thread
From: Iain D Sandoe @ 2021-01-11 21:27 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:d962c3361a351e424766c9424e723c3def0169bc

commit d962c3361a351e424766c9424e723c3def0169bc
Author: Iain Sandoe <iain@sandoe.co.uk>
Date:   Fri Dec 11 00:48:15 2020 +0000

    D, Darwin, PPC : Implement fibre_switchContext.
    
    For Darwin PPC, the callee-saves include the FPRs and
    12 of the VRs.
    
    This implements the callee-saves and a rudimentary FDE that
    will assemble using assemblers without support for .cfi_xxxx.

Diff:
---
 .../libdruntime/config/powerpc/switchcontext.S     | 278 ++++++++++++++++++++-
 1 file changed, 276 insertions(+), 2 deletions(-)

diff --git a/libphobos/libdruntime/config/powerpc/switchcontext.S b/libphobos/libdruntime/config/powerpc/switchcontext.S
index d4ea577c463..74395b04014 100644
--- a/libphobos/libdruntime/config/powerpc/switchcontext.S
+++ b/libphobos/libdruntime/config/powerpc/switchcontext.S
@@ -24,7 +24,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 
 #include "../common/threadasm.S"
 
-#if !defined(__PPC64__)
+#if !defined(__PPC64__) && !defined(__MACH__)
 
 /**
  * Performs a context switch.
@@ -151,4 +151,278 @@ CSYM(fiber_switchContext):
     .cfi_endproc
     .size CSYM(fiber_switchContext),.-CSYM(fiber_switchContext)
 
-#endif /* !defined(__PPC64__) */
+#elif defined(__MACH__)
+
+/* Implementation for Darwin/macOS preserving callee-saved regs.
+
+   FIXME : The unwind frame is minimal; it does not describe the saved registers.
+   FIXME : not sure if we should save the vsave reg (perhaps using the slot we have
+           r11 in at present).  */
+
+/* Darwin has a red zone (220 bytes for PPC 288 for PPC64) which we can write
+   to before the stack is updated without worrying about it being clobbered by
+   signals or hardware interrupts.
+
+   The stack will be 16byte aligned on entry with:
+						  PPC	PPC64
+   SP-> +---------------------------------------+
+	| back chain to caller			| 0	  0
+	+---------------------------------------+
+	| slot to save CR			| 4       8
+	+---------------------------------------+
+	| slot to save LR			| 8       16
+	+---------------------------------------+
+	| etc.. etc.. as per C calling conv.    |  */
+
+# if __PPC64__
+#  define LD ld /* GPR-sized load */
+#  define ST std /* GPR-sized store */
+#  define STU stdu /* GPR-sized store that also updates the base register */
+#  define SZ 8 /* bytes per GPR save slot */
+#  define MACHINE ppc64 /* operand of the .machine directive */
+#  define RED_ZONE 288 /* red zone size in bytes (see comment above) */
+# else
+#  define LD lwz /* GPR-sized load */
+#  define ST stw /* GPR-sized store */
+#  define STU stwu /* GPR-sized store that also updates the base register */
+#  define SZ 4 /* bytes per GPR save slot */
+#  define MACHINE ppc7400 /* operand of the .machine directive */
+#  define RED_ZONE 220 /* red zone size in bytes (see comment above) */
+# endif
+
+# define SAVE_VECTORS 0 /* nonzero also saves/restores v20-v31 */
+/**
+ * Performs a context switch.
+ *
+ * r3 - old context pointer: where the outgoing stack pointer is stored
+ * r4 - new context pointer: saved stack pointer of the incoming context
+ *
+ */
+    .machine MACHINE
+    .text
+    .globl CSYM(fiber_switchContext)
+    .align 2
+CSYM(fiber_switchContext):
+LFB0:
+    /* Get the link reg. */
+    mflr  r0
+    /* Get the callee-saved crs (well all of them, actually). */
+    mfcr  r12
+
+    /* Save GPRs, we save the static chain here too although it is not clear if we need to.  */
+    ST    r31, ( -1 * SZ)(r1)
+    ST    r30, ( -2 * SZ)(r1)
+    ST    r29, ( -3 * SZ)(r1)
+    ST    r28, ( -4 * SZ)(r1)
+    ST    r27, ( -5 * SZ)(r1)
+    ST    r26, ( -6 * SZ)(r1)
+    ST    r25, ( -7 * SZ)(r1)
+    ST    r24, ( -8 * SZ)(r1)
+    ST    r23, ( -9 * SZ)(r1)
+    ST    r22, (-10 * SZ)(r1)
+    ST    r21, (-11 * SZ)(r1)
+    ST    r20, (-12 * SZ)(r1)
+    ST    r19, (-13 * SZ)(r1)
+    ST    r18, (-14 * SZ)(r1)
+    ST    r17, (-15 * SZ)(r1)
+    ST    r16, (-16 * SZ)(r1)
+    ST    r15, (-17 * SZ)(r1)
+    ST    r14, (-18 * SZ)(r1)
+    ST    r13, (-19 * SZ)(r1)
+
+    /* Save the lr and cr into the normal function linkage area.  */
+    ST    r0, 2*SZ(r1)
+    ST    r12, SZ(r1)
+
+    /* We update the stack pointer here, since we do not want the GC to
+       scan the floating point registers. We are still 16-byte aligned. */
+    STU   r11, (-20 * SZ)(r1)
+
+    /* Update the stack pointer in the old context as per comment above. */
+    ST    r1, 0(r3)
+
+    /* Save FPRs - same for PPC and PPC64 */
+    stfd  f14, (-18 * 8)(r1)
+    stfd  f15, (-17 * 8)(r1)
+    stfd  f16, (-16 * 8)(r1)
+    stfd  f17, (-15 * 8)(r1)
+    stfd  f18, (-14 * 8)(r1)
+    stfd  f19, (-13 * 8)(r1)
+    stfd  f20, (-12 * 8)(r1)
+    stfd  f21, (-11 * 8)(r1)
+    stfd  f22, (-10 * 8)(r1)
+    stfd  f23, ( -9 * 8)(r1)
+    stfd  f24, ( -8 * 8)(r1)
+    stfd  f25, ( -7 * 8)(r1)
+    stfd  f26, ( -6 * 8)(r1)
+    stfd  f27, ( -5 * 8)(r1)
+    stfd  f28, ( -4 * 8)(r1)
+    stfd  f29, ( -3 * 8)(r1)
+    stfd  f30, ( -2 * 8)(r1)
+    stfd  f31, ( -1 * 8)(r1)
+
+#if SAVE_VECTORS
+    /* We are still 16byte aligned - so we are ok for vector saves.
+       but the combined size of the vectors (12 x 16) + the FPRs (144) exceeds the
+       red zone size so we need to adjust the stack again - note this means careful
+       ordering is needed on the restore.  */
+
+    addi  r1, r1, -(12*16+18*8)
+    li    r11, 0
+    stvx  v20,r11,r1
+    addi  r11, r11, 16
+    stvx  v21,r11,r1
+    addi  r11, r11, 16
+    stvx  v22,r11,r1
+    addi  r11, r11, 16
+    stvx  v23,r11,r1
+    addi  r11, r11, 16
+    stvx  v24,r11,r1
+    addi  r11, r11, 16
+    stvx  v25,r11,r1
+    addi  r11, r11, 16
+    stvx  v26,r11,r1
+    addi  r11, r11, 16
+    stvx  v27,r11,r1
+    addi  r11, r11, 16
+    stvx  v28,r11,r1
+    addi  r11, r11, 16
+    stvx  v29,r11,r1
+    addi  r11, r11, 16
+    stvx  v30,r11,r1
+    addi  r11, r11, 16
+    stvx  v31,r11,r1
+
+    /* Now do the same thing in reverse - starting with r4 pointing to
+       the block of GPRs - stage 1 point to the saved vectors and fprs. */
+
+    addi  r1, r4, -(12*16+18*8)
+    li    r11, 0
+    lvx   v20,r11,r1
+    addi  r11, r11, 16
+    lvx   v21,r11,r1
+    addi  r11, r11, 16
+    lvx   v22,r11,r1
+    addi  r11, r11, 16
+    lvx   v23,r11,r1
+    addi  r11, r11, 16
+    lvx   v24,r11,r1
+    addi  r11, r11, 16
+    lvx   v25,r11,r1
+    addi  r11, r11, 16
+    lvx   v26,r11,r1
+    addi  r11, r11, 16
+    lvx   v27,r11,r1
+    addi  r11, r11, 16
+    lvx   v28,r11,r1
+    addi  r11, r11, 16
+    lvx   v29,r11,r1
+    addi  r11, r11, 16
+    lvx   v30,r11,r1
+    addi  r11, r11, 16
+    lvx   v31,r11,r1
+#endif
+
+    /* Restore the FPRs through r4 before r1 is switched: 20 * SZ + 18 * 8
+       bytes is more than RED_ZONE, so they are not safe below the new sp.  */
+
+    lfd  f14, (-18 * 8)(r4)
+    lfd  f15, (-17 * 8)(r4)
+    lfd  f16, (-16 * 8)(r4)
+    lfd  f17, (-15 * 8)(r4)
+    lfd  f18, (-14 * 8)(r4)
+    lfd  f19, (-13 * 8)(r4)
+    lfd  f20, (-12 * 8)(r4)
+    lfd  f21, (-11 * 8)(r4)
+    lfd  f22, (-10 * 8)(r4)
+    lfd  f23, ( -9 * 8)(r4)
+    lfd  f24, ( -8 * 8)(r4)
+    lfd  f25, ( -7 * 8)(r4)
+    lfd  f26, ( -6 * 8)(r4)
+    lfd  f27, ( -5 * 8)(r4)
+    lfd  f28, ( -4 * 8)(r4)
+    lfd  f29, ( -3 * 8)(r4)
+    lfd  f30, ( -2 * 8)(r4)
+    lfd  f31, ( -1 * 8)(r4)
+
+    /* Only the 20 * SZ byte GPR block is still needed below the new sp.  */
+    addi  r1, r4, 20 * SZ
+
+    /* Pick up lr and cr */
+    LD    r0, 2*SZ(r1)
+    LD    r12, SZ(r1)
+
+    /* Restore GPRs */
+    LD     r11, (-20 * SZ)(r1)
+    LD     r13, (-19 * SZ)(r1)
+    LD     r14, (-18 * SZ)(r1)
+    LD     r15, (-17 * SZ)(r1)
+    LD     r16, (-16 * SZ)(r1)
+    LD     r17, (-15 * SZ)(r1)
+    LD     r18, (-14 * SZ)(r1)
+    LD     r19, (-13 * SZ)(r1)
+    LD     r20, (-12 * SZ)(r1)
+    LD     r21, (-11 * SZ)(r1)
+    LD     r22, (-10 * SZ)(r1)
+    LD     r23, ( -9 * SZ)(r1)
+    LD     r24, ( -8 * SZ)(r1)
+    LD     r25, ( -7 * SZ)(r1)
+    LD     r26, ( -6 * SZ)(r1)
+    LD     r27, ( -5 * SZ)(r1)
+    LD     r28, ( -4 * SZ)(r1)
+    LD     r29, ( -3 * SZ)(r1)
+    LD     r30, ( -2 * SZ)(r1)
+    LD     r31, ( -1 * SZ)(r1)
+
+    /* Set cr and lr */
+    mtcr  r12
+    mtlr  r0
+
+    /* Return and switch context */
+    blr
+LFE0:
+
+/* Minimal CFI / FDE which does not describe the stacking of the GPRs - but only that
+   the routine has been entered/exited.  */
+
+# if __PPC64__
+#  define DATA_ALIGN 0x78 /* sleb128 -8 */
+#  define ALIGN_SIZE 3 /* log2 alignment used to pad the CIE and the FDE */
+#  define ADDRD .quad /* pointer-sized data directive */
+# else
+#  define DATA_ALIGN 0x7c /* sleb128 -4 */
+#  define ALIGN_SIZE 3 /* log2 alignment used to pad the CIE and the FDE */
+#  define ADDRD .long /* pointer-sized data directive */
+# endif
+
+	.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EH_frame1:
+	.set L$set$0,LECIE1-LSCIE1
+	.long L$set$0	; Length of Common Information Entry
+LSCIE1:
+	.long	0	; CIE Identifier Tag
+	.byte	0x3	; CIE Version
+	.ascii "zR\0"	; CIE Augmentation
+	.byte	0x1	; uleb128 0x1; CIE Code Alignment Factor
+	.byte	DATA_ALIGN	; sleb128 -4/-8; CIE Data Alignment Factor
+	.byte	0x41	; uleb128 0x41; CIE RA Column
+	.byte	0x1	; uleb128 0x1; Augmentation size
+	.byte	0x10	; FDE Encoding (pcrel)
+	.byte	0xc	; DW_CFA_def_cfa
+	.byte	0x1	; uleb128 0x1; CFA register (r1)
+	.byte	0	; uleb128 0; CFA offset
+	.p2align ALIGN_SIZE,0	; pad the CIE with zero bytes (DW_CFA_nop)
+LECIE1:
+LSFDE1:
+	.set L$set$1,LEFDE1-LASFDE1
+	.long L$set$1	; FDE Length
+LASFDE1:
+	.long	LASFDE1-EH_frame1	; FDE CIE offset
+	ADDRD	LFB0-.	; FDE initial location
+	.set L$set$2,LFE0-LFB0
+	ADDRD L$set$2	; FDE address range
+	.byte	0	; uleb128 0; Augmentation size
+	.p2align ALIGN_SIZE,0	; pad the FDE with zero bytes (DW_CFA_nop)
+LEFDE1:
+
+#endif /* defined(__MACH__) */


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [gcc(refs/users/iains/heads/d-for-darwin)] D, Darwin, PPC : Implement fibre_switchContext.
@ 2020-12-13 17:44 Iain D Sandoe
  0 siblings, 0 replies; 3+ messages in thread
From: Iain D Sandoe @ 2020-12-13 17:44 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:f20e60bef60626ea5c66d7711a76465092229395

commit f20e60bef60626ea5c66d7711a76465092229395
Author: Iain Sandoe <iain@sandoe.co.uk>
Date:   Fri Dec 11 00:48:15 2020 +0000

    D, Darwin, PPC : Implement fibre_switchContext.
    
    For Darwin PPC, the callee-saves include the FPRs and
    12 of the VRs.
    
    This implements the callee-saves and a rudimentary FDE that
    will assemble using assemblers without support for .cfi_xxxx.

Diff:
---
 .../libdruntime/config/powerpc/switchcontext.S     | 278 ++++++++++++++++++++-
 1 file changed, 276 insertions(+), 2 deletions(-)

diff --git a/libphobos/libdruntime/config/powerpc/switchcontext.S b/libphobos/libdruntime/config/powerpc/switchcontext.S
index 6689217ca07..ddc65c23ed7 100644
--- a/libphobos/libdruntime/config/powerpc/switchcontext.S
+++ b/libphobos/libdruntime/config/powerpc/switchcontext.S
@@ -24,7 +24,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 
 #include "../common/threadasm.S"
 
-#if !defined(__PPC64__)
+#if !defined(__PPC64__) && !defined(__MACH__)
 
 /**
  * Performs a context switch.
@@ -151,4 +151,278 @@ CSYM(fiber_switchContext):
     .cfi_endproc
     .size CSYM(fiber_switchContext),.-CSYM(fiber_switchContext)
 
-#endif /* !defined(__PPC64__) */
+#elif defined(__MACH__)
+
+/* Implementation for Darwin/macOS preserving callee-saved regs.
+
+   FIXME : The unwind frame is minimal; it does not describe the saved registers.
+   FIXME : not sure if we should save the vsave reg (perhaps using the slot we have
+           r11 in at present).  */
+
+/* Darwin has a red zone (220 bytes for PPC 288 for PPC64) which we can write
+   to before the stack is updated without worrying about it being clobbered by
+   signals or hardware interrupts.
+
+   The stack will be 16byte aligned on entry with:
+						  PPC	PPC64
+   SP-> +---------------------------------------+
+	| back chain to caller			| 0	  0
+	+---------------------------------------+
+	| slot to save CR			| 4       8
+	+---------------------------------------+
+	| slot to save LR			| 8       16
+	+---------------------------------------+
+	| etc.. etc.. as per C calling conv.    |  */
+
+# if __PPC64__
+#  define LD ld /* GPR-sized load */
+#  define ST std /* GPR-sized store */
+#  define STU stdu /* GPR-sized store that also updates the base register */
+#  define SZ 8 /* bytes per GPR save slot */
+#  define MACHINE ppc64 /* operand of the .machine directive */
+#  define RED_ZONE 288 /* red zone size in bytes (see comment above) */
+# else
+#  define LD lwz /* GPR-sized load */
+#  define ST stw /* GPR-sized store */
+#  define STU stwu /* GPR-sized store that also updates the base register */
+#  define SZ 4 /* bytes per GPR save slot */
+#  define MACHINE ppc7400 /* operand of the .machine directive */
+#  define RED_ZONE 220 /* red zone size in bytes (see comment above) */
+# endif
+
+# define SAVE_VECTORS 0 /* nonzero also saves/restores v20-v31 */
+/**
+ * Performs a context switch.
+ *
+ * r3 - old context pointer: where the outgoing stack pointer is stored
+ * r4 - new context pointer: saved stack pointer of the incoming context
+ *
+ */
+    .machine MACHINE
+    .text
+    .globl CSYM(fiber_switchContext)
+    .align 2
+CSYM(fiber_switchContext):
+LFB0:
+    /* Get the link reg. */
+    mflr  r0
+    /* Get the callee-saved crs (well all of them, actually). */
+    mfcr  r12
+
+    /* Save GPRs, we save the static chain here too although it is not clear if we need to.  */
+    ST    r31, ( -1 * SZ)(r1)
+    ST    r30, ( -2 * SZ)(r1)
+    ST    r29, ( -3 * SZ)(r1)
+    ST    r28, ( -4 * SZ)(r1)
+    ST    r27, ( -5 * SZ)(r1)
+    ST    r26, ( -6 * SZ)(r1)
+    ST    r25, ( -7 * SZ)(r1)
+    ST    r24, ( -8 * SZ)(r1)
+    ST    r23, ( -9 * SZ)(r1)
+    ST    r22, (-10 * SZ)(r1)
+    ST    r21, (-11 * SZ)(r1)
+    ST    r20, (-12 * SZ)(r1)
+    ST    r19, (-13 * SZ)(r1)
+    ST    r18, (-14 * SZ)(r1)
+    ST    r17, (-15 * SZ)(r1)
+    ST    r16, (-16 * SZ)(r1)
+    ST    r15, (-17 * SZ)(r1)
+    ST    r14, (-18 * SZ)(r1)
+    ST    r13, (-19 * SZ)(r1)
+
+    /* Save the lr and cr into the normal function linkage area.  */
+    ST    r0, 2*SZ(r1)
+    ST    r12, SZ(r1)
+
+    /* We update the stack pointer here, since we do not want the GC to
+       scan the floating point registers. We are still 16-byte aligned. */
+    STU   r11, (-20 * SZ)(r1)
+
+    /* Update the stack pointer in the old context as per comment above. */
+    ST    r1, 0(r3)
+
+    /* Save FPRs - same for PPC and PPC64 */
+    stfd  f14, (-18 * 8)(r1)
+    stfd  f15, (-17 * 8)(r1)
+    stfd  f16, (-16 * 8)(r1)
+    stfd  f17, (-15 * 8)(r1)
+    stfd  f18, (-14 * 8)(r1)
+    stfd  f19, (-13 * 8)(r1)
+    stfd  f20, (-12 * 8)(r1)
+    stfd  f21, (-11 * 8)(r1)
+    stfd  f22, (-10 * 8)(r1)
+    stfd  f23, ( -9 * 8)(r1)
+    stfd  f24, ( -8 * 8)(r1)
+    stfd  f25, ( -7 * 8)(r1)
+    stfd  f26, ( -6 * 8)(r1)
+    stfd  f27, ( -5 * 8)(r1)
+    stfd  f28, ( -4 * 8)(r1)
+    stfd  f29, ( -3 * 8)(r1)
+    stfd  f30, ( -2 * 8)(r1)
+    stfd  f31, ( -1 * 8)(r1)
+
+#if SAVE_VECTORS
+    /* We are still 16byte aligned - so we are ok for vector saves.
+       but the combined size of the vectors (12 x 16) + the FPRs (144) exceeds the
+       red zone size so we need to adjust the stack again - note this means careful
+       ordering is needed on the restore.  */
+
+    addi  r1, r1, -(12*16+18*8)
+    li    r11, 0
+    stvx  v20,r11,r1
+    addi  r11, r11, 16
+    stvx  v21,r11,r1
+    addi  r11, r11, 16
+    stvx  v22,r11,r1
+    addi  r11, r11, 16
+    stvx  v23,r11,r1
+    addi  r11, r11, 16
+    stvx  v24,r11,r1
+    addi  r11, r11, 16
+    stvx  v25,r11,r1
+    addi  r11, r11, 16
+    stvx  v26,r11,r1
+    addi  r11, r11, 16
+    stvx  v27,r11,r1
+    addi  r11, r11, 16
+    stvx  v28,r11,r1
+    addi  r11, r11, 16
+    stvx  v29,r11,r1
+    addi  r11, r11, 16
+    stvx  v30,r11,r1
+    addi  r11, r11, 16
+    stvx  v31,r11,r1
+
+    /* Now do the same thing in reverse - starting with r4 pointing to
+       the block of GPRs - stage 1 point to the saved vectors and fprs. */
+
+    addi  r1, r4, -(12*16+18*8)
+    li    r11, 0
+    lvx   v20,r11,r1
+    addi  r11, r11, 16
+    lvx   v21,r11,r1
+    addi  r11, r11, 16
+    lvx   v22,r11,r1
+    addi  r11, r11, 16
+    lvx   v23,r11,r1
+    addi  r11, r11, 16
+    lvx   v24,r11,r1
+    addi  r11, r11, 16
+    lvx   v25,r11,r1
+    addi  r11, r11, 16
+    lvx   v26,r11,r1
+    addi  r11, r11, 16
+    lvx   v27,r11,r1
+    addi  r11, r11, 16
+    lvx   v28,r11,r1
+    addi  r11, r11, 16
+    lvx   v29,r11,r1
+    addi  r11, r11, 16
+    lvx   v30,r11,r1
+    addi  r11, r11, 16
+    lvx   v31,r11,r1
+#endif
+
+    /* Restore the FPRs through r4 before r1 is switched: 20 * SZ + 18 * 8
+       bytes is more than RED_ZONE, so they are not safe below the new sp.  */
+
+    lfd  f14, (-18 * 8)(r4)
+    lfd  f15, (-17 * 8)(r4)
+    lfd  f16, (-16 * 8)(r4)
+    lfd  f17, (-15 * 8)(r4)
+    lfd  f18, (-14 * 8)(r4)
+    lfd  f19, (-13 * 8)(r4)
+    lfd  f20, (-12 * 8)(r4)
+    lfd  f21, (-11 * 8)(r4)
+    lfd  f22, (-10 * 8)(r4)
+    lfd  f23, ( -9 * 8)(r4)
+    lfd  f24, ( -8 * 8)(r4)
+    lfd  f25, ( -7 * 8)(r4)
+    lfd  f26, ( -6 * 8)(r4)
+    lfd  f27, ( -5 * 8)(r4)
+    lfd  f28, ( -4 * 8)(r4)
+    lfd  f29, ( -3 * 8)(r4)
+    lfd  f30, ( -2 * 8)(r4)
+    lfd  f31, ( -1 * 8)(r4)
+
+    /* Only the 20 * SZ byte GPR block is still needed below the new sp.  */
+    addi  r1, r4, 20 * SZ
+
+    /* Pick up lr and cr */
+    LD    r0, 2*SZ(r1)
+    LD    r12, SZ(r1)
+
+    /* Restore GPRs */
+    LD     r11, (-20 * SZ)(r1)
+    LD     r13, (-19 * SZ)(r1)
+    LD     r14, (-18 * SZ)(r1)
+    LD     r15, (-17 * SZ)(r1)
+    LD     r16, (-16 * SZ)(r1)
+    LD     r17, (-15 * SZ)(r1)
+    LD     r18, (-14 * SZ)(r1)
+    LD     r19, (-13 * SZ)(r1)
+    LD     r20, (-12 * SZ)(r1)
+    LD     r21, (-11 * SZ)(r1)
+    LD     r22, (-10 * SZ)(r1)
+    LD     r23, ( -9 * SZ)(r1)
+    LD     r24, ( -8 * SZ)(r1)
+    LD     r25, ( -7 * SZ)(r1)
+    LD     r26, ( -6 * SZ)(r1)
+    LD     r27, ( -5 * SZ)(r1)
+    LD     r28, ( -4 * SZ)(r1)
+    LD     r29, ( -3 * SZ)(r1)
+    LD     r30, ( -2 * SZ)(r1)
+    LD     r31, ( -1 * SZ)(r1)
+
+    /* Set cr and lr */
+    mtcr  r12
+    mtlr  r0
+
+    /* Return and switch context */
+    blr
+LFE0:
+
+/* Minimal CFI / FDE which does not describe the stacking of the GPRs - but only that
+   the routine has been entered/exited.  */
+
+# if __PPC64__
+#  define DATA_ALIGN 0x78 /* sleb128 -8 */
+#  define ALIGN_SIZE 3 /* log2 alignment used to pad the CIE and the FDE */
+#  define ADDRD .quad /* pointer-sized data directive */
+# else
+#  define DATA_ALIGN 0x7c /* sleb128 -4 */
+#  define ALIGN_SIZE 3 /* log2 alignment used to pad the CIE and the FDE */
+#  define ADDRD .long /* pointer-sized data directive */
+# endif
+
+	.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EH_frame1:
+	.set L$set$0,LECIE1-LSCIE1
+	.long L$set$0	; Length of Common Information Entry
+LSCIE1:
+	.long	0	; CIE Identifier Tag
+	.byte	0x3	; CIE Version
+	.ascii "zR\0"	; CIE Augmentation
+	.byte	0x1	; uleb128 0x1; CIE Code Alignment Factor
+	.byte	DATA_ALIGN	; sleb128 -4/-8; CIE Data Alignment Factor
+	.byte	0x41	; uleb128 0x41; CIE RA Column
+	.byte	0x1	; uleb128 0x1; Augmentation size
+	.byte	0x10	; FDE Encoding (pcrel)
+	.byte	0xc	; DW_CFA_def_cfa
+	.byte	0x1	; uleb128 0x1; CFA register (r1)
+	.byte	0	; uleb128 0; CFA offset
+	.p2align ALIGN_SIZE,0	; pad the CIE with zero bytes (DW_CFA_nop)
+LECIE1:
+LSFDE1:
+	.set L$set$1,LEFDE1-LASFDE1
+	.long L$set$1	; FDE Length
+LASFDE1:
+	.long	LASFDE1-EH_frame1	; FDE CIE offset
+	ADDRD	LFB0-.	; FDE initial location
+	.set L$set$2,LFE0-LFB0
+	ADDRD L$set$2	; FDE address range
+	.byte	0	; uleb128 0; Augmentation size
+	.p2align ALIGN_SIZE,0	; pad the FDE with zero bytes (DW_CFA_nop)
+LEFDE1:
+
+#endif /* defined(__MACH__) */


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-01-11 21:27 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-21 20:37 [gcc(refs/users/iains/heads/d-for-darwin)] D, Darwin, PPC : Implement fibre_switchContext Iain D Sandoe
  -- strict thread matches above, loose matches on Subject: below --
2021-01-11 21:27 Iain D Sandoe
2020-12-13 17:44 Iain D Sandoe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).