public inbox for libffi-discuss@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] Go closures for s390[x]
@ 2014-12-16 15:05 Dominik Vogt
  2014-12-18 21:29 ` Richard Henderson
  0 siblings, 1 reply; 7+ messages in thread
From: Dominik Vogt @ 2014-12-16 15:05 UTC (permalink / raw)
  To: libffi-discuss; +Cc: Ulrich Weigand, Andreas Krebbel

[-- Attachment #1: Type: text/plain, Size: 886 bytes --]

The attached patch adds Go closure support for s390[x] atop
Richard's go-closure branch in the Gcc repository (it requires on
other patches discussed in the general Go closure topic).

ChangeLog:
--
2014-12-16  Dominik Vogt  <vogt@linux.vnet.ibm.com>

	* src/s390/sysv.S (ffi_call_SYSV): Adapt for Go closure support.
	Rewrite cfi information.
	(ffi_closure_SYSV): Adapt for Go closure support.  Rewrite cfi
	information.
	(ffi_go_closure_SYSV): New function.
	* src/s390/ffi.c (ffi_call_int): Renamed from ffi_call, add closure
	argument.
	(ffi_call): New interface function.
	(ffi_call_go): New interface function for go calls.
	(ffi_closure_helper_SYSV): Pass closure internals as arguments.
	(ffi_prep_go_closure): New function for Go closure support.
	* src/s390/ffitarget.h (FFI_GO_CLOSURES): Activate Go closure support.
--

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany

[-- Attachment #2: 0001-libffi-S-390-Go-closure-support.patch --]
[-- Type: text/x-diff, Size: 21228 bytes --]

From 7df911133b96aca5b9a40a7a33b42f356c8db530 Mon Sep 17 00:00:00 2001
From: Dominik Vogt <vogt@linux.vnet.ibm.com>
Date: Wed, 10 Dec 2014 11:47:25 +0100
Subject: [PATCH] libffi: S/390 Go closure support.

---
 libffi/src/s390/ffi.c       |  78 +++++---
 libffi/src/s390/ffitarget.h |   1 +
 libffi/src/s390/sysv.S      | 459 ++++++++++++++++++++++++--------------------
 3 files changed, 303 insertions(+), 235 deletions(-)

diff --git a/libffi/src/s390/ffi.c b/libffi/src/s390/ffi.c
index 520ec7c..477b85e 100644
--- a/libffi/src/s390/ffi.c
+++ b/libffi/src/s390/ffi.c
@@ -65,21 +65,6 @@
 /*===================== End of Defines ===============================*/
  
 /*====================================================================*/
-/*                          Prototypes                                */
-/*                          ----------                                */
-/*====================================================================*/
- 
-static void ffi_prep_args (unsigned char *, extended_cif *);
-void
-#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
-__attribute__ ((visibility ("hidden")))
-#endif
-ffi_closure_helper_SYSV (ffi_closure *, unsigned long *, 
-			 unsigned long long *, unsigned long *);
-
-/*====================== End of Prototypes ===========================*/
- 
-/*====================================================================*/
 /*                          Externals                                 */
 /*                          ---------                                 */
 /*====================================================================*/
@@ -89,9 +74,10 @@ extern void ffi_call_SYSV(unsigned,
 			  void (*)(unsigned char *, extended_cif *),
 			  unsigned,
 			  void *,
-			  void (*fn)(void));
+			  void (*fn)(void), void *);
 
 extern void ffi_closure_SYSV(void);
+extern void ffi_go_closure_SYSV(void);
  
 /*====================== End of Externals ============================*/
  
@@ -504,11 +490,12 @@ ffi_prep_cif_machdep(ffi_cif *cif)
 /*                                                                    */
 /*====================================================================*/
  
-void
-ffi_call(ffi_cif *cif,
-	 void (*fn)(void),
-	 void *rvalue,
-	 void **avalue)
+static void
+ffi_call_int(ffi_cif *cif,
+	     void (*fn)(void),
+	     void *rvalue,
+	     void **avalue,
+	     void *closure)
 {
   int ret_type = cif->flags;
   extended_cif ecif;
@@ -530,7 +517,7 @@ ffi_call(ffi_cif *cif,
     {
       case FFI_SYSV:
         ffi_call_SYSV (cif->bytes, &ecif, ffi_prep_args,
-		       ret_type, ecif.rvalue, fn);
+		       ret_type, ecif.rvalue, fn, closure);
         break;
  
       default:
@@ -538,6 +525,19 @@ ffi_call(ffi_cif *cif,
         break;
     }
 }
+
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+  ffi_call_int(cif, fn, rvalue, avalue, NULL);
+}
+
+void
+ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
+	     void **avalue, void *closure)
+{
+  ffi_call_int(cif, fn, rvalue, avalue, closure);
+}
  
 /*======================== End of Routine ============================*/
 
@@ -548,9 +548,12 @@ ffi_call(ffi_cif *cif,
 /* Function - Call a FFI closure target function.                     */
 /*                                                                    */
 /*====================================================================*/
- 
+
+FFI_HIDDEN
 void
-ffi_closure_helper_SYSV (ffi_closure *closure,
+ffi_closure_helper_SYSV (ffi_cif *cif,
+			 void (*fun)(ffi_cif*,void*,void**,void*),
+			 void *user_data,
 			 unsigned long *p_gpr,
 			 unsigned long long *p_fpr,
 			 unsigned long *p_ov)
@@ -570,20 +573,18 @@ ffi_closure_helper_SYSV (ffi_closure *closure,
 
   /* Allocate buffer for argument list pointers.  */
 
-  p_arg = avalue = alloca (closure->cif->nargs * sizeof (void *));
+  p_arg = avalue = alloca (cif->nargs * sizeof (void *));
 
   /* If we returning a structure, pass the structure address 
      directly to the target function.  Otherwise, have the target 
      function store the return value to the GPR save area.  */
 
-  if (closure->cif->flags == FFI390_RET_STRUCT)
+  if (cif->flags == FFI390_RET_STRUCT)
     rvalue = (void *) p_gpr[n_gpr++];
 
   /* Now for the arguments.  */
 
-  for (ptr = closure->cif->arg_types, i = closure->cif->nargs;
-       i > 0;
-       i--, p_arg++, ptr++)
+  for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, p_arg++, ptr++)
     {
       int deref_struct_pointer = 0;
       int type = (*ptr)->type;
@@ -689,10 +690,10 @@ ffi_closure_helper_SYSV (ffi_closure *closure,
 
 
   /* Call the target function.  */
-  (closure->fun) (closure->cif, rvalue, avalue, closure->user_data);
+  (fun) (cif, rvalue, avalue, user_data);
 
   /* Convert the return value.  */
-  switch (closure->cif->rtype->type)
+  switch (cif->rtype->type)
     {
       /* Void is easy, and so is struct.  */
       case FFI_TYPE_VOID:
@@ -790,3 +791,18 @@ ffi_prep_closure_loc (ffi_closure *closure,
 
 /*======================== End of Routine ============================*/
  
+/* Build a Go language closure.  */
+
+ffi_status
+ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif *cif,
+		     void (*fun)(ffi_cif*,void*,void**,void*))
+{
+  if (cif->abi != FFI_SYSV)
+    return FFI_BAD_ABI;
+
+  closure->tramp = ffi_go_closure_SYSV;
+  closure->cif = cif;
+  closure->fun = fun;
+
+  return FFI_OK;
+}
diff --git a/libffi/src/s390/ffitarget.h b/libffi/src/s390/ffitarget.h
index 0e4868a..d8a4ee4 100644
--- a/libffi/src/s390/ffitarget.h
+++ b/libffi/src/s390/ffitarget.h
@@ -58,6 +58,7 @@ typedef enum ffi_abi {
 /* ---- Definitions for closures ----------------------------------------- */
 
 #define FFI_CLOSURES 1
+#define FFI_GO_CLOSURES 1
 #ifdef S390X
 #define FFI_TRAMPOLINE_SIZE 32
 #else
diff --git a/libffi/src/s390/sysv.S b/libffi/src/s390/sysv.S
index 4731a31..14672ac 100644
--- a/libffi/src/s390/sysv.S
+++ b/libffi/src/s390/sysv.S
@@ -39,18 +39,29 @@
 	# r5:	ret_type
 	# r6:	ecif.rvalue
 	# ov:	fn 
- 
+	# ov+8:	closure
+
 	# This assumes we are using gas.
 	.globl	ffi_call_SYSV
+	FFI_HIDDEN(ffi_call_SYSV)
 	.type	ffi_call_SYSV,%function
 ffi_call_SYSV:
-.LFB1:
+	.cfi_startproc
 	stm	%r6,%r15,24(%r15)		# Save registers
-.LCFI0:
+	.cfi_offset r6, -72
+	.cfi_offset r7, -68
+	.cfi_offset r8, -64
+	.cfi_offset r9, -60
+	.cfi_offset r10, -56
+	.cfi_offset r11, -52
+	.cfi_offset r12, -48
+	.cfi_offset r13, -44
+	.cfi_offset r14, -40
+	.cfi_offset r15, -36
 	basr	%r13,0				# Set up base register
 .Lbase:
 	lr	%r11,%r15			# Set up frame pointer
-.LCFI1:
+	.cfi_def_cfa_register r11
 	sr	%r15,%r2
 	ahi	%r15,-96-48			# Allocate stack
 	lr	%r8,%r6				# Save ecif.rvalue
@@ -59,12 +70,13 @@ ffi_call_SYSV:
 	l	%r7,96(%r11)			# Load function address
 	st	%r11,0(%r15)			# Set up back chain
 	ahi	%r11,-48			# Register save area
-.LCFI2:
+	.cfi_adjust_cfa_offset 48
 
 	la	%r2,96(%r15)			# Save area
 						# r3 already holds &ecif
 	basr	%r14,%r4			# Call ffi_prep_args
 
+	l	%r0,96+48+4(%r11)		# Go closure -> static chain
 	lm	%r2,%r6,0(%r11)			# Load arguments
 	ld	%f0,32(%r11)
 	ld	%f2,40(%r11)
@@ -74,31 +86,106 @@ ffi_call_SYSV:
 .LretNone:					# Return void
 	l	%r4,48+56(%r11)
 	lm	%r6,%r15,48+24(%r11)
+	.cfi_remember_state
+	.cfi_restore 15
+	.cfi_restore 14
+	.cfi_restore 13
+	.cfi_restore 12
+	.cfi_restore 11
+	.cfi_restore 10
+	.cfi_restore 9
+	.cfi_restore 8
+	.cfi_restore 7
+	.cfi_restore 6
+	.cfi_def_cfa r15, 96
 	br	%r4
+	.cfi_restore_state
+	# This nopr is necessary so that the .cfi instructions between the br
+	# above and the label below get executed.  See execute_cfa_program() in
+	# the Gcc source code, libgcc/unwind-dw2.c.
+	nopr
 
 .LretFloat:
 	l	%r4,48+56(%r11)
 	ste	%f0,0(%r8)			# Return float
 	lm	%r6,%r15,48+24(%r11)
+	.cfi_remember_state
+	.cfi_restore 15
+	.cfi_restore 14
+	.cfi_restore 13
+	.cfi_restore 12
+	.cfi_restore 11
+	.cfi_restore 10
+	.cfi_restore 9
+	.cfi_restore 8
+	.cfi_restore 7
+	.cfi_restore 6
+	.cfi_def_cfa r15, 96
 	br	%r4
+	.cfi_restore_state
+	# See comment on the nopr above.
+	nopr
  
 .LretDouble:
 	l	%r4,48+56(%r11)
 	std	%f0,0(%r8)			# Return double
 	lm	%r6,%r15,48+24(%r11)
+	.cfi_remember_state
+	.cfi_restore 15
+	.cfi_restore 14
+	.cfi_restore 13
+	.cfi_restore 12
+	.cfi_restore 11
+	.cfi_restore 10
+	.cfi_restore 9
+	.cfi_restore 8
+	.cfi_restore 7
+	.cfi_restore 6
+	.cfi_def_cfa r15, 96
 	br	%r4
+	.cfi_restore_state
+	# See comment on the nopr above.
+	nopr
 
 .LretInt32:
 	l	%r4,48+56(%r11)
 	st	%r2,0(%r8)			# Return int
 	lm	%r6,%r15,48+24(%r11)
+	.cfi_remember_state
+	.cfi_restore 15
+	.cfi_restore 14
+	.cfi_restore 13
+	.cfi_restore 12
+	.cfi_restore 11
+	.cfi_restore 10
+	.cfi_restore 9
+	.cfi_restore 8
+	.cfi_restore 7
+	.cfi_restore 6
+	.cfi_def_cfa r15, 96
 	br	%r4
+	.cfi_restore_state
+	# See comment on the nopr above.
+	nopr
  
 .LretInt64:
 	l	%r4,48+56(%r11)
 	stm	%r2,%r3,0(%r8)			# Return long long
 	lm	%r6,%r15,48+24(%r11)
+	.cfi_remember_state
+	.cfi_restore 15
+	.cfi_restore 14
+	.cfi_restore 13
+	.cfi_restore 12
+	.cfi_restore 11
+	.cfi_restore 10
+	.cfi_restore 9
+	.cfi_restore 8
+	.cfi_restore 7
+	.cfi_restore 6
+	.cfi_def_cfa r15, 96
 	br	%r4
+	.cfi_endproc
  
 .Ltable:
 	.byte	.LretNone-.Lbase		# FFI390_RET_VOID
@@ -108,129 +195,78 @@ ffi_call_SYSV:
 	.byte	.LretInt32-.Lbase		# FFI390_RET_INT32
 	.byte	.LretInt64-.Lbase		# FFI390_RET_INT64
 
-.LFE1: 
 .ffi_call_SYSV_end:
 	.size	 ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV
 
 
 	.globl	ffi_closure_SYSV
+	FFI_HIDDEN(ffi_closure_SYSV)
 	.type	ffi_closure_SYSV,%function
 ffi_closure_SYSV:
-.LFB2:
+	.cfi_startproc
+	stm	%r2,%r6,8(%r15)			# Save arguments
+	.cfi_offset r6, -72
+	lr	%r4,%r0				# Closure
+	l	%r2,16(%r4)			#   ->cif
+	l	%r3,20(%r4)			#   ->fun
+	l	%r4,24(%r4)			#   ->user_data
+.Ldoclosure:
 	stm	%r12,%r15,48(%r15)		# Save registers
-.LCFI10:
+	.cfi_offset r12, -48
+	.cfi_offset r13, -44
+	.cfi_offset r14, -40
+	.cfi_offset r15, -36
 	basr	%r13,0				# Set up base register
 .Lcbase:
-	stm	%r2,%r6,8(%r15)			# Save arguments
 	std	%f0,64(%r15)
 	std	%f2,72(%r15)
 	lr	%r1,%r15			# Set up stack frame
-	ahi	%r15,-96
-.LCFI11:
+	ahi	%r15,-104
+	.cfi_adjust_cfa_offset 104
 	l	%r12,.Lchelper-.Lcbase(%r13)	# Get helper function
-	lr	%r2,%r0				# Closure
-	la	%r3,8(%r1)			# GPRs
-	la	%r4,64(%r1)			# FPRs
-	la	%r5,96(%r1)			# Overflow
+	la	%r5,96(%r1)
+	st	%r5,96(%r15)			# Overflow
+	la	%r5,8(%r1)			# GPRs
+	la	%r6,64(%r1)			# FPRs
 	st	%r1,0(%r15)			# Set up back chain
 
 	bas	%r14,0(%r12,%r13)		# Call helper
 
-	l	%r4,96+56(%r15)
-	ld	%f0,96+64(%r15)			# Load return registers
-	lm	%r2,%r3,96+8(%r15)
-	lm	%r12,%r15,96+48(%r15)
+	l	%r4,104+56(%r15)
+	ld	%f0,104+64(%r15)		# Load return registers
+	lm	%r2,%r3,104+8(%r15)
+	l	%r6,104+24(%r15)		# Restore saved registers
+	.cfi_restore r6
+	lm	%r12,%r15,104+48(%r15)
+	.cfi_adjust_cfa_offset -104
+	.cfi_restore r12
+	.cfi_restore r13
+	.cfi_restore r14
+	.cfi_restore r15
 	br	%r4
+	.cfi_endproc
 
 	.align 4
 .Lchelper:
 	.long	ffi_closure_helper_SYSV-.Lcbase
 
-.LFE2: 
 
 .ffi_closure_SYSV_end:
 	.size	 ffi_closure_SYSV,.ffi_closure_SYSV_end-ffi_closure_SYSV
 
 
-	.section	.eh_frame,EH_FRAME_FLAGS,@progbits
-.Lframe1:
-	.4byte	.LECIE1-.LSCIE1	# Length of Common Information Entry
-.LSCIE1:
-	.4byte	0x0	# CIE Identifier Tag
-	.byte	0x1	# CIE Version
-	.ascii "zR\0"	# CIE Augmentation
-	.uleb128 0x1	# CIE Code Alignment Factor
-	.sleb128 -4	# CIE Data Alignment Factor
-	.byte	0xe	# CIE RA Column
-	.uleb128 0x1	# Augmentation size
-	.byte	0x1b	# FDE Encoding (pcrel sdata4)
-	.byte	0xc	# DW_CFA_def_cfa
-	.uleb128 0xf
-	.uleb128 0x60
-	.align	4
-.LECIE1:
-.LSFDE1:
-	.4byte	.LEFDE1-.LASFDE1	# FDE Length
-.LASFDE1:
-	.4byte	.LASFDE1-.Lframe1	# FDE CIE offset
-	.4byte	.LFB1-.	# FDE initial location
-	.4byte	.LFE1-.LFB1	# FDE address range
-	.uleb128 0x0	# Augmentation size
-	.byte	0x4	# DW_CFA_advance_loc4
-	.4byte	.LCFI0-.LFB1
-	.byte	0x8f	# DW_CFA_offset, column 0xf
-	.uleb128 0x9
-	.byte	0x8e	# DW_CFA_offset, column 0xe
-	.uleb128 0xa
-	.byte	0x8d	# DW_CFA_offset, column 0xd
-	.uleb128 0xb
-	.byte	0x8c	# DW_CFA_offset, column 0xc
-	.uleb128 0xc
-	.byte	0x8b	# DW_CFA_offset, column 0xb
-	.uleb128 0xd
-	.byte	0x8a	# DW_CFA_offset, column 0xa
-	.uleb128 0xe
-	.byte	0x89	# DW_CFA_offset, column 0x9
-	.uleb128 0xf
-	.byte	0x88	# DW_CFA_offset, column 0x8
-	.uleb128 0x10
-	.byte	0x87	# DW_CFA_offset, column 0x7
-	.uleb128 0x11
-	.byte	0x86	# DW_CFA_offset, column 0x6
-	.uleb128 0x12
-	.byte	0x4	# DW_CFA_advance_loc4
-	.4byte	.LCFI1-.LCFI0
-	.byte	0xd	# DW_CFA_def_cfa_register
-	.uleb128 0xb
-	.byte	0x4	# DW_CFA_advance_loc4
-	.4byte	.LCFI2-.LCFI1
-	.byte	0xe	# DW_CFA_def_cfa_offset
-	.uleb128 0x90
-	.align	4
-.LEFDE1:
-.LSFDE2:
-	.4byte	.LEFDE2-.LASFDE2	# FDE Length
-.LASFDE2:
-	.4byte	.LASFDE2-.Lframe1	# FDE CIE offset
-	.4byte	.LFB2-.	# FDE initial location
-	.4byte	.LFE2-.LFB2	# FDE address range
-	.uleb128 0x0	# Augmentation size
-	.byte	0x4	# DW_CFA_advance_loc4
-	.4byte	.LCFI10-.LFB2
-	.byte	0x8f	# DW_CFA_offset, column 0xf
-	.uleb128 0x9
-	.byte	0x8e	# DW_CFA_offset, column 0xe
-	.uleb128 0xa
-	.byte	0x8d	# DW_CFA_offset, column 0xd
-	.uleb128 0xb
-	.byte	0x8c	# DW_CFA_offset, column 0xc
-	.uleb128 0xc
-	.byte	0x4	# DW_CFA_advance_loc4
-	.4byte	.LCFI11-.LCFI10
-	.byte	0xe	# DW_CFA_def_cfa_offset
-	.uleb128 0xc0
-	.align	4
-.LEFDE2:
+	.globl	ffi_go_closure_SYSV
+	FFI_HIDDEN(ffi_go_closure_SYSV)
+	.type	ffi_go_closure_SYSV,%function
+ffi_go_closure_SYSV:
+	.cfi_startproc
+	stm	%r2,%r6,8(%r15)			# Save arguments
+	.cfi_offset r6, -72
+	lr	%r4,%r0				# Load closure -> user_data
+	l	%r2,4(%r4)			#   ->cif
+	l	%r3,8(%r4)			#   ->fun
+	j	.Ldoclosure
+	.cfi_endproc
 
 #else
  
@@ -242,17 +278,28 @@ ffi_closure_SYSV:
 	# r5:	ret_type
 	# r6:	ecif.rvalue
 	# ov:	fn 
- 
+	# ov+8:	closure
+
 	# This assumes we are using gas.
 	.globl	ffi_call_SYSV
+	FFI_HIDDEN(ffi_call_SYSV)
 	.type	ffi_call_SYSV,%function
 ffi_call_SYSV:
-.LFB1:
+	.cfi_startproc
 	stmg	%r6,%r15,48(%r15)		# Save registers
-.LCFI0:
+	.cfi_offset r6, -112
+	.cfi_offset r7, -104
+	.cfi_offset r8, -96
+	.cfi_offset r9, -88
+	.cfi_offset r10, -80
+	.cfi_offset r11, -72
+	.cfi_offset r12, -64
+	.cfi_offset r13, -56
+	.cfi_offset r14, -48
+	.cfi_offset r15, -40
 	larl	%r13,.Lbase			# Set up base register
 	lgr	%r11,%r15			# Set up frame pointer
-.LCFI1:
+	.cfi_def_cfa_register r11
 	sgr	%r15,%r2
 	aghi	%r15,-160-80			# Allocate stack
 	lgr	%r8,%r6				# Save ecif.rvalue
@@ -260,12 +307,13 @@ ffi_call_SYSV:
 	lg	%r7,160(%r11)			# Load function address
 	stg	%r11,0(%r15)			# Set up back chain
 	aghi	%r11,-80			# Register save area
-.LCFI2:
+	.cfi_adjust_cfa_offset 80
 
 	la	%r2,160(%r15)			# Save area
 						# r3 already holds &ecif
 	basr	%r14,%r4			# Call ffi_prep_args
 
+	lg	%r0,160+80+8(%r11)		# Go closure -> static chain
 	lmg	%r2,%r6,0(%r11)			# Load arguments
 	ld	%f0,48(%r11)
 	ld	%f2,56(%r11)
@@ -278,154 +326,157 @@ ffi_call_SYSV:
 .LretNone:					# Return void
 	lg	%r4,80+112(%r11)
 	lmg	%r6,%r15,80+48(%r11)
+	.cfi_remember_state
+	.cfi_restore r15
+	.cfi_restore r14
+	.cfi_restore r13
+	.cfi_restore r12
+	.cfi_restore r11
+	.cfi_restore r10
+	.cfi_restore r9
+	.cfi_restore r8
+	.cfi_restore r7
+	.cfi_restore r6
+	.cfi_def_cfa r15, 160
 	br	%r4
+	.cfi_restore_state
+	# This nopr is necessary so that the .cfi instructions between the br
+	# above and the label below get executed.  See execute_cfa_program() in
+	# the Gcc source code, libgcc/unwind-dw2.c.
+	nopr
 
 .LretFloat:
 	lg	%r4,80+112(%r11)
 	ste	%f0,0(%r8)			# Return float
 	lmg	%r6,%r15,80+48(%r11)
+	.cfi_remember_state
+	.cfi_restore r6
+	.cfi_restore r7
+	.cfi_restore r8
+	.cfi_restore r9
+	.cfi_restore r10
+	.cfi_restore r11
+	.cfi_restore r12
+	.cfi_restore r13
+	.cfi_restore r14
+	.cfi_restore r15
+	.cfi_def_cfa r15, 160
 	br	%r4
+	.cfi_restore_state
+	# See comment on the nopr above.
+	nopr
  
 .LretDouble:
 	lg	%r4,80+112(%r11)
 	std	%f0,0(%r8)			# Return double
 	lmg	%r6,%r15,80+48(%r11)
+	.cfi_remember_state
+	.cfi_restore r15
+	.cfi_restore r14
+	.cfi_restore r13
+	.cfi_restore r12
+	.cfi_restore r11
+	.cfi_restore r10
+	.cfi_restore r9
+	.cfi_restore r8
+	.cfi_restore r7
+	.cfi_restore r6
+	.cfi_def_cfa r15, 160
 	br	%r4
+	.cfi_restore_state
+	# See comment on the nopr above.
+	nopr
 
-.LretInt32:
-	lg	%r4,80+112(%r11)
-	st	%r2,0(%r8)			# Return int
-	lmg	%r6,%r15,80+48(%r11)
-	br	%r4
- 
 .LretInt64:
 	lg	%r4,80+112(%r11)
 	stg	%r2,0(%r8)			# Return long
 	lmg	%r6,%r15,80+48(%r11)
+	.cfi_restore r15
+	.cfi_restore r14
+	.cfi_restore r13
+	.cfi_restore r12
+	.cfi_restore r11
+	.cfi_restore r10
+	.cfi_restore r9
+	.cfi_restore r8
+	.cfi_restore r7
+	.cfi_restore r6
+	.cfi_def_cfa r15, 160
 	br	%r4
+	.cfi_endproc
  
 .Ltable:
 	.byte	.LretNone-.Lbase		# FFI390_RET_VOID
 	.byte	.LretNone-.Lbase		# FFI390_RET_STRUCT
 	.byte	.LretFloat-.Lbase		# FFI390_RET_FLOAT
 	.byte	.LretDouble-.Lbase		# FFI390_RET_DOUBLE
-	.byte	.LretInt32-.Lbase		# FFI390_RET_INT32
+	.byte	0				# int32 retval not supported
 	.byte	.LretInt64-.Lbase		# FFI390_RET_INT64
 
-.LFE1: 
 .ffi_call_SYSV_end:
 	.size	 ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV
 
 
 	.globl	ffi_closure_SYSV
+	FFI_HIDDEN(ffi_closure_SYSV)
 	.type	ffi_closure_SYSV,%function
 ffi_closure_SYSV:
-.LFB2:
-	stmg	%r14,%r15,112(%r15)		# Save registers
-.LCFI10:
+	.cfi_startproc
 	stmg	%r2,%r6,16(%r15)		# Save arguments
-	std	%f0,128(%r15)
+	.cfi_offset r6, -112
+	lgr	%r4,%r0				# Load closure
+	lg	%r2,32(%r4)			#   ->cif
+	lg	%r3,40(%r4)			#   ->fun
+	lg	%r4,48(%r4)			#   ->user_data
+.Ldoclosure:
+	stmg	%r14,%r15,112(%r15)		# Save registers
+	.cfi_offset r14, -48
+	.cfi_offset r15, -40
+	std	%f0,128(%r15)			# Save arguments
 	std	%f2,136(%r15)
 	std	%f4,144(%r15)
 	std	%f6,152(%r15)
 	lgr	%r1,%r15			# Set up stack frame
-	aghi	%r15,-160
-.LCFI11:
-	lgr	%r2,%r0				# Closure
-	la	%r3,16(%r1)			# GPRs
-	la	%r4,128(%r1)			# FPRs
-	la	%r5,160(%r1)			# Overflow
+	aghi	%r15,-168
+	.cfi_adjust_cfa_offset 168
+	la	%r5,160(%r1)
+	stg	%r5,160(%r15)			# Overflow
+	la	%r5,16(%r1)			# GPRs
+	la	%r6,128(%r1)			# FPRs
 	stg	%r1,0(%r15)			# Set up back chain
 
 	brasl	%r14,ffi_closure_helper_SYSV	# Call helper
 
-	lg	%r14,160+112(%r15)
-	ld	%f0,160+128(%r15)		# Load return registers
-	lg	%r2,160+16(%r15)
-	la	%r15,160(%r15)
+	ld	%f0,168+128(%r15)		# Load return registers
+	lg	%r2,168+16(%r15)
+	lg	%r6,168+48(%r15)		# Restore saved registers
+	.cfi_restore r6
+	lmg	%r14,%r15,168+112(%r15)
+	.cfi_restore r14
+	.cfi_restore r15
+	.cfi_adjust_cfa_offset -168
 	br	%r14
-.LFE2: 
+	.cfi_endproc
 
 .ffi_closure_SYSV_end:
 	.size	 ffi_closure_SYSV,.ffi_closure_SYSV_end-ffi_closure_SYSV
 
-
-
-	.section	.eh_frame,EH_FRAME_FLAGS,@progbits
-.Lframe1:
-	.4byte	.LECIE1-.LSCIE1	# Length of Common Information Entry
-.LSCIE1:
-	.4byte	0x0	# CIE Identifier Tag
-	.byte	0x1	# CIE Version
-	.ascii "zR\0"	# CIE Augmentation
-	.uleb128 0x1	# CIE Code Alignment Factor
-	.sleb128 -8	# CIE Data Alignment Factor
-	.byte	0xe	# CIE RA Column
-	.uleb128 0x1	# Augmentation size
-	.byte	0x1b	# FDE Encoding (pcrel sdata4)
-	.byte	0xc	# DW_CFA_def_cfa
-	.uleb128 0xf
-	.uleb128 0xa0
-	.align	8
-.LECIE1:
-.LSFDE1:
-	.4byte	.LEFDE1-.LASFDE1	# FDE Length
-.LASFDE1:
-	.4byte	.LASFDE1-.Lframe1	# FDE CIE offset
-	.4byte	.LFB1-.	# FDE initial location
-	.4byte	.LFE1-.LFB1	# FDE address range
-	.uleb128 0x0	# Augmentation size
-	.byte	0x4	# DW_CFA_advance_loc4
-	.4byte	.LCFI0-.LFB1
-	.byte	0x8f	# DW_CFA_offset, column 0xf
-	.uleb128 0x5
-	.byte	0x8e	# DW_CFA_offset, column 0xe
-	.uleb128 0x6
-	.byte	0x8d	# DW_CFA_offset, column 0xd
-	.uleb128 0x7
-	.byte	0x8c	# DW_CFA_offset, column 0xc
-	.uleb128 0x8
-	.byte	0x8b	# DW_CFA_offset, column 0xb
-	.uleb128 0x9
-	.byte	0x8a	# DW_CFA_offset, column 0xa
-	.uleb128 0xa
-	.byte	0x89	# DW_CFA_offset, column 0x9
-	.uleb128 0xb
-	.byte	0x88	# DW_CFA_offset, column 0x8
-	.uleb128 0xc
-	.byte	0x87	# DW_CFA_offset, column 0x7
-	.uleb128 0xd
-	.byte	0x86	# DW_CFA_offset, column 0x6
-	.uleb128 0xe
-	.byte	0x4	# DW_CFA_advance_loc4
-	.4byte	.LCFI1-.LCFI0
-	.byte	0xd	# DW_CFA_def_cfa_register
-	.uleb128 0xb
-	.byte	0x4	# DW_CFA_advance_loc4
-	.4byte	.LCFI2-.LCFI1
-	.byte	0xe	# DW_CFA_def_cfa_offset
-	.uleb128 0xf0
-	.align	8
-.LEFDE1:
-.LSFDE2:
-	.4byte	.LEFDE2-.LASFDE2	# FDE Length
-.LASFDE2:
-	.4byte	.LASFDE2-.Lframe1	# FDE CIE offset
-	.4byte	.LFB2-.	# FDE initial location
-	.4byte	.LFE2-.LFB2	# FDE address range
-	.uleb128 0x0	# Augmentation size
-	.byte	0x4	# DW_CFA_advance_loc4
-	.4byte	.LCFI10-.LFB2
-	.byte	0x8f	# DW_CFA_offset, column 0xf
-	.uleb128 0x5
-	.byte	0x8e	# DW_CFA_offset, column 0xe
-	.uleb128 0x6
-	.byte	0x4	# DW_CFA_advance_loc4
-	.4byte	.LCFI11-.LCFI10
-	.byte	0xe	# DW_CFA_def_cfa_offset
-	.uleb128 0x140
-	.align	8
-.LEFDE2:
+	
+	.globl	ffi_go_closure_SYSV
+	FFI_HIDDEN(ffi_go_closure_SYSV)
+	.type	ffi_go_closure_SYSV,%function
+ffi_go_closure_SYSV:
+	.cfi_startproc
+	stmg	%r2,%r6,16(%r15)		# Save arguments
+	.cfi_offset r6, -112
+	lgr	%r4,%r0				# Load closure -> user_data
+	lg	%r2,8(%r4)			#   ->cif
+	lg	%r3,16(%r4)			#   ->fun
+	j	.Ldoclosure
+	.cfi_endproc
+
+.ffi_go_closure_SYSV_end:
+	.size	 ffi_go_closure_SYSV,.ffi_go_closure_SYSV_end-ffi_go_closure_SYSV
 
 #endif
 
-- 
1.8.4.2


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Go closures for s390[x]
  2014-12-16 15:05 [PATCH] Go closures for s390[x] Dominik Vogt
@ 2014-12-18 21:29 ` Richard Henderson
  2014-12-19 13:13   ` Ulrich Weigand
  2014-12-22 10:27   ` Dominik Vogt
  0 siblings, 2 replies; 7+ messages in thread
From: Richard Henderson @ 2014-12-18 21:29 UTC (permalink / raw)
  To: libffi-discuss, Ulrich Weigand, Andreas Krebbel

On 12/16/2014 09:05 AM, Dominik Vogt wrote:
>  	lm	%r6,%r15,48+24(%r11)
> +	.cfi_remember_state
> +	.cfi_restore 15
> +	.cfi_restore 14
> +	.cfi_restore 13
> +	.cfi_restore 12
> +	.cfi_restore 11
> +	.cfi_restore 10
> +	.cfi_restore 9
> +	.cfi_restore 8
> +	.cfi_restore 7
> +	.cfi_restore 6
> +	.cfi_def_cfa r15, 96
>  	br	%r4
> +	.cfi_restore_state
> +	# This nopr is necessary so that the .cfi instructions between the br
> +	# above and the label below get executed.  See execute_cfa_program() in
> +	# the Gcc source code, libgcc/unwind-dw2.c.
> +	nopr

I'm not really sure you need any of these restores, since the data is still on
the stack, unclobbered.  I think you'd really only need to reset the cfa here.

The nopr seems like a red herring.  I don't see why the < vs <= for
execute_cfa_program is relevant -- this is not following a call.

That said, I've got some followup patches to clean up more of the s390 backend
that I'll post in a moment.



r~

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Go closures for s390[x]
  2014-12-18 21:29 ` Richard Henderson
@ 2014-12-19 13:13   ` Ulrich Weigand
  2014-12-19 14:10     ` Richard Henderson
  2014-12-22 10:27   ` Dominik Vogt
  1 sibling, 1 reply; 7+ messages in thread
From: Ulrich Weigand @ 2014-12-19 13:13 UTC (permalink / raw)
  To: Richard Henderson; +Cc: libffi-discuss, Ulrich Weigand, Andreas Krebbel

Richard Henderson wrote:
> On 12/16/2014 09:05 AM, Dominik Vogt wrote:
> >  	lm	%r6,%r15,48+24(%r11)
> > +	.cfi_remember_state
> > +	.cfi_restore 15
> > +	.cfi_restore 14
> > +	.cfi_restore 13
> > +	.cfi_restore 12
> > +	.cfi_restore 11
> > +	.cfi_restore 10
> > +	.cfi_restore 9
> > +	.cfi_restore 8
> > +	.cfi_restore 7
> > +	.cfi_restore 6
> > +	.cfi_def_cfa r15, 96
> >  	br	%r4
> > +	.cfi_restore_state
> > +	# This nopr is necessary so that the .cfi instructions between the br
> > +	# above and the label below get executed.  See execute_cfa_program() in
> > +	# the Gcc source code, libgcc/unwind-dw2.c.
> > +	nopr
> 
> I'm not really sure you need any of these restores, since the data is still on
> the stack, unclobbered.  I think you'd really only need to reset the cfa here.

Ah, right.  The GPRs are saved in the 96/160-byte bias area, which is not
clobbered even by signal handlers.  (I guess GCC could also do that same
optimization when creating CFI for epilogues ...)

It looks like we do need the restore of r15, though, otherwise the CFA
computation will be wrong.

> The nopr seems like a red herring.  I don't see why the < vs <= for
> execute_cfa_program is relevant -- this is not following a call.

Actually, it sort-of is; the code does:
        la      %r14,0(%r13,%r9)                # Set return address
        br      %r7                             # ... and call function

i.e. sets the return address register to point to one of the return stubs
and then jumps to the target function instead of calling it; so from the
point of view of an unwinder, it looks like the target function was called
from the instruction immediately preceding the return stub.

Bye,
Ulrich

-- 
  Dr. Ulrich Weigand
  GNU/Linux compilers and toolchain
  Ulrich.Weigand@de.ibm.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Go closures for s390[x]
  2014-12-19 13:13   ` Ulrich Weigand
@ 2014-12-19 14:10     ` Richard Henderson
  2014-12-19 14:48       ` Ulrich Weigand
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Henderson @ 2014-12-19 14:10 UTC (permalink / raw)
  To: Ulrich Weigand; +Cc: libffi-discuss, Ulrich Weigand, Andreas Krebbel

On 12/19/2014 07:13 AM, Ulrich Weigand wrote:
> Actually, it sort-of is; the code does:
>         la      %r14,0(%r13,%r9)                # Set return address
>         br      %r7                             # ... and call function
> 
> i.e. sets the return address register to point to one of the return stubs
> and then jumps to the target function instead of calling it; so from the
> point of view of an unwinder, it looks like the target function was called
> from the instruction immediately preceding the return stub.

Ah, good point.  That's the sort of verbiage that should be in the comment then.

Is that optimization really worth it?  Is there no call/return prediction stack
to get confused?  I know I replicated it in the code that I wrote, but really
only now do I start to question it.


r~

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Go closures for s390[x]
  2014-12-19 14:10     ` Richard Henderson
@ 2014-12-19 14:48       ` Ulrich Weigand
  0 siblings, 0 replies; 7+ messages in thread
From: Ulrich Weigand @ 2014-12-19 14:48 UTC (permalink / raw)
  To: Richard Henderson; +Cc: libffi-discuss, Ulrich Weigand, Andreas Krebbel

Richard Henderson wrote:
> On 12/19/2014 07:13 AM, Ulrich Weigand wrote:
> > Actually, it sort-of is; the code does:
> >         la      %r14,0(%r13,%r9)                # Set return address
> >         br      %r7                             # ... and call function
> > 
> > i.e. sets the return address register to point to one of the return stubs
> > and then jumps to the target function instead of calling it; so from the
> > point of view of an unwinder, it looks like the target function was called
> > from the instruction immediately preceding the return stub.
> 
> Ah, good point.  That's the sort of verbiage that should be in the comment then.
> 
> Is that optimization really worth it?  Is there no call/return prediction stack
> to get confused?  I know I replicated it in the code that I wrote, but really
> only now do I start to question it.

There's no call/return stack as such on current processors; we don't have
hard-coded call/return instructions, and the various OSes on the platform
use registers in quite different ways as part of their calling conventions.

However, I agree that in general it's probably best to avoid tricks like that.


-- 
  Dr. Ulrich Weigand
  GNU/Linux compilers and toolchain
  Ulrich.Weigand@de.ibm.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Go closures for s390[x]
  2014-12-18 21:29 ` Richard Henderson
  2014-12-19 13:13   ` Ulrich Weigand
@ 2014-12-22 10:27   ` Dominik Vogt
  2014-12-22 16:31     ` Richard Henderson
  1 sibling, 1 reply; 7+ messages in thread
From: Dominik Vogt @ 2014-12-22 10:27 UTC (permalink / raw)
  To: libffi-discuss

On Thu, Dec 18, 2014 at 03:29:01PM -0600, Richard Henderson wrote:
> On 12/16/2014 09:05 AM, Dominik Vogt wrote:
> >  	lm	%r6,%r15,48+24(%r11)
> > +	.cfi_remember_state
> > +	.cfi_restore 15
> > +	.cfi_restore 14
> > +	.cfi_restore 13
> > +	.cfi_restore 12
> > +	.cfi_restore 11
> > +	.cfi_restore 10
> > +	.cfi_restore 9
> > +	.cfi_restore 8
> > +	.cfi_restore 7
> > +	.cfi_restore 6
> > +	.cfi_def_cfa r15, 96
> >  	br	%r4
> > +	.cfi_restore_state
> 
> I'm not really sure you need any of these restores, since the data is still on
> the stack, unclobbered.  I think you'd really only need to reset the cfa here.

I thought about that but left the cfi-restores in the code for
documentation purposes.  Actually it's probably better to remove
them as they cost performance if they get executed.

Should I make a new patch, or is that something you wanted to post
as part of your cleanup?

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Go closures for s390[x]
  2014-12-22 10:27   ` Dominik Vogt
@ 2014-12-22 16:31     ` Richard Henderson
  0 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2014-12-22 16:31 UTC (permalink / raw)
  To: libffi-discuss

On 12/22/2014 02:27 AM, Dominik Vogt wrote:
> Should I make a new patch, or is that something you wanted to post
> as part of your cleanup?

Done as part of the cleanup.


r~

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2014-12-22 16:31 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-12-16 15:05 [PATCH] Go closures for s390[x] Dominik Vogt
2014-12-18 21:29 ` Richard Henderson
2014-12-19 13:13   ` Ulrich Weigand
2014-12-19 14:10     ` Richard Henderson
2014-12-19 14:48       ` Ulrich Weigand
2014-12-22 10:27   ` Dominik Vogt
2014-12-22 16:31     ` Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).