public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 03/03] PowerPC: abort transaction in syscalls
       [not found] <53FB3642.6010302@linux.vnet.ibm.com>
  2014-08-25 13:36 ` [PATCH 02/03] PowerPC: Add adaptive elision to rwlocks Adhemerval Zanella
@ 2014-08-25 13:36 ` Adhemerval Zanella
  1 sibling, 0 replies; 2+ messages in thread
From: Adhemerval Zanella @ 2014-08-25 13:36 UTC (permalink / raw)
  To: GNU C. Library

Linux kernel powerpc documentation [1] states issuing a syscall inside a
transaction is not recommended and may lead to undefined behavior. It
also states syscalls do not abort transactions neither they run in
transactional state.  This may lead to side-effects begin visible outside
transactions, specially when related to 'futex' syscalls.

I see constantly deadlock issues in testcases that might trigger a futex
sycalls inside a transaction.  Although they are related to timing issue, both
pthread_cond and pthread_barrier are very susceptible to such issue.  To avoid
side-effects being visible outside transactions, GLIBC powerpc with lock elision
enable issue a transaction abort instruction just before any syscall.

I have added a new TCB fields named tm_capable that is initially set to 1
if system supports hardware transaction memory (it is set from hwcap2 field).
If GLIBC is built with lock elision support, every syscall made in libc.so
and other libraries (except the loader itself) will check the field value
and abort the transaction if the field is set to 1. I have chose to use a
TCB field instead to rely on a accessing a global variable (hwcap2 for instance)
for some reasons:

1. The TCB field logic would be simple to implement, it avoid the requirement of
   creating a TOC field in every syscall to access a global variable and also
   simplifies the implementation of syscall wrappers in assembly.

2. The TCB field simplifies syscalls wrapper done by other libraries (there is no
   need to export private symbols).

3. It also provides a much more simpler way in the future to specific that a thread
   should not abort on syscalls (by setting its TCB tm_capable field to 0).

4. There is some internal discussions about enabling transaction abort in syscalls
   for powerpc within kernel.  This adds an early abort, potentially improving
   performance by avoid the syscall call (since the transaction would be aborted 
   anyway).

The performance implications on syscalls for such modification are negligible (
around 3-5 cycles in POWER8).


[1] https://www.kernel.org/doc/Documentation/powerpc/transactional_memory.txt

--

	* sysdeps/powerpc/nptl/tls.h (tcbhead_t): Add tm_capable field.
	(TLS_INIT_TP): Add tm_capable initialization.
	(TLS_DEFINE_INIT_TP): Likewise.
	(THREAD_GET_TM_CAPABLE): New file: get tm_capable field value from
	TCB.
	(THREAD_SET_TM_CAPABLE): New file: set tm_capable field value in TCB.
	* sysdeps/powerpc/nptl/tcb-offsets.sym (TM_CAPABLE): Add field offset
	calculation.
	* sysdeps/powerpc/powerpc32/sysdep.h (DO_CALL): Abort hardware
	transactoion is lock elision is built and TCB tm_capable is set.
	* sysdeps/powerpc/powerpc64/sysdep.h (DO_CALL): Likewise.
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h
	(INTERNAL_SYSCALL_NCS): Likewise.
	* sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h
	(INTERNAL_SYSCALL_NCS): Likewise.
	* sysdeps/powerpc/sysdep.h (ABORT_TRANSACTION): New define.

---

diff --git a/sysdeps/powerpc/nptl/tcb-offsets.sym b/sysdeps/powerpc/nptl/tcb-offsets.sym
index f996759..d955142 100644
--- a/sysdeps/powerpc/nptl/tcb-offsets.sym
+++ b/sysdeps/powerpc/nptl/tcb-offsets.sym
@@ -19,6 +19,7 @@ POINTER_GUARD			(offsetof (tcbhead_t, pointer_guard) - TLS_TCB_OFFSET - sizeof (
 TAR_SAVE			(offsetof (tcbhead_t, tar_save) - TLS_TCB_OFFSET - sizeof (tcbhead_t))
 DSO_SLOT1			(offsetof (tcbhead_t, dso_slot1) - TLS_TCB_OFFSET - sizeof (tcbhead_t))
 DSO_SLOT2			(offsetof (tcbhead_t, dso_slot2) - TLS_TCB_OFFSET - sizeof (tcbhead_t))
+TM_CAPABLE			(offsetof (tcbhead_t, tm_capable) - TLS_TCB_OFFSET - sizeof (tcbhead_t))
 #ifndef __ASSUME_PRIVATE_FUTEX
 PRIVATE_FUTEX_OFFSET		thread_offsetof (header.private_futex)
 #endif
diff --git a/sysdeps/powerpc/nptl/tls.h b/sysdeps/powerpc/nptl/tls.h
index b80a5fb..37280d2 100644
--- a/sysdeps/powerpc/nptl/tls.h
+++ b/sysdeps/powerpc/nptl/tls.h
@@ -63,6 +63,8 @@ typedef union dtv
    are private.  */
 typedef struct
 {
+  /* Indicate if hwcap2 has PPC_FEATURE2_HAS_HTM capability.  */
+  int tm_capable;
   /* Reservation for Dynamic System Optimizer ABI.  */
   uintptr_t dso_slot2;
   uintptr_t dso_slot1;
@@ -130,11 +132,17 @@ register void *__thread_register __asm__ ("r13");
    special attention since 'errno' is not yet available and if the
    operation can cause a failure 'errno' must not be touched.  */
 # define TLS_INIT_TP(tcbp) \
-    (__thread_register = (void *) (tcbp) + TLS_TCB_OFFSET, NULL)
+  ({ 									      \
+    __thread_register = (void *) (tcbp) + TLS_TCB_OFFSET;		      \
+    THREAD_SET_TM_CAPABLE (GLRO (dl_hwcap2) & PPC_FEATURE2_HAS_HTM ? 1 : 0);  \
+    NULL;								      \
+  })
 
 /* Value passed to 'clone' for initialization of the thread register.  */
 # define TLS_DEFINE_INIT_TP(tp, pd) \
-  void *tp = (void *) (pd) + TLS_TCB_OFFSET + TLS_PRE_TCB_SIZE
+    void *tp = (void *) (pd) + TLS_TCB_OFFSET + TLS_PRE_TCB_SIZE;	      \
+    (((tcbhead_t *) ((char *) tp - TLS_TCB_OFFSET))[-1].tm_capable) =	      \
+      THREAD_GET_TM_CAPABLE ();
 
 /* Return the address of the dtv for the current thread.  */
 # define THREAD_DTV() \
@@ -188,6 +196,13 @@ register void *__thread_register __asm__ ("r13");
 		     + TLS_PRE_TCB_SIZE))[-1].pointer_guard		      \
      = THREAD_GET_POINTER_GUARD())
 
+/* tm_capable field in TCB head.  */
+# define THREAD_GET_TM_CAPABLE() \
+    (((tcbhead_t *) ((char *) __thread_register				      \
+		     - TLS_TCB_OFFSET))[-1].tm_capable)
+# define THREAD_SET_TM_CAPABLE(value) \
+    (THREAD_GET_TM_CAPABLE () = (value))
+
 /* l_tls_offset == 0 is perfectly valid on PPC, so we have to use some
    different value to mean unset l_tls_offset.  */
 # define NO_TLS_OFFSET		-1
diff --git a/sysdeps/powerpc/powerpc32/sysdep.h b/sysdeps/powerpc/powerpc32/sysdep.h
index c8a56aa..c4b3ca8 100644
--- a/sysdeps/powerpc/powerpc32/sysdep.h
+++ b/sysdeps/powerpc/powerpc32/sysdep.h
@@ -88,7 +88,23 @@ GOT_LABEL:			;					      \
   cfi_endproc;								      \
   ASM_SIZE_DIRECTIVE(name)
 
+#if !defined IS_IN_rtld && defined (ENABLE_LOCK_ELISION)
+# define ABORT_TRANSACTION \
+    cmpwi    2,0;		\
+    beq      1f;		\
+    lwz      0,TM_CAPABLE(2);	\
+    cmpwi    0,0;		\
+    beq	     1f;		\
+    li	     0,_ABORT_SYSCALL;	\
+    tabort.  0;			\
+    .align 4;			\
+1:
+#else
+# define ABORT_TRANSACTION
+#endif
+
 #define DO_CALL(syscall)						      \
+    ABORT_TRANSACTION							      \
     li 0,syscall;							      \
     sc
 
diff --git a/sysdeps/powerpc/powerpc64/sysdep.h b/sysdeps/powerpc/powerpc64/sysdep.h
index b28fb9d..78722c6 100644
--- a/sysdeps/powerpc/powerpc64/sysdep.h
+++ b/sysdeps/powerpc/powerpc64/sysdep.h
@@ -283,7 +283,23 @@ LT_LABELSUFFIX(name,_name_end): ; \
   TRACEBACK_MASK(name,mask)	\
   END_2(name)
 
+#if !defined IS_IN_rtld && defined (ENABLE_LOCK_ELISION)
+# define ABORT_TRANSACTION \
+    cmpdi    13,0;		\
+    beq      1f;		\
+    lwz      0,TM_CAPABLE(13);	\
+    cmpwi    0,0;		\
+    beq	     1f;		\
+    li	     0,_ABORT_SYSCALL;	\
+    tabort.  0;			\
+    .align 4;                   \
+1:
+#else
+# define ABORT_TRANSACTION
+#endif
+
 #define DO_CALL(syscall) \
+    ABORT_TRANSACTION \
     li 0,syscall; \
     sc
 
diff --git a/sysdeps/powerpc/sysdep.h b/sysdeps/powerpc/sysdep.h
index e6627c0..c683066 100644
--- a/sysdeps/powerpc/sysdep.h
+++ b/sysdeps/powerpc/sysdep.h
@@ -21,6 +21,10 @@
  */
 #define _SYSDEPS_SYSDEP_H 1
 #include <bits/hwcap.h>
+#ifdef ENABLE_LOCK_ELISION
+#include <tls.h>
+#include <htm.h>
+#endif
 
 #define PPC_FEATURE_970 (PPC_FEATURE_POWER4 + PPC_FEATURE_HAS_ALTIVEC)
 
@@ -164,4 +168,21 @@
 #define ALIGNARG(log2) log2
 #define ASM_SIZE_DIRECTIVE(name) .size name,.-name
 
+#else
+
+/* Linux kernel powerpc documentation states issuing a syscall inside a
+   transaction is not recommended and may lead to undefined behavior. It
+   also states syscalls does not abort transactoin neither run in
+   transactional state.  To avoid such traps, we abort transaction just
+   before syscalls.  */
+#if !defined IS_IN_rtld && defined (ENABLE_LOCK_ELISION)
+# define ABORT_TRANSACTION \
+  ({ 						\
+    if (THREAD_GET_TM_CAPABLE ())		\
+      __builtin_tabort (_ABORT_SYSCALL);	\
+  })
+#else
+# define ABORT_TRANSACTION
+#endif
+
 #endif	/* __ASSEMBLER__ */
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h b/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h
index 1a5e37a..0947ca3 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h
@@ -194,6 +194,7 @@
     register long int r11 __asm__ ("r11");				\
     register long int r12 __asm__ ("r12");				\
     LOADARGS_##nr(name, args);						\
+    ABORT_TRANSACTION;							\
     __asm__ __volatile__						\
       ("sc   \n\t"							\
        "mfcr %0"							\
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h b/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h
index 93e454e..a3cc302 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h
@@ -201,6 +201,7 @@
     register long int r7  __asm__ ("r7");				\
     register long int r8  __asm__ ("r8");				\
     LOADARGS_##nr (name, ##args);					\
+    ABORT_TRANSACTION;							\
     __asm__ __volatile__						\
       ("sc\n\t"								\
        "mfcr  %0\n\t"							\

^ permalink raw reply	[flat|nested] 2+ messages in thread

* [PATCH 02/03] PowerPC: Add adaptive elision to rwlocks
       [not found] <53FB3642.6010302@linux.vnet.ibm.com>
@ 2014-08-25 13:36 ` Adhemerval Zanella
  2014-08-25 13:36 ` [PATCH 03/03] PowerPC: abort transaction in syscalls Adhemerval Zanella
  1 sibling, 0 replies; 2+ messages in thread
From: Adhemerval Zanella @ 2014-08-25 13:36 UTC (permalink / raw)
  To: GNU C. Library

This patch adds support for lock elision using ISA 2.07 hardware
transactional memory for rwlocks.  The logic is similar to the
one presented in pthread_mutex lock elision.

--

	* sysdeps/powerpc/nptl/elide.h: New file: generic lock elision support
	for powerpc.
	* sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h
	[pthread_rwlock_t] (__pad1): Change size to 7 bytes in 64 bits case
	and remove it for 32 bits case.
	[pthread_rwlock_t] (__rwelision): New field for lock elision.
	(__PTHREAD_RWLOCK_ELISION_EXTRA): Adjust for new lock elision field
	initialization.
	* sysdeps/unix/sysv/linux/powerpc/elision-conf.c (elision_init):
	Disable lock elision with rdlocks if elision is not available.

---

diff --git a/sysdeps/powerpc/nptl/elide.h b/sysdeps/powerpc/nptl/elide.h
new file mode 100644
index 0000000..5cc47a3
--- /dev/null
+++ b/sysdeps/powerpc/nptl/elide.h
@@ -0,0 +1,111 @@
+/* elide.h: Generic lock elision support for powerpc.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef ELIDE_PPC_H
+# define ELIDE_PPC_H
+
+#ifdef ENABLE_LOCK_ELISION
+# include <htm.h>
+# include <elision-conf.h>
+
+/* Returns true if lock defined by IS_LOCK_FREE was elided.
+   ADAPT_COUNT is a pointer to per-lock state variable. */
+
+static inline bool
+__elide_lock (uint8_t *adapt_count, int is_lock_free)
+{
+  if (*adapt_count > 0)
+    {
+      (*adapt_count)--;
+      return false;
+    }
+
+  for (int i = __elision_aconf.try_tbegin; i > 0; i--)
+    {
+      if (__builtin_tbegin (0))
+	{
+	  if (is_lock_free)
+	    return true;
+	  /* Lock was busy.  */
+	  __builtin_tabort (_ABORT_LOCK_BUSY);
+	}
+      else
+	{
+	  /* A persistent failure indicates that a retry will probably
+	     result in another failure.  Use normal locking now and
+	     for the next couple of calls.  */
+	  if (_TEXASRU_FAILURE_PERSISTENT (__builtin_get_texasru ()))
+	    {
+	      if (__elision_aconf.skip_lock_internal_abort > 0)
+		*adapt_count = __elision_aconf.skip_lock_internal_abort;
+	      break;
+	    }
+	  /* Same logic as above, but for for a number of temporary failures in a
+	     a row.  */
+	  else if (__elision_aconf.skip_lock_out_of_tbegin_retries > 0
+		   && __elision_aconf.try_tbegin > 0)
+	    *adapt_count = __elision_aconf.skip_lock_out_of_tbegin_retries;
+	}
+     }
+
+  return false;
+}
+
+# define ELIDE_LOCK(adapt_count, is_lock_free) \
+  __elide_lock (&(adapt_count), is_lock_free)
+
+
+static inline bool
+__elide_trylock (uint8_t *adapt_count, int is_lock_free, int write)
+{
+  if (__elision_aconf.try_tbegin > 0)
+    {
+      if (write)
+	__builtin_tabort (_ABORT_NESTED_TRYLOCK);
+      return __elide_lock (adapt_count, is_lock_free);
+    }
+  return false;
+}
+
+# define ELIDE_TRYLOCK(adapt_count, is_lock_free, write)	\
+  __elide_trylock (&(adapt_count), is_lock_free, write)
+
+
+static inline bool
+__elide_unlock (int is_lock_free)
+{
+  if (is_lock_free)
+    {
+      __builtin_tend (0);
+      return true;
+    }
+  return false;
+}
+
+# define ELIDE_UNLOCK(is_lock_free) \
+  __elide_unlock (is_lock_free)
+
+# else
+
+# define ELIDE_LOCK(adapt_count, is_lock_free) 0
+# define ELIDE_TRYLOCK(adapt_count, is_lock_free, write) 0
+# define ELIDE_UNLOCK(is_lock_free) 0
+
+#endif /* ENABLE_LOCK_ELISION  */
+
+#endif
diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h b/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h
index 396a3d9..998f6d4 100644
--- a/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h
+++ b/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h
@@ -172,11 +172,13 @@ typedef union
     unsigned int __nr_writers_queued;
     int __writer;
     int __shared;
-    unsigned long int __pad1;
+    unsigned char __rwelision;
+    unsigned char __pad1[7];
     unsigned long int __pad2;
     /* FLAGS must stay at this position in the structure to maintain
        binary compatibility.  */
     unsigned int __flags;
+# define __PTHREAD_RWLOCK_ELISION_EXTRA 0, {0, 0, 0, 0, 0, 0, 0 }
   } __data;
 # else
   struct
@@ -187,20 +189,20 @@ typedef union
     unsigned int __writer_wakeup;
     unsigned int __nr_readers_queued;
     unsigned int __nr_writers_queued;
-    unsigned char __pad1;
+    unsigned char __rwelision;
     unsigned char __pad2;
     unsigned char __shared;
     /* FLAGS must stay at this position in the structure to maintain
        binary compatibility.  */
     unsigned char __flags;
     int __writer;
+#define __PTHREAD_RWLOCK_ELISION_EXTRA 0
   } __data;
 # endif
   char __size[__SIZEOF_PTHREAD_RWLOCK_T];
   long int __align;
 } pthread_rwlock_t;
 
-#define __PTHREAD_RWLOCK_ELISION_EXTRA 0
 
 typedef union
 {
diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-conf.c b/sysdeps/unix/sysv/linux/powerpc/elision-conf.c
index 60a7900..febcf4f 100644
--- a/sysdeps/unix/sysv/linux/powerpc/elision-conf.c
+++ b/sysdeps/unix/sysv/linux/powerpc/elision-conf.c
@@ -64,6 +64,9 @@ elision_init (int argc __attribute__ ((unused)),
 #ifdef ENABLE_LOCK_ELISION
   __pthread_force_elision = __libc_enable_secure ? 0 : elision_available;
 #endif
+  if (!elision_available)
+    /* Disable elision on rwlocks.  */
+    __elision_aconf.try_tbegin = 0;
 }
 
 #ifdef SHARED

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2014-08-25 13:36 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <53FB3642.6010302@linux.vnet.ibm.com>
2014-08-25 13:36 ` [PATCH 02/03] PowerPC: Add adaptive elision to rwlocks Adhemerval Zanella
2014-08-25 13:36 ` [PATCH 03/03] PowerPC: abort transaction in syscalls Adhemerval Zanella

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).