public inbox for fortran@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] libgfortran: Replace mutex with rwlock
@ 2022-12-22  2:19 Lipeng Zhu
  0 siblings, 0 replies; 5+ messages in thread
From: Lipeng Zhu @ 2022-12-22  2:19 UTC (permalink / raw)
  To: fortran, gcc-patches; +Cc: hongjiu.lu, tianyou.li, pan.deng, lipeng.zhu

This patch try to introduce the rwlock and split the read/write to
unit_root tree and unit_cache with rwlock instead of the mutex to
increase CPU efficiency. In the get_gfc_unit function, the percentage
to step into the insert_unit function is around 30%, in most instances,
we can get the unit in the phase of reading the unit_cache or unit_root
tree. So split the read/write phase by rwlock would be an approach to
make it more parallel.

BTW, the IPC metrics can gain around 9x in our test
server with 220 cores. The benchmark we used is
https://github.com/rwesson/NEAT

libgcc/ChangeLog:

    * gthr-posix.h (__GTHREAD_RWLOCK_INIT): New macro
    (__gthrw): New function
    (__gthread_rwlock_rdlock): New function
    (__gthread_rwlock_tryrdlock): New function
    (__gthread_rwlock_wrlock): New function
    (__gthread_rwlock_trywrlock): New function
    (__gthread_rwlock_unlock): New function

libgfortran/ChangeLog:

    * io/async.c (DEBUG_LINE): New
    * io/async.h (RWLOCK_DEBUG_ADD): New macro
    (CHECK_RDLOCK): New macro
    (CHECK_WRLOCK): New macro
    (TAIL_RWLOCK_DEBUG_QUEUE): New macro
    (IN_RWLOCK_DEBUG_QUEUE): New macro
    (RDLOCK): New macro
    (WRLOCK): New macro
    (RWUNLOCK): New macro
    (RD_TO_WRLOCK): New macro
    (INTERN_RDLOCK): New macro
    (INTERN_WRLOCK): New macro
    (INTERN_RWUNLOCK): New macro
    * io/io.h (internal_proto): Define unit_rwlock
    * io/transfer.c (st_read_done_worker): Relace unit_lock with unit_rwlock
    (st_write_done_worker): Relace unit_lock with unit_rwlock
    * io/unit.c (get_gfc_unit): Relace unit_lock with unit_rwlock
    (if): Relace unit_lock with unit_rwlock
    (close_unit_1): Relace unit_lock with unit_rwlock
    (close_units): Relace unit_lock with unit_rwlock
    (newunit_alloc): Relace unit_lock with unit_rwlock
    * io/unix.c (flush_all_units): Relace unit_lock with unit_rwlock
---
 libgcc/gthr-posix.h       |  52 +++++++++++++
 libgfortran/io/async.c    |   4 +
 libgfortran/io/async.h    | 151 ++++++++++++++++++++++++++++++++++++++
 libgfortran/io/io.h       |  15 ++--
 libgfortran/io/transfer.c |   8 +-
 libgfortran/io/unit.c     |  65 ++++++++--------
 libgfortran/io/unix.c     |  16 ++--
 7 files changed, 265 insertions(+), 46 deletions(-)

diff --git a/libgcc/gthr-posix.h b/libgcc/gthr-posix.h
index f1a5ab8e075..358948e8ae8 100644
--- a/libgcc/gthr-posix.h
+++ b/libgcc/gthr-posix.h
@@ -48,6 +48,7 @@ typedef pthread_t __gthread_t;
 typedef pthread_key_t __gthread_key_t;
 typedef pthread_once_t __gthread_once_t;
 typedef pthread_mutex_t __gthread_mutex_t;
+typedef pthread_rwlock_t __gthread_rwlock_t;
 typedef pthread_mutex_t __gthread_recursive_mutex_t;
 typedef pthread_cond_t __gthread_cond_t;
 typedef struct timespec __gthread_time_t;
@@ -58,6 +59,7 @@ typedef struct timespec __gthread_time_t;
 
 #define __GTHREAD_MUTEX_INIT PTHREAD_MUTEX_INITIALIZER
 #define __GTHREAD_MUTEX_INIT_FUNCTION __gthread_mutex_init_function
+#define __GTHREAD_RWLOCK_INIT PTHREAD_RWLOCK_INITIALIZER
 #define __GTHREAD_ONCE_INIT PTHREAD_ONCE_INIT
 #if defined(PTHREAD_RECURSIVE_MUTEX_INITIALIZER)
 #define __GTHREAD_RECURSIVE_MUTEX_INIT PTHREAD_RECURSIVE_MUTEX_INITIALIZER
@@ -135,6 +137,11 @@ __gthrw(pthread_mutexattr_init)
 __gthrw(pthread_mutexattr_settype)
 __gthrw(pthread_mutexattr_destroy)
 
+__gthrw(pthread_rwlock_rdlock)
+__gthrw(pthread_rwlock_tryrdlock)
+__gthrw(pthread_rwlock_wrlock)
+__gthrw(pthread_rwlock_trywrlock)
+__gthrw(pthread_rwlock_unlock)
 
 #if defined(_LIBOBJC) || defined(_LIBOBJC_WEAK)
 /* Objective-C.  */
@@ -885,6 +892,51 @@ __gthread_cond_destroy (__gthread_cond_t* __cond)
   return __gthrw_(pthread_cond_destroy) (__cond);
 }
 
+static inline int
+__gthread_rwlock_rdlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+    return __gthrw_(pthread_rwlock_rdlock) (__rwlock);
+  else
+    return 0;
+}
+
+static inline int
+__gthread_rwlock_tryrdlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+    return __gthrw_(pthread_rwlock_tryrdlock) (__rwlock);
+  else
+    return 0;
+}
+
+static inline int
+__gthread_rwlock_wrlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+    return __gthrw_(pthread_rwlock_wrlock) (__rwlock);
+  else
+    return 0;
+}
+
+static inline int
+__gthread_rwlock_trywrlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+    return __gthrw_(pthread_rwlock_trywrlock) (__rwlock);
+  else
+    return 0;
+}
+
+static inline int
+__gthread_rwlock_unlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+    return __gthrw_(pthread_rwlock_unlock) (__rwlock);
+  else
+    return 0;
+}
+
 #endif /* _LIBOBJC */
 
 #endif /* ! GCC_GTHR_POSIX_H */
diff --git a/libgfortran/io/async.c b/libgfortran/io/async.c
index 912b39ea302..f0bde979da4 100644
--- a/libgfortran/io/async.c
+++ b/libgfortran/io/async.c
@@ -42,6 +42,10 @@ DEBUG_LINE (__thread const char *aio_prefix = MPREFIX);
 
 DEBUG_LINE (__gthread_mutex_t debug_queue_lock = __GTHREAD_MUTEX_INIT;)
 DEBUG_LINE (aio_lock_debug *aio_debug_head = NULL;)
+#ifdef __GTHREAD_RWLOCK_INIT
+DEBUG_LINE (aio_rwlock_debug *aio_rwlock_debug_head = NULL;)
+DEBUG_LINE (__gthread_rwlock_t debug_queue_rwlock = __GTHREAD_RWLOCK_INIT;)
+#endif
 
 /* Current unit for asynchronous I/O.  Needed for error reporting.  */
 
diff --git a/libgfortran/io/async.h b/libgfortran/io/async.h
index d57722a95e4..e54b65295fc 100644
--- a/libgfortran/io/async.h
+++ b/libgfortran/io/async.h
@@ -210,6 +210,128 @@
     DEBUG_PRINTF ("%s" DEBUG_RED "ACQ:" DEBUG_NORM " %-30s %78p\n", aio_prefix, #mutex, mutex); \
   } while (0)
 
+#ifdef __GTHREAD_RWLOCK_INIT
+#define RWLOCK_DEBUG_ADD(rwlock) do {		\
+    aio_rwlock_debug *n;				\
+    n = malloc (sizeof(aio_rwlock_debug));	\
+    n->prev = TAIL_RWLOCK_DEBUG_QUEUE;			\
+    if (n->prev)				\
+      n->prev->next = n;			\
+    n->next = NULL;				\
+    n->line = __LINE__;				\
+    n->func = __FUNCTION__;			\
+    n->rw = rwlock;				\
+    if (!aio_rwlock_debug_head) {			\
+      aio_rwlock_debug_head = n;			\
+    }						\
+  } while (0)
+
+#define CHECK_RDLOCK(rwlock, status) do {					\
+    aio_rwlock_debug *curr;						\
+    INTERN_WRLOCK (&debug_queue_rwlock);					\
+    if (__gthread_rwlock_tryrdlock (rwlock)) {				\
+      if ((curr = IN_RWLOCK_DEBUG_QUEUE (rwlock))) {				\
+	sprintf (status, DEBUG_RED "%s():%d" DEBUG_NORM, curr->func, curr->line); \
+      } else								\
+	sprintf (status, DEBUG_RED "unknown" DEBUG_NORM);			\
+    }									\
+    else {								\
+      __gthread_rwlock_unlock (rwlock);					\
+      sprintf (status, DEBUG_GREEN "rwunlocked" DEBUG_NORM);			\
+    }									\
+    INTERN_RWUNLOCK (&debug_queue_rwlock);					\
+  }while (0)
+
+#define CHECK_WRLOCK(rwlock, status) do {					\
+    aio_rwlock_debug *curr;						\
+    INTERN_WRLOCK (&debug_queue_rwlock);					\
+    if (__gthread_rwlock_trywrlock (rwlock)) {				\
+      if ((curr = IN_RWLOCK_DEBUG_QUEUE (rwlock))) {				\
+	sprintf (status, DEBUG_RED "%s():%d" DEBUG_NORM, curr->func, curr->line); \
+      } else								\
+	sprintf (status, DEBUG_RED "unknown" DEBUG_NORM);			\
+    }									\
+    else {								\
+      __gthread_rwlock_unlock (rwlock);					\
+      sprintf (status, DEBUG_GREEN "rwunlocked" DEBUG_NORM);			\
+    }									\
+    INTERN_RWUNLOCK (&debug_queue_rwlock);					\
+  }while (0)
+
+#define TAIL_RWLOCK_DEBUG_QUEUE ({			\
+      aio_rwlock_debug *curr = aio_rwlock_debug_head;	\
+      while (curr && curr->next) {		\
+	curr = curr->next;			\
+      }						\
+      curr;					\
+    })
+
+#define IN_RWLOCK_DEBUG_QUEUE(rwlock) ({		\
+      __label__ end;				\
+      aio_rwlock_debug *curr = aio_rwlock_debug_head;	\
+      while (curr) {				\
+	if (curr->rw == rwlock) {			\
+	  goto end;				\
+	}					\
+	curr = curr->next;			\
+      }						\
+    end:;					\
+      curr;					\
+    })
+
+#define RDLOCK(rwlock) do {						\
+    char status[200];							\
+    CHECK_RDLOCK (rwlock, status);						\
+    DEBUG_PRINTF ("%s%-42s prev: %-35s %20s():%-5d %18p\n", aio_prefix,	\
+		 DEBUG_RED "RDLOCK: " DEBUG_NORM #rwlock, status, __FUNCTION__, __LINE__, (void *) rwlock); \
+    INTERN_RDLOCK (rwlock);							\
+    INTERN_WRLOCK (&debug_queue_rwlock);					\
+    RWLOCK_DEBUG_ADD (rwlock);						\
+    INTERN_RWUNLOCK (&debug_queue_rwlock);					\
+    DEBUG_PRINTF ("%s" DEBUG_RED "ACQ:" DEBUG_NORM " %-30s %78p\n", aio_prefix, #rwlock, rwlock); \
+  } while (0)
+
+#define WRLOCK(rwlock) do {						\
+    char status[200];							\
+    CHECK_WRLOCK (rwlock, status);						\
+    DEBUG_PRINTF ("%s%-42s prev: %-35s %20s():%-5d %18p\n", aio_prefix,	\
+		 DEBUG_RED "WRLOCK: " DEBUG_NORM #rwlock, status, __FUNCTION__, __LINE__, (void *) rwlock); \
+    INTERN_WRLOCK (rwlock);							\
+    INTERN_WRLOCK (&debug_queue_rwlock);					\
+    RWLOCK_DEBUG_ADD (rwlock);						\
+    INTERN_RWUNLOCK (&debug_queue_rwlock);					\
+    DEBUG_PRINTF ("%s" DEBUG_RED "ACQ:" DEBUG_NORM " %-30s %78p\n", aio_prefix, #rwlock, rwlock); \
+  } while (0)
+
+#define RWUNLOCK(rwlock) do {						\
+    aio_rwlock_debug *curr;						\
+    DEBUG_PRINTF ("%s%-75s %20s():%-5d %18p\n", aio_prefix, DEBUG_GREEN "RWUNLOCK: " DEBUG_NORM #rwlock, \
+		 __FUNCTION__, __LINE__, (void *) rwlock);		\
+    INTERN_WRLOCK (&debug_queue_rwlock);					\
+    curr = IN_RWLOCK_DEBUG_QUEUE (rwlock);					\
+    if (curr)								\
+      {									\
+	if (curr->prev)							\
+	  curr->prev->next = curr->next;				\
+	if (curr->next) {						\
+	  curr->next->prev = curr->prev;				\
+	  if (curr == aio_rwlock_debug_head)					\
+	    aio_rwlock_debug_head = curr->next;				\
+	} else {							\
+	  if (curr == aio_rwlock_debug_head)					\
+	    aio_rwlock_debug_head = NULL;					\
+	}								\
+	free (curr);							\
+      }									\
+    INTERN_RWUNLOCK (&debug_queue_rwlock);					\
+    INTERN_RWUNLOCK (rwlock);						\
+  } while (0)
+
+#define RD_TO_WRLOCK(rwlock) \
+  RWUNLOCK (rwlock);\
+  WRLOCK (rwlock);
+#endif
+
 #define DEBUG_LINE(...) __VA_ARGS__
 
 #else
@@ -221,12 +343,31 @@
 #define LOCK(mutex) INTERN_LOCK (mutex)
 #define UNLOCK(mutex) INTERN_UNLOCK (mutex)
 #define TRYLOCK(mutex) (__gthread_mutex_trylock (mutex))
+#ifdef __GTHREAD_RWLOCK_INIT
+#define RDLOCK(rwlock) INTERN_RDLOCK (rwlock)
+#define WRLOCK(rwlock) INTERN_WRLOCK (rwlock)
+#define RWUNLOCK(rwlock) INTERN_RWUNLOCK (rwlock)
+#define RD_TO_WRLOCK(rwlock) \
+  RWUNLOCK (rwlock);\
+  WRLOCK (rwlock);
+#endif
+#endif
+
+#ifndef __GTHREAD_RWLOCK_INIT
+#define RDLOCK(rwlock) LOCK (rwlock)
+#define WRLOCK(rwlock) LOCK (rwlock)
+#define RWUNLOCK(rwlock) UNLOCK (rwlock)
+#define RD_TO_WRLOCK(rwlock) {}
 #endif
 
 #define INTERN_LOCK(mutex) T_ERROR (__gthread_mutex_lock, mutex);
 
 #define INTERN_UNLOCK(mutex) T_ERROR (__gthread_mutex_unlock, mutex);
 
+#define INTERN_RDLOCK(rwlock) T_ERROR (__gthread_rwlock_rdlock, rwlock);
+#define INTERN_WRLOCK(rwlock) T_ERROR (__gthread_rwlock_wrlock, rwlock);
+#define INTERN_RWUNLOCK(rwlock) T_ERROR (__gthread_rwlock_unlock, rwlock);
+
 #if ASYNC_IO
 
 /* au->lock has to be held when calling this macro.  */
@@ -288,8 +429,18 @@ DEBUG_LINE (typedef struct aio_lock_debug{
   struct aio_lock_debug *prev;
 } aio_lock_debug;)
 
+DEBUG_LINE (typedef struct aio_rwlock_debug{
+  __gthread_rwlock_t *rw;
+  int line;
+  const char *func;
+  struct aio_rwlock_debug *next;
+  struct aio_rwlock_debug *prev;
+} aio_rwlock_debug;)
+
 DEBUG_LINE (extern aio_lock_debug *aio_debug_head;)
 DEBUG_LINE (extern __gthread_mutex_t debug_queue_lock;)
+DEBUG_LINE (extern aio_rwlock_debug *aio_rwlock_debug_head;)
+DEBUG_LINE (extern __gthread_rwlock_t debug_queue_rwlock;)
 
 /* Thread - local storage of the current unit we are looking at. Needed for
    error reporting.  */
diff --git a/libgfortran/io/io.h b/libgfortran/io/io.h
index 23f63d4593c..6f9e42c64fc 100644
--- a/libgfortran/io/io.h
+++ b/libgfortran/io/io.h
@@ -690,7 +690,7 @@ typedef struct gfc_unit
      from the UNIT_ROOT tree, but doesn't free it and the
      last of the waiting threads will do that.
      This must be either atomically increased/decreased, or
-     always guarded by UNIT_LOCK.  */
+     always guarded by UNIT_RWLOCK.  */
   int waiting;
   /* Flag set by close_unit if the unit as been closed.
      Must be manipulated under unit's lock.  */
@@ -769,8 +769,13 @@ internal_proto(default_recl);
 extern gfc_unit *unit_root;
 internal_proto(unit_root);
 
-extern __gthread_mutex_t unit_lock;
-internal_proto(unit_lock);
+#ifdef __GTHREAD_RWLOCK_INIT
+extern __gthread_rwlock_t unit_rwlock;
+internal_proto(unit_rwlock);
+#else
+extern __gthread_mutex_t unit_rwlock;
+internal_proto(unit_rwlock);
+#endif
 
 extern int close_unit (gfc_unit *);
 internal_proto(close_unit);
@@ -1015,9 +1020,9 @@ dec_waiting_unlocked (gfc_unit *u)
 #ifdef HAVE_ATOMIC_FETCH_ADD
   (void) __atomic_fetch_add (&u->waiting, -1, __ATOMIC_RELAXED);
 #else
-  __gthread_mutex_lock (&unit_lock);
+  WRLOCK (&unit_rwlock);
   u->waiting--;
-  __gthread_mutex_unlock (&unit_lock);
+  RWUNLOCK (&unit_rwlock);
 #endif
 }
 
diff --git a/libgfortran/io/transfer.c b/libgfortran/io/transfer.c
index 2760929a1e9..5fcdab57537 100644
--- a/libgfortran/io/transfer.c
+++ b/libgfortran/io/transfer.c
@@ -4539,9 +4539,9 @@ st_read_done_worker (st_parameter_dt *dtp, bool unlock)
    if (free_newunit)
      {
        /* Avoid inverse lock issues by placing after unlock_unit.  */
-       LOCK (&unit_lock);
+       WRLOCK (&unit_rwlock);
        newunit_free (dtp->common.unit);
-       UNLOCK (&unit_lock);
+       RWUNLOCK (&unit_rwlock);
      }
 }
 
@@ -4636,9 +4636,9 @@ st_write_done_worker (st_parameter_dt *dtp, bool unlock)
    if (free_newunit)
      {
        /* Avoid inverse lock issues by placing after unlock_unit.  */
-       LOCK (&unit_lock);
+       WRLOCK (&unit_rwlock);
        newunit_free (dtp->common.unit);
-       UNLOCK (&unit_lock);
+       RWUNLOCK (&unit_rwlock);
      }
 }
 
diff --git a/libgfortran/io/unit.c b/libgfortran/io/unit.c
index 4d32e361a21..1565069139f 100644
--- a/libgfortran/io/unit.c
+++ b/libgfortran/io/unit.c
@@ -33,34 +33,36 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 
 
 /* IO locking rules:
-   UNIT_LOCK is a master lock, protecting UNIT_ROOT tree and UNIT_CACHE.
+   UNIT_RWLOCK is a master lock, protecting UNIT_ROOT tree and UNIT_CACHE.
+   And use the rwlock to spilt read and write phase to UNIT_ROOT tree
+   and UNIT_CACHE to increase CPU efficiency.
    Concurrent use of different units should be supported, so
    each unit has its own lock, LOCK.
    Open should be atomic with its reopening of units and list_read.c
    in several places needs find_unit another unit while holding stdin
-   unit's lock, so it must be possible to acquire UNIT_LOCK while holding
+   unit's lock, so it must be possible to acquire UNIT_RWLOCK while holding
    some unit's lock.  Therefore to avoid deadlocks, it is forbidden
-   to acquire unit's private locks while holding UNIT_LOCK, except
+   to acquire unit's private locks while holding UNIT_RWLOCK, except
    for freshly created units (where no other thread can get at their
    address yet) or when using just trylock rather than lock operation.
    In addition to unit's private lock each unit has a WAITERS counter
    and CLOSED flag.  WAITERS counter must be either only
    atomically incremented/decremented in all places (if atomic builtins
-   are supported), or protected by UNIT_LOCK in all places (otherwise).
+   are supported), or protected by UNIT_RWLOCK in all places (otherwise).
    CLOSED flag must be always protected by unit's LOCK.
-   After finding a unit in UNIT_CACHE or UNIT_ROOT with UNIT_LOCK held,
+   After finding a unit in UNIT_CACHE or UNIT_ROOT with UNIT_RWLOCK held,
    WAITERS must be incremented to avoid concurrent close from freeing
-   the unit between unlocking UNIT_LOCK and acquiring unit's LOCK.
-   Unit freeing is always done under UNIT_LOCK.  If close_unit sees any
+   the unit between unlocking UNIT_RWLOCK and acquiring unit's LOCK.
+   Unit freeing is always done under UNIT_RWLOCK.  If close_unit sees any
    WAITERS, it doesn't free the unit but instead sets the CLOSED flag
    and the thread that decrements WAITERS to zero while CLOSED flag is
-   set is responsible for freeing it (while holding UNIT_LOCK).
+   set is responsible for freeing it (while holding UNIT_RWLOCK).
    flush_all_units operation is iterating over the unit tree with
-   increasing UNIT_NUMBER while holding UNIT_LOCK and attempting to
+   increasing UNIT_NUMBER while holding UNIT_RWLOCK and attempting to
    flush each unit (and therefore needs the unit's LOCK held as well).
    To avoid deadlocks, it just trylocks the LOCK and if unsuccessful,
-   remembers the current unit's UNIT_NUMBER, unlocks UNIT_LOCK, acquires
-   unit's LOCK and after flushing reacquires UNIT_LOCK and restarts with
+   remembers the current unit's UNIT_NUMBER, unlocks UNIT_RWLOCK, acquires
+   unit's LOCK and after flushing reacquires UNIT_RWLOCK and restarts with
    the smallest UNIT_NUMBER above the last one flushed.
 
    If find_unit/find_or_create_unit/find_file/get_unit routines return
@@ -101,10 +103,14 @@ gfc_offset max_offset;
 gfc_offset default_recl;
 
 gfc_unit *unit_root;
+#ifdef __GTHREAD_RWLOCK_INIT
+__gthread_rwlock_t unit_rwlock = __GTHREAD_RWLOCK_INIT;
+#else
 #ifdef __GTHREAD_MUTEX_INIT
-__gthread_mutex_t unit_lock = __GTHREAD_MUTEX_INIT;
+__gthread_mutex_t unit_rwlock = __GTHREAD_MUTEX_INIT;
 #else
-__gthread_mutex_t unit_lock;
+__gthread_mutex_t unit_rwlock;
+#endif
 #endif
 
 /* We use these filenames for error reporting.  */
@@ -329,7 +335,7 @@ get_gfc_unit (int n, int do_create)
   int c, created = 0;
 
   NOTE ("Unit n=%d, do_create = %d", n, do_create);
-  LOCK (&unit_lock);
+  RDLOCK (&unit_rwlock);
 
 retry:
   for (c = 0; c < CACHE_SIZE; c++)
@@ -350,6 +356,7 @@ retry:
       if (c == 0)
 	break;
     }
+  RD_TO_WRLOCK (&unit_rwlock);
 
   if (p == NULL && do_create)
     {
@@ -368,8 +375,8 @@ retry:
   if (created)
     {
       /* Newly created units have their lock held already
-	 from insert_unit.  Just unlock UNIT_LOCK and return.  */
-      UNLOCK (&unit_lock);
+	 from insert_unit.  Just unlock UNIT_RWLOCK and return.  */
+      RWUNLOCK (&unit_rwlock);
       return p;
     }
 
@@ -380,7 +387,7 @@ found:
       if (! TRYLOCK (&p->lock))
 	{
 	  /* assert (p->closed == 0); */
-	  UNLOCK (&unit_lock);
+	  RWUNLOCK (&unit_rwlock);
 	  return p;
 	}
 
@@ -388,14 +395,14 @@ found:
     }
 
 
-  UNLOCK (&unit_lock);
+  RWUNLOCK (&unit_rwlock);
 
   if (p != NULL && (p->child_dtio == 0))
     {
       LOCK (&p->lock);
       if (p->closed)
 	{
-	  LOCK (&unit_lock);
+	  WRLOCK (&unit_rwlock);
 	  UNLOCK (&p->lock);
 	  if (predec_waiting_locked (p) == 0)
 	    destroy_unit_mutex (p);
@@ -593,8 +600,8 @@ init_units (void)
 #endif
 #endif
 
-#ifndef __GTHREAD_MUTEX_INIT
-  __GTHREAD_MUTEX_INIT_FUNCTION (&unit_lock);
+#if (!defined(__GTHREAD_RWLOCK_INIT) && !defined(__GTHREAD_MUTEX_INIT))
+  __GTHREAD_MUTEX_INIT_FUNCTION (&unit_rwlock);
 #endif
 
   if (sizeof (max_offset) == 8)
@@ -731,7 +738,7 @@ close_unit_1 (gfc_unit *u, int locked)
 
   u->closed = 1;
   if (!locked)
-    LOCK (&unit_lock);
+    WRLOCK (&unit_rwlock);
 
   for (i = 0; i < CACHE_SIZE; i++)
     if (unit_cache[i] == u)
@@ -758,7 +765,7 @@ close_unit_1 (gfc_unit *u, int locked)
     destroy_unit_mutex (u);
 
   if (!locked)
-    UNLOCK (&unit_lock);
+    RWUNLOCK (&unit_rwlock);
 
   return rc;
 }
@@ -795,10 +802,10 @@ close_unit (gfc_unit *u)
 void
 close_units (void)
 {
-  LOCK (&unit_lock);
+  WRLOCK (&unit_rwlock);
   while (unit_root != NULL)
     close_unit_1 (unit_root, 1);
-  UNLOCK (&unit_lock);
+  RWUNLOCK (&unit_rwlock);
 
   free (newunits);
 
@@ -905,7 +912,7 @@ finish_last_advance_record (gfc_unit *u)
 int
 newunit_alloc (void)
 {
-  LOCK (&unit_lock);
+  WRLOCK (&unit_rwlock);
   if (!newunits)
     {
       newunits = xcalloc (16, 1);
@@ -919,7 +926,7 @@ newunit_alloc (void)
         {
           newunits[ii] = true;
           newunit_lwi = ii + 1;
-	  UNLOCK (&unit_lock);
+	  RWUNLOCK (&unit_rwlock);
           return -ii + NEWUNIT_START;
         }
     }
@@ -932,12 +939,12 @@ newunit_alloc (void)
   memset (newunits + old_size, 0, old_size);
   newunits[old_size] = true;
   newunit_lwi = old_size + 1;
-    UNLOCK (&unit_lock);
+    RWUNLOCK (&unit_rwlock);
   return -old_size + NEWUNIT_START;
 }
 
 
-/* Free a previously allocated newunit= unit number.  unit_lock must
+/* Free a previously allocated newunit= unit number.  unit_rwlock must
    be held when calling.  */
 
 void
diff --git a/libgfortran/io/unix.c b/libgfortran/io/unix.c
index 616c1aab166..38eef76a8db 100644
--- a/libgfortran/io/unix.c
+++ b/libgfortran/io/unix.c
@@ -1774,7 +1774,7 @@ find_file (const char *file, gfc_charlen_type file_len)
   id = id_from_path (path);
 #endif
 
-  LOCK (&unit_lock);
+  RDLOCK (&unit_rwlock);
 retry:
   u = find_file0 (unit_root, FIND_FILE0_ARGS);
   if (u != NULL)
@@ -1783,19 +1783,19 @@ retry:
       if (! __gthread_mutex_trylock (&u->lock))
 	{
 	  /* assert (u->closed == 0); */
-	  UNLOCK (&unit_lock);
+	  RWUNLOCK (&unit_rwlock);
 	  goto done;
 	}
 
       inc_waiting_locked (u);
     }
-  UNLOCK (&unit_lock);
+  RWUNLOCK (&unit_rwlock);
   if (u != NULL)
     {
       LOCK (&u->lock);
       if (u->closed)
 	{
-	  LOCK (&unit_lock);
+	  RDLOCK (&unit_rwlock);
 	  UNLOCK (&u->lock);
 	  if (predec_waiting_locked (u) == 0)
 	    free (u);
@@ -1839,13 +1839,13 @@ flush_all_units (void)
   gfc_unit *u;
   int min_unit = 0;
 
-  LOCK (&unit_lock);
+  WRLOCK (&unit_rwlock);
   do
     {
       u = flush_all_units_1 (unit_root, min_unit);
       if (u != NULL)
 	inc_waiting_locked (u);
-      UNLOCK (&unit_lock);
+      RWUNLOCK (&unit_rwlock);
       if (u == NULL)
 	return;
 
@@ -1856,13 +1856,13 @@ flush_all_units (void)
       if (u->closed == 0)
 	{
 	  sflush (u->s);
-	  LOCK (&unit_lock);
+	  WRLOCK (&unit_rwlock);
 	  UNLOCK (&u->lock);
 	  (void) predec_waiting_locked (u);
 	}
       else
 	{
-	  LOCK (&unit_lock);
+	  WRLOCK (&unit_rwlock);
 	  UNLOCK (&u->lock);
 	  if (predec_waiting_locked (u) == 0)
 	    free (u);
-- 
2.31.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] libgfortran: Replace mutex with rwlock
  2022-12-27 16:33   ` H.J. Lu
@ 2022-12-28 10:00     ` Zhu, Lipeng
  0 siblings, 0 replies; 5+ messages in thread
From: Zhu, Lipeng @ 2022-12-28 10:00 UTC (permalink / raw)
  To: H.J. Lu, sgk; +Cc: Lipeng Zhu via Fortran, gcc-patches, Li, Tianyou, Deng, Pan

> libstdc++ implements shared mutex with pthread_rwlock, which can 
> libstdc++ conflict
> with the pthread_rwlock usage in libgcc.  Lipeng, please limit the pthread_rwlock usage in libgcc only when __cplusplus isn't defined.
> 
> 
> --
> H.J.

Thanks for suggestion, send a V2 patch. 

--
Lipeng Zhu

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] libgfortran: Replace mutex with rwlock
  2022-12-26  0:57 ` Steve Kargl
@ 2022-12-27 16:33   ` H.J. Lu
  2022-12-28 10:00     ` Zhu, Lipeng
  0 siblings, 1 reply; 5+ messages in thread
From: H.J. Lu @ 2022-12-27 16:33 UTC (permalink / raw)
  To: sgk; +Cc: Lipeng Zhu via Fortran, gcc-patches, tianyou.li, pan.deng, lipeng.zhu

On Sun, Dec 25, 2022 at 4:58 PM Steve Kargl via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Wed, Dec 21, 2022 at 07:27:11PM -0500, Lipeng Zhu via Fortran wrote:
> > This patch try to introduce the rwlock and split the read/write to
> > unit_root tree and unit_cache with rwlock instead of the mutex to
> > increase CPU efficiency. In the get_gfc_unit function, the percentage
> > to step into the insert_unit function is around 30%, in most instances,
> > we can get the unit in the phase of reading the unit_cache or unit_root
> > tree. So split the read/write phase by rwlock would be an approach to
> > make it more parallel.
> >
> > BTW, the IPC metrics can increase from 0.25 to 2.2 in the Intel
> > SRP server with 220 cores. The benchmark we used is
> > https://github.com/rwesson/NEAT
> >
>
> The patch fails bootstrap on x86_64-*-freebsd.
>
> gmake[6]: Entering directory '/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/src/c++17'
> /bin/sh ../../libtool --tag CXX --tag disable-shared   --mode=compile /home/kargl/gcc/obj/./gcc/xgcc -shared-libgcc -B/home/kargl/gcc/obj/./gcc -nostdinc++ -L/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/src -L/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/src/.libs -L/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/libsupc++/.libs -B/home/kargl/work/x86_64-unknown-freebsd14.0/bin/ -B/home/kargl/work/x86_64-unknown-freebsd14.0/lib/ -isystem /home/kargl/work/x86_64-unknown-freebsd14.0/include -isystem /home/kargl/work/x86_64-unknown-freebsd14.0/sys-include   -fno-checking -I/home/kargl/gcc/gcc/libstdc++-v3/../libgcc -I/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/x86_64-unknown-freebsd14.0 -I/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include -I/home/kargl/gcc/gcc/libstdc++-v3/libsupc++   -std=gnu++17 -nostdinc++ -prefer-pic -D_GLIBCXX_SHARED -fno-implicit-templates  -Wall -Wextra -Wwrite-strings -Wcast-qual -Wabi=2  -fdiagnostics-show-location=once   -ffunction-sections -fdata-sections  -frandom-seed=floating_from_chars.lo  -fimplicit-templates -g -O2  -c -o floating_from_chars.lo ../../../../../gcc/libstdc++-v3/src/c++17/floating_from_chars.cc
> libtool: compile:  /home/kargl/gcc/obj/./gcc/xgcc -shared-libgcc -B/home/kargl/gcc/obj/./gcc -nostdinc++ -L/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/src -L/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/src/.libs -L/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/libsupc++/.libs -B/home/kargl/work/x86_64-unknown-freebsd14.0/bin/ -B/home/kargl/work/x86_64-unknown-freebsd14.0/lib/ -isystem /home/kargl/work/x86_64-unknown-freebsd14.0/include -isystem /home/kargl/work/x86_64-unknown-freebsd14.0/sys-include -fno-checking -I/home/kargl/gcc/gcc/libstdc++-v3/../libgcc -I/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/x86_64-unknown-freebsd14.0 -I/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include -I/home/kargl/gcc/gcc/libstdc++-v3/libsupc++ -std=gnu++17 -nostdinc++ -D_GLIBCXX_SHARED -fno-implicit-templates -Wall -Wextra -Wwrite-strings -Wcast-qual -Wabi=2 -fdiagnostics-show-location=once -ffunction-sections -fdata-sections -frandom-seed=floating_from_chars.lo -fimplicit-templates -g -O2 -c ../../../../../gcc/libstdc++-v3/src/c++17/floating_from_chars.cc  -fPIC -DPIC -D_GLIBCXX_SHARED -o floating_from_chars.o
> In file included from /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/memory_resource:40,
>                  from ../../../../../gcc/libstdc++-v3/src/c++17/floating_from_chars.cc:37:
> /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex: In function 'int std::__glibcxx_rwlock_rdlock(pthread_rwlock**)':
> /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:80:3: error: call of overloaded '__gthrw_pthread_rwlock_rdlock(pthread_rwlock**&)' is ambiguous
>    80 |   _GLIBCXX_GTHRW(rwlock_rdlock)
>       |   ^
> In file included from /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/x86_64-unknown-freebsd14.0/bits/gthr.h:148,
>                  from /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/bits/std_mutex.h:41,
>                  from /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:41:
> /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:80:3: note: candidate: 'int std::__gthrw_pthread_rwlock_rdlock(pthread_rwlock**)'
>    80 |   _GLIBCXX_GTHRW(rwlock_rdlock)
>       |   ^~~~~~~~~~~~~~
> /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/x86_64-unknown-freebsd14.0/bits/gthr-default.h:140:1: note: candidate: 'int __gthrw_pthread_rwlock_rdlock(pthread_rwlock**)'
>   140 | __gthrw(pthread_rwlock_rdlock)
>       | ^~~~~~~
> /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex: In function 'int std::__glibcxx_rwlock_tryrdlock(pthread_rwlock**)':
> /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:81:3: error: call of overloaded '__gthrw_pthread_rwlock_tryrdlock(pthread_rwlock**&)' is ambiguous
>    81 |   _GLIBCXX_GTHRW(rwlock_tryrdlock)
>       |   ^
> /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:81:3: note: candidate: 'int std::__gthrw_pthread_rwlock_tryrdlock(pthread_rwlock**)'
>    81 |   _GLIBCXX_GTHRW(rwlock_tryrdlock)
>       |   ^~~~~~~~~~~~~~
> /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/x86_64-unknown-freebsd14.0/bits/gthr-default.h:141:1: note: candidate: 'int __gthrw_pthread_rwlock_tryrdlock(pthread_rwlock**)'
>   141 | __gthrw(pthread_rwlock_tryrdlock)
>       | ^~~~~~~
> /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex: In function 'int std::__glibcxx_rwlock_wrlock(pthread_rwlock**)':
> /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:82:3: error: call of overloaded '__gthrw_pthread_rwlock_wrlock(pthread_rwlock**&)' is ambiguous
>    82 |   _GLIBCXX_GTHRW(rwlock_wrlock)
>       |   ^
> /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:82:3: note: candidate: 'int std::__gthrw_pthread_rwlock_wrlock(pthread_rwlock**)'
>    82 |   _GLIBCXX_GTHRW(rwlock_wrlock)
>       |   ^~~~~~~~~~~~~~
> /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/x86_64-unknown-freebsd14.0/bits/gthr-default.h:142:1: note: candidate: 'int __gthrw_pthread_rwlock_wrlock(pthread_rwlock**)'
>   142 | __gthrw(pthread_rwlock_wrlock)
>       | ^~~~~~~
> /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex: In function 'int std::__glibcxx_rwlock_trywrlock(pthread_rwlock**)':
> /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:83:3: error: call of overloaded '__gthrw_pthread_rwlock_trywrlock(pthread_rwlock**&)' is ambiguous
>    83 |   _GLIBCXX_GTHRW(rwlock_trywrlock)
>       |   ^
> /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:83:3: note: candidate: 'int std::__gthrw_pthread_rwlock_trywrlock(pthread_rwlock**)'
>    83 |   _GLIBCXX_GTHRW(rwlock_trywrlock)
>       |   ^~~~~~~~~~~~~~
> /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/x86_64-unknown-freebsd14.0/bits/gthr-default.h:143:1: note: candidate: 'int __gthrw_pthread_rwlock_trywrlock(pthread_rwlock**)'
>   143 | __gthrw(pthread_rwlock_trywrlock)
>       | ^~~~~~~
> /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex: In function 'int std::__glibcxx_rwlock_unlock(pthread_rwlock**)':
> /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:84:3: error: call of overloaded '__gthrw_pthread_rwlock_unlock(pthread_rwlock**&)' is ambiguous
>    84 |   _GLIBCXX_GTHRW(rwlock_unlock)
>       |   ^
> /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:84:3: note: candidate: 'int std::__gthrw_pthread_rwlock_unlock(pthread_rwlock**)'
>    84 |   _GLIBCXX_GTHRW(rwlock_unlock)
>       |   ^~~~~~~~~~~~~~
> /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/x86_64-unknown-freebsd14.0/bits/gthr-default.h:144:1: note: candidate: 'int __gthrw_pthread_rwlock_unlock(pthread_rwlock**)'
>   144 | __gthrw(pthread_rwlock_unlock)
>       | ^~~~~~~
> gmake[6]: *** [Makefile:585: floating_from_chars.lo] Error 1
> gmake[6]: Leaving directory '/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/src/c++17'
> gmake[5]: *** [Makefile:784: all-recursive] Error 1
> gmake[5]: Leaving directory '/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/src'
> gmake[4]: *** [Makefile:576: all-recursive] Error 1
> gmake[4]: Leaving directory '/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3'
> gmake[3]: *** [Makefile:501: all] Error 2
> gmake[3]: Leaving directory '/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3'
> gmake[2]: *** [Makefile:18284: all-stage1-target-libstdc++-v3] Error 2
> gmake[2]: Leaving directory '/home/kargl/gcc/obj'
> gmake[1]: *** [Makefile:26171: stage1-bubble] Error 2
> gmake[1]: Leaving directory '/home/kargl/gcc/obj'
> gmake: *** [Makefile:26524: bootstrap] Error 2
>
> --
> steve

libstdc++ implements shared mutex with pthread_rwlock, which can conflict
with the pthread_rwlock usage in libgcc.  Lipeng, please limit the
pthread_rwlock
usage in libgcc only when __cplusplus isn't defined.


-- 
H.J.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] libgfortran: Replace mutex with rwlock
  2022-12-22  0:27 Lipeng Zhu
@ 2022-12-26  0:57 ` Steve Kargl
  2022-12-27 16:33   ` H.J. Lu
  0 siblings, 1 reply; 5+ messages in thread
From: Steve Kargl @ 2022-12-26  0:57 UTC (permalink / raw)
  To: Lipeng Zhu via Fortran
  Cc: gcc-patches, hongjiu.lu, tianyou.li, pan.deng, lipeng.zhu

On Wed, Dec 21, 2022 at 07:27:11PM -0500, Lipeng Zhu via Fortran wrote:
> This patch try to introduce the rwlock and split the read/write to
> unit_root tree and unit_cache with rwlock instead of the mutex to
> increase CPU efficiency. In the get_gfc_unit function, the percentage
> to step into the insert_unit function is around 30%, in most instances,
> we can get the unit in the phase of reading the unit_cache or unit_root
> tree. So split the read/write phase by rwlock would be an approach to
> make it more parallel.
> 
> BTW, the IPC metrics can increase from 0.25 to 2.2 in the Intel
> SRP server with 220 cores. The benchmark we used is
> https://github.com/rwesson/NEAT
> 

The patch fails bootstrap on x86_64-*-freebsd.

gmake[6]: Entering directory '/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/src/c++17'
/bin/sh ../../libtool --tag CXX --tag disable-shared   --mode=compile /home/kargl/gcc/obj/./gcc/xgcc -shared-libgcc -B/home/kargl/gcc/obj/./gcc -nostdinc++ -L/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/src -L/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/src/.libs -L/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/libsupc++/.libs -B/home/kargl/work/x86_64-unknown-freebsd14.0/bin/ -B/home/kargl/work/x86_64-unknown-freebsd14.0/lib/ -isystem /home/kargl/work/x86_64-unknown-freebsd14.0/include -isystem /home/kargl/work/x86_64-unknown-freebsd14.0/sys-include   -fno-checking -I/home/kargl/gcc/gcc/libstdc++-v3/../libgcc -I/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/x86_64-unknown-freebsd14.0 -I/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include -I/home/kargl/gcc/gcc/libstdc++-v3/libsupc++   -std=gnu++17 -nostdinc++ -prefer-pic -D_GLIBCXX_SHARED -fno-implicit-templates  -Wall -Wextra -Wwrite-strings -Wcast-qual -Wabi=2  -fdiagnostics-show-location=once   -ffunction-sections -fdata-sections  -frandom-seed=floating_from_chars.lo  -fimplicit-templates -g -O2  -c -o floating_from_chars.lo ../../../../../gcc/libstdc++-v3/src/c++17/floating_from_chars.cc
libtool: compile:  /home/kargl/gcc/obj/./gcc/xgcc -shared-libgcc -B/home/kargl/gcc/obj/./gcc -nostdinc++ -L/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/src -L/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/src/.libs -L/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/libsupc++/.libs -B/home/kargl/work/x86_64-unknown-freebsd14.0/bin/ -B/home/kargl/work/x86_64-unknown-freebsd14.0/lib/ -isystem /home/kargl/work/x86_64-unknown-freebsd14.0/include -isystem /home/kargl/work/x86_64-unknown-freebsd14.0/sys-include -fno-checking -I/home/kargl/gcc/gcc/libstdc++-v3/../libgcc -I/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/x86_64-unknown-freebsd14.0 -I/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include -I/home/kargl/gcc/gcc/libstdc++-v3/libsupc++ -std=gnu++17 -nostdinc++ -D_GLIBCXX_SHARED -fno-implicit-templates -Wall -Wextra -Wwrite-strings -Wcast-qual -Wabi=2 -fdiagnostics-show-location=once -ffunction-sections -fdata-sections -frandom-seed=floating_from_chars.lo -fimplicit-templates -g -O2 -c ../../../../../gcc/libstdc++-v3/src/c++17/floating_from_chars.cc  -fPIC -DPIC -D_GLIBCXX_SHARED -o floating_from_chars.o
In file included from /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/memory_resource:40,
                 from ../../../../../gcc/libstdc++-v3/src/c++17/floating_from_chars.cc:37:
/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex: In function 'int std::__glibcxx_rwlock_rdlock(pthread_rwlock**)':
/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:80:3: error: call of overloaded '__gthrw_pthread_rwlock_rdlock(pthread_rwlock**&)' is ambiguous
   80 |   _GLIBCXX_GTHRW(rwlock_rdlock)
      |   ^
In file included from /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/x86_64-unknown-freebsd14.0/bits/gthr.h:148,
                 from /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/bits/std_mutex.h:41,
                 from /home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:41:
/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:80:3: note: candidate: 'int std::__gthrw_pthread_rwlock_rdlock(pthread_rwlock**)'
   80 |   _GLIBCXX_GTHRW(rwlock_rdlock)
      |   ^~~~~~~~~~~~~~
/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/x86_64-unknown-freebsd14.0/bits/gthr-default.h:140:1: note: candidate: 'int __gthrw_pthread_rwlock_rdlock(pthread_rwlock**)'
  140 | __gthrw(pthread_rwlock_rdlock)
      | ^~~~~~~
/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex: In function 'int std::__glibcxx_rwlock_tryrdlock(pthread_rwlock**)':
/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:81:3: error: call of overloaded '__gthrw_pthread_rwlock_tryrdlock(pthread_rwlock**&)' is ambiguous
   81 |   _GLIBCXX_GTHRW(rwlock_tryrdlock)
      |   ^
/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:81:3: note: candidate: 'int std::__gthrw_pthread_rwlock_tryrdlock(pthread_rwlock**)'
   81 |   _GLIBCXX_GTHRW(rwlock_tryrdlock)
      |   ^~~~~~~~~~~~~~
/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/x86_64-unknown-freebsd14.0/bits/gthr-default.h:141:1: note: candidate: 'int __gthrw_pthread_rwlock_tryrdlock(pthread_rwlock**)'
  141 | __gthrw(pthread_rwlock_tryrdlock)
      | ^~~~~~~
/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex: In function 'int std::__glibcxx_rwlock_wrlock(pthread_rwlock**)':
/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:82:3: error: call of overloaded '__gthrw_pthread_rwlock_wrlock(pthread_rwlock**&)' is ambiguous
   82 |   _GLIBCXX_GTHRW(rwlock_wrlock)
      |   ^
/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:82:3: note: candidate: 'int std::__gthrw_pthread_rwlock_wrlock(pthread_rwlock**)'
   82 |   _GLIBCXX_GTHRW(rwlock_wrlock)
      |   ^~~~~~~~~~~~~~
/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/x86_64-unknown-freebsd14.0/bits/gthr-default.h:142:1: note: candidate: 'int __gthrw_pthread_rwlock_wrlock(pthread_rwlock**)'
  142 | __gthrw(pthread_rwlock_wrlock)
      | ^~~~~~~
/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex: In function 'int std::__glibcxx_rwlock_trywrlock(pthread_rwlock**)':
/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:83:3: error: call of overloaded '__gthrw_pthread_rwlock_trywrlock(pthread_rwlock**&)' is ambiguous
   83 |   _GLIBCXX_GTHRW(rwlock_trywrlock)
      |   ^
/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:83:3: note: candidate: 'int std::__gthrw_pthread_rwlock_trywrlock(pthread_rwlock**)'
   83 |   _GLIBCXX_GTHRW(rwlock_trywrlock)
      |   ^~~~~~~~~~~~~~
/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/x86_64-unknown-freebsd14.0/bits/gthr-default.h:143:1: note: candidate: 'int __gthrw_pthread_rwlock_trywrlock(pthread_rwlock**)'
  143 | __gthrw(pthread_rwlock_trywrlock)
      | ^~~~~~~
/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex: In function 'int std::__glibcxx_rwlock_unlock(pthread_rwlock**)':
/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:84:3: error: call of overloaded '__gthrw_pthread_rwlock_unlock(pthread_rwlock**&)' is ambiguous
   84 |   _GLIBCXX_GTHRW(rwlock_unlock)
      |   ^
/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/shared_mutex:84:3: note: candidate: 'int std::__gthrw_pthread_rwlock_unlock(pthread_rwlock**)'
   84 |   _GLIBCXX_GTHRW(rwlock_unlock)
      |   ^~~~~~~~~~~~~~
/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/include/x86_64-unknown-freebsd14.0/bits/gthr-default.h:144:1: note: candidate: 'int __gthrw_pthread_rwlock_unlock(pthread_rwlock**)'
  144 | __gthrw(pthread_rwlock_unlock)
      | ^~~~~~~
gmake[6]: *** [Makefile:585: floating_from_chars.lo] Error 1
gmake[6]: Leaving directory '/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/src/c++17'
gmake[5]: *** [Makefile:784: all-recursive] Error 1
gmake[5]: Leaving directory '/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3/src'
gmake[4]: *** [Makefile:576: all-recursive] Error 1
gmake[4]: Leaving directory '/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3'
gmake[3]: *** [Makefile:501: all] Error 2
gmake[3]: Leaving directory '/home/kargl/gcc/obj/x86_64-unknown-freebsd14.0/libstdc++-v3'
gmake[2]: *** [Makefile:18284: all-stage1-target-libstdc++-v3] Error 2
gmake[2]: Leaving directory '/home/kargl/gcc/obj'
gmake[1]: *** [Makefile:26171: stage1-bubble] Error 2
gmake[1]: Leaving directory '/home/kargl/gcc/obj'
gmake: *** [Makefile:26524: bootstrap] Error 2

--
steve

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] libgfortran: Replace mutex with rwlock
@ 2022-12-22  0:27 Lipeng Zhu
  2022-12-26  0:57 ` Steve Kargl
  0 siblings, 1 reply; 5+ messages in thread
From: Lipeng Zhu @ 2022-12-22  0:27 UTC (permalink / raw)
  To: fortran, gcc-patches; +Cc: hongjiu.lu, tianyou.li, pan.deng, lipeng.zhu

This patch try to introduce the rwlock and split the read/write to
unit_root tree and unit_cache with rwlock instead of the mutex to
increase CPU efficiency. In the get_gfc_unit function, the percentage
to step into the insert_unit function is around 30%, in most instances,
we can get the unit in the phase of reading the unit_cache or unit_root
tree. So split the read/write phase by rwlock would be an approach to
make it more parallel.

BTW, the IPC metrics can increase from 0.25 to 2.2 in the Intel
SRP server with 220 cores. The benchmark we used is
https://github.com/rwesson/NEAT

libgcc/ChangeLog:

    * gthr-posix.h (__GTHREAD_RWLOCK_INIT): New macro
    (__gthrw): New function
    (__gthread_rwlock_rdlock): New function
    (__gthread_rwlock_tryrdlock): New function
    (__gthread_rwlock_wrlock): New function
    (__gthread_rwlock_trywrlock): New function
    (__gthread_rwlock_unlock): New function

libgfortran/ChangeLog:

    * io/async.c (DEBUG_LINE): New
    * io/async.h (RWLOCK_DEBUG_ADD): New macro
    (CHECK_RDLOCK): New macro
    (CHECK_WRLOCK): New macro
    (TAIL_RWLOCK_DEBUG_QUEUE): New macro
    (IN_RWLOCK_DEBUG_QUEUE): New macro
    (RDLOCK): New macro
    (WRLOCK): New macro
    (RWUNLOCK): New macro
    (RD_TO_WRLOCK): New macro
    (INTERN_RDLOCK): New macro
    (INTERN_WRLOCK): New macro
    (INTERN_RWUNLOCK): New macro
    * io/io.h (internal_proto): Define unit_rwlock
    * io/transfer.c (st_read_done_worker): Relace unit_lock with unit_rwlock
    (st_write_done_worker): Relace unit_lock with unit_rwlock
    * io/unit.c (get_gfc_unit): Relace unit_lock with unit_rwlock
    (if): Relace unit_lock with unit_rwlock
    (close_unit_1): Relace unit_lock with unit_rwlock
    (close_units): Relace unit_lock with unit_rwlock
    (newunit_alloc): Relace unit_lock with unit_rwlock
    * io/unix.c (flush_all_units): Relace unit_lock with unit_rwlock
---
 libgcc/gthr-posix.h       |  52 +++++++++++++
 libgfortran/io/async.c    |   4 +
 libgfortran/io/async.h    | 151 ++++++++++++++++++++++++++++++++++++++
 libgfortran/io/io.h       |  15 ++--
 libgfortran/io/transfer.c |   8 +-
 libgfortran/io/unit.c     |  65 ++++++++--------
 libgfortran/io/unix.c     |  16 ++--
 7 files changed, 265 insertions(+), 46 deletions(-)

diff --git a/libgcc/gthr-posix.h b/libgcc/gthr-posix.h
index f1a5ab8e075..358948e8ae8 100644
--- a/libgcc/gthr-posix.h
+++ b/libgcc/gthr-posix.h
@@ -48,6 +48,7 @@ typedef pthread_t __gthread_t;
 typedef pthread_key_t __gthread_key_t;
 typedef pthread_once_t __gthread_once_t;
 typedef pthread_mutex_t __gthread_mutex_t;
+typedef pthread_rwlock_t __gthread_rwlock_t;
 typedef pthread_mutex_t __gthread_recursive_mutex_t;
 typedef pthread_cond_t __gthread_cond_t;
 typedef struct timespec __gthread_time_t;
@@ -58,6 +59,7 @@ typedef struct timespec __gthread_time_t;
 
 #define __GTHREAD_MUTEX_INIT PTHREAD_MUTEX_INITIALIZER
 #define __GTHREAD_MUTEX_INIT_FUNCTION __gthread_mutex_init_function
+#define __GTHREAD_RWLOCK_INIT PTHREAD_RWLOCK_INITIALIZER
 #define __GTHREAD_ONCE_INIT PTHREAD_ONCE_INIT
 #if defined(PTHREAD_RECURSIVE_MUTEX_INITIALIZER)
 #define __GTHREAD_RECURSIVE_MUTEX_INIT PTHREAD_RECURSIVE_MUTEX_INITIALIZER
@@ -135,6 +137,11 @@ __gthrw(pthread_mutexattr_init)
 __gthrw(pthread_mutexattr_settype)
 __gthrw(pthread_mutexattr_destroy)
 
+__gthrw(pthread_rwlock_rdlock)
+__gthrw(pthread_rwlock_tryrdlock)
+__gthrw(pthread_rwlock_wrlock)
+__gthrw(pthread_rwlock_trywrlock)
+__gthrw(pthread_rwlock_unlock)
 
 #if defined(_LIBOBJC) || defined(_LIBOBJC_WEAK)
 /* Objective-C.  */
@@ -885,6 +892,51 @@ __gthread_cond_destroy (__gthread_cond_t* __cond)
   return __gthrw_(pthread_cond_destroy) (__cond);
 }
 
+static inline int
+__gthread_rwlock_rdlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+    return __gthrw_(pthread_rwlock_rdlock) (__rwlock);
+  else
+    return 0;
+}
+
+static inline int
+__gthread_rwlock_tryrdlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+    return __gthrw_(pthread_rwlock_tryrdlock) (__rwlock);
+  else
+    return 0;
+}
+
+static inline int
+__gthread_rwlock_wrlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+    return __gthrw_(pthread_rwlock_wrlock) (__rwlock);
+  else
+    return 0;
+}
+
+static inline int
+__gthread_rwlock_trywrlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+    return __gthrw_(pthread_rwlock_trywrlock) (__rwlock);
+  else
+    return 0;
+}
+
+static inline int
+__gthread_rwlock_unlock (__gthread_rwlock_t *__rwlock)
+{
+  if (__gthread_active_p ())
+    return __gthrw_(pthread_rwlock_unlock) (__rwlock);
+  else
+    return 0;
+}
+
 #endif /* _LIBOBJC */
 
 #endif /* ! GCC_GTHR_POSIX_H */
diff --git a/libgfortran/io/async.c b/libgfortran/io/async.c
index 912b39ea302..f0bde979da4 100644
--- a/libgfortran/io/async.c
+++ b/libgfortran/io/async.c
@@ -42,6 +42,10 @@ DEBUG_LINE (__thread const char *aio_prefix = MPREFIX);
 
 DEBUG_LINE (__gthread_mutex_t debug_queue_lock = __GTHREAD_MUTEX_INIT;)
 DEBUG_LINE (aio_lock_debug *aio_debug_head = NULL;)
+#ifdef __GTHREAD_RWLOCK_INIT
+DEBUG_LINE (aio_rwlock_debug *aio_rwlock_debug_head = NULL;)
+DEBUG_LINE (__gthread_rwlock_t debug_queue_rwlock = __GTHREAD_RWLOCK_INIT;)
+#endif
 
 /* Current unit for asynchronous I/O.  Needed for error reporting.  */
 
diff --git a/libgfortran/io/async.h b/libgfortran/io/async.h
index d57722a95e4..e54b65295fc 100644
--- a/libgfortran/io/async.h
+++ b/libgfortran/io/async.h
@@ -210,6 +210,128 @@
     DEBUG_PRINTF ("%s" DEBUG_RED "ACQ:" DEBUG_NORM " %-30s %78p\n", aio_prefix, #mutex, mutex); \
   } while (0)
 
+#ifdef __GTHREAD_RWLOCK_INIT
+#define RWLOCK_DEBUG_ADD(rwlock) do {		\
+    aio_rwlock_debug *n;				\
+    n = malloc (sizeof(aio_rwlock_debug));	\
+    n->prev = TAIL_RWLOCK_DEBUG_QUEUE;			\
+    if (n->prev)				\
+      n->prev->next = n;			\
+    n->next = NULL;				\
+    n->line = __LINE__;				\
+    n->func = __FUNCTION__;			\
+    n->rw = rwlock;				\
+    if (!aio_rwlock_debug_head) {			\
+      aio_rwlock_debug_head = n;			\
+    }						\
+  } while (0)
+
+#define CHECK_RDLOCK(rwlock, status) do {					\
+    aio_rwlock_debug *curr;						\
+    INTERN_WRLOCK (&debug_queue_rwlock);					\
+    if (__gthread_rwlock_tryrdlock (rwlock)) {				\
+      if ((curr = IN_RWLOCK_DEBUG_QUEUE (rwlock))) {				\
+	sprintf (status, DEBUG_RED "%s():%d" DEBUG_NORM, curr->func, curr->line); \
+      } else								\
+	sprintf (status, DEBUG_RED "unknown" DEBUG_NORM);			\
+    }									\
+    else {								\
+      __gthread_rwlock_unlock (rwlock);					\
+      sprintf (status, DEBUG_GREEN "rwunlocked" DEBUG_NORM);			\
+    }									\
+    INTERN_RWUNLOCK (&debug_queue_rwlock);					\
+  }while (0)
+
+#define CHECK_WRLOCK(rwlock, status) do {					\
+    aio_rwlock_debug *curr;						\
+    INTERN_WRLOCK (&debug_queue_rwlock);					\
+    if (__gthread_rwlock_trywrlock (rwlock)) {				\
+      if ((curr = IN_RWLOCK_DEBUG_QUEUE (rwlock))) {				\
+	sprintf (status, DEBUG_RED "%s():%d" DEBUG_NORM, curr->func, curr->line); \
+      } else								\
+	sprintf (status, DEBUG_RED "unknown" DEBUG_NORM);			\
+    }									\
+    else {								\
+      __gthread_rwlock_unlock (rwlock);					\
+      sprintf (status, DEBUG_GREEN "rwunlocked" DEBUG_NORM);			\
+    }									\
+    INTERN_RWUNLOCK (&debug_queue_rwlock);					\
+  }while (0)
+
+#define TAIL_RWLOCK_DEBUG_QUEUE ({			\
+      aio_rwlock_debug *curr = aio_rwlock_debug_head;	\
+      while (curr && curr->next) {		\
+	curr = curr->next;			\
+      }						\
+      curr;					\
+    })
+
+#define IN_RWLOCK_DEBUG_QUEUE(rwlock) ({		\
+      __label__ end;				\
+      aio_rwlock_debug *curr = aio_rwlock_debug_head;	\
+      while (curr) {				\
+	if (curr->rw == rwlock) {			\
+	  goto end;				\
+	}					\
+	curr = curr->next;			\
+      }						\
+    end:;					\
+      curr;					\
+    })
+
+#define RDLOCK(rwlock) do {						\
+    char status[200];							\
+    CHECK_RDLOCK (rwlock, status);						\
+    DEBUG_PRINTF ("%s%-42s prev: %-35s %20s():%-5d %18p\n", aio_prefix,	\
+		 DEBUG_RED "RDLOCK: " DEBUG_NORM #rwlock, status, __FUNCTION__, __LINE__, (void *) rwlock); \
+    INTERN_RDLOCK (rwlock);							\
+    INTERN_WRLOCK (&debug_queue_rwlock);					\
+    RWLOCK_DEBUG_ADD (rwlock);						\
+    INTERN_RWUNLOCK (&debug_queue_rwlock);					\
+    DEBUG_PRINTF ("%s" DEBUG_RED "ACQ:" DEBUG_NORM " %-30s %78p\n", aio_prefix, #rwlock, rwlock); \
+  } while (0)
+
+#define WRLOCK(rwlock) do {						\
+    char status[200];							\
+    CHECK_WRLOCK (rwlock, status);						\
+    DEBUG_PRINTF ("%s%-42s prev: %-35s %20s():%-5d %18p\n", aio_prefix,	\
+		 DEBUG_RED "WRLOCK: " DEBUG_NORM #rwlock, status, __FUNCTION__, __LINE__, (void *) rwlock); \
+    INTERN_WRLOCK (rwlock);							\
+    INTERN_WRLOCK (&debug_queue_rwlock);					\
+    RWLOCK_DEBUG_ADD (rwlock);						\
+    INTERN_RWUNLOCK (&debug_queue_rwlock);					\
+    DEBUG_PRINTF ("%s" DEBUG_RED "ACQ:" DEBUG_NORM " %-30s %78p\n", aio_prefix, #rwlock, rwlock); \
+  } while (0)
+
+#define RWUNLOCK(rwlock) do {						\
+    aio_rwlock_debug *curr;						\
+    DEBUG_PRINTF ("%s%-75s %20s():%-5d %18p\n", aio_prefix, DEBUG_GREEN "RWUNLOCK: " DEBUG_NORM #rwlock, \
+		 __FUNCTION__, __LINE__, (void *) rwlock);		\
+    INTERN_WRLOCK (&debug_queue_rwlock);					\
+    curr = IN_RWLOCK_DEBUG_QUEUE (rwlock);					\
+    if (curr)								\
+      {									\
+	if (curr->prev)							\
+	  curr->prev->next = curr->next;				\
+	if (curr->next) {						\
+	  curr->next->prev = curr->prev;				\
+	  if (curr == aio_rwlock_debug_head)					\
+	    aio_rwlock_debug_head = curr->next;				\
+	} else {							\
+	  if (curr == aio_rwlock_debug_head)					\
+	    aio_rwlock_debug_head = NULL;					\
+	}								\
+	free (curr);							\
+      }									\
+    INTERN_RWUNLOCK (&debug_queue_rwlock);					\
+    INTERN_RWUNLOCK (rwlock);						\
+  } while (0)
+
+#define RD_TO_WRLOCK(rwlock) \
+  RWUNLOCK (rwlock);\
+  WRLOCK (rwlock);
+#endif
+
 #define DEBUG_LINE(...) __VA_ARGS__
 
 #else
@@ -221,12 +343,31 @@
 #define LOCK(mutex) INTERN_LOCK (mutex)
 #define UNLOCK(mutex) INTERN_UNLOCK (mutex)
 #define TRYLOCK(mutex) (__gthread_mutex_trylock (mutex))
+#ifdef __GTHREAD_RWLOCK_INIT
+#define RDLOCK(rwlock) INTERN_RDLOCK (rwlock)
+#define WRLOCK(rwlock) INTERN_WRLOCK (rwlock)
+#define RWUNLOCK(rwlock) INTERN_RWUNLOCK (rwlock)
+#define RD_TO_WRLOCK(rwlock) \
+  RWUNLOCK (rwlock);\
+  WRLOCK (rwlock);
+#endif
+#endif
+
+#ifndef __GTHREAD_RWLOCK_INIT
+#define RDLOCK(rwlock) LOCK (rwlock)
+#define WRLOCK(rwlock) LOCK (rwlock)
+#define RWUNLOCK(rwlock) UNLOCK (rwlock)
+#define RD_TO_WRLOCK(rwlock) {}
 #endif
 
 #define INTERN_LOCK(mutex) T_ERROR (__gthread_mutex_lock, mutex);
 
 #define INTERN_UNLOCK(mutex) T_ERROR (__gthread_mutex_unlock, mutex);
 
+#define INTERN_RDLOCK(rwlock) T_ERROR (__gthread_rwlock_rdlock, rwlock);
+#define INTERN_WRLOCK(rwlock) T_ERROR (__gthread_rwlock_wrlock, rwlock);
+#define INTERN_RWUNLOCK(rwlock) T_ERROR (__gthread_rwlock_unlock, rwlock);
+
 #if ASYNC_IO
 
 /* au->lock has to be held when calling this macro.  */
@@ -288,8 +429,18 @@ DEBUG_LINE (typedef struct aio_lock_debug{
   struct aio_lock_debug *prev;
 } aio_lock_debug;)
 
+DEBUG_LINE (typedef struct aio_rwlock_debug{
+  __gthread_rwlock_t *rw;
+  int line;
+  const char *func;
+  struct aio_rwlock_debug *next;
+  struct aio_rwlock_debug *prev;
+} aio_rwlock_debug;)
+
 DEBUG_LINE (extern aio_lock_debug *aio_debug_head;)
 DEBUG_LINE (extern __gthread_mutex_t debug_queue_lock;)
+DEBUG_LINE (extern aio_rwlock_debug *aio_rwlock_debug_head;)
+DEBUG_LINE (extern __gthread_rwlock_t debug_queue_rwlock;)
 
 /* Thread - local storage of the current unit we are looking at. Needed for
    error reporting.  */
diff --git a/libgfortran/io/io.h b/libgfortran/io/io.h
index 23f63d4593c..6f9e42c64fc 100644
--- a/libgfortran/io/io.h
+++ b/libgfortran/io/io.h
@@ -690,7 +690,7 @@ typedef struct gfc_unit
      from the UNIT_ROOT tree, but doesn't free it and the
      last of the waiting threads will do that.
      This must be either atomically increased/decreased, or
-     always guarded by UNIT_LOCK.  */
+     always guarded by UNIT_RWLOCK.  */
   int waiting;
   /* Flag set by close_unit if the unit as been closed.
      Must be manipulated under unit's lock.  */
@@ -769,8 +769,13 @@ internal_proto(default_recl);
 extern gfc_unit *unit_root;
 internal_proto(unit_root);
 
-extern __gthread_mutex_t unit_lock;
-internal_proto(unit_lock);
+#ifdef __GTHREAD_RWLOCK_INIT
+extern __gthread_rwlock_t unit_rwlock;
+internal_proto(unit_rwlock);
+#else
+extern __gthread_mutex_t unit_rwlock;
+internal_proto(unit_rwlock);
+#endif
 
 extern int close_unit (gfc_unit *);
 internal_proto(close_unit);
@@ -1015,9 +1020,9 @@ dec_waiting_unlocked (gfc_unit *u)
 #ifdef HAVE_ATOMIC_FETCH_ADD
   (void) __atomic_fetch_add (&u->waiting, -1, __ATOMIC_RELAXED);
 #else
-  __gthread_mutex_lock (&unit_lock);
+  WRLOCK (&unit_rwlock);
   u->waiting--;
-  __gthread_mutex_unlock (&unit_lock);
+  RWUNLOCK (&unit_rwlock);
 #endif
 }
 
diff --git a/libgfortran/io/transfer.c b/libgfortran/io/transfer.c
index 2760929a1e9..5fcdab57537 100644
--- a/libgfortran/io/transfer.c
+++ b/libgfortran/io/transfer.c
@@ -4539,9 +4539,9 @@ st_read_done_worker (st_parameter_dt *dtp, bool unlock)
    if (free_newunit)
      {
        /* Avoid inverse lock issues by placing after unlock_unit.  */
-       LOCK (&unit_lock);
+       WRLOCK (&unit_rwlock);
        newunit_free (dtp->common.unit);
-       UNLOCK (&unit_lock);
+       RWUNLOCK (&unit_rwlock);
      }
 }
 
@@ -4636,9 +4636,9 @@ st_write_done_worker (st_parameter_dt *dtp, bool unlock)
    if (free_newunit)
      {
        /* Avoid inverse lock issues by placing after unlock_unit.  */
-       LOCK (&unit_lock);
+       WRLOCK (&unit_rwlock);
        newunit_free (dtp->common.unit);
-       UNLOCK (&unit_lock);
+       RWUNLOCK (&unit_rwlock);
      }
 }
 
diff --git a/libgfortran/io/unit.c b/libgfortran/io/unit.c
index 4d32e361a21..1565069139f 100644
--- a/libgfortran/io/unit.c
+++ b/libgfortran/io/unit.c
@@ -33,34 +33,36 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 
 
 /* IO locking rules:
-   UNIT_LOCK is a master lock, protecting UNIT_ROOT tree and UNIT_CACHE.
+   UNIT_RWLOCK is a master lock, protecting UNIT_ROOT tree and UNIT_CACHE.
+   And use the rwlock to spilt read and write phase to UNIT_ROOT tree
+   and UNIT_CACHE to increase CPU efficiency.
    Concurrent use of different units should be supported, so
    each unit has its own lock, LOCK.
    Open should be atomic with its reopening of units and list_read.c
    in several places needs find_unit another unit while holding stdin
-   unit's lock, so it must be possible to acquire UNIT_LOCK while holding
+   unit's lock, so it must be possible to acquire UNIT_RWLOCK while holding
    some unit's lock.  Therefore to avoid deadlocks, it is forbidden
-   to acquire unit's private locks while holding UNIT_LOCK, except
+   to acquire unit's private locks while holding UNIT_RWLOCK, except
    for freshly created units (where no other thread can get at their
    address yet) or when using just trylock rather than lock operation.
    In addition to unit's private lock each unit has a WAITERS counter
    and CLOSED flag.  WAITERS counter must be either only
    atomically incremented/decremented in all places (if atomic builtins
-   are supported), or protected by UNIT_LOCK in all places (otherwise).
+   are supported), or protected by UNIT_RWLOCK in all places (otherwise).
    CLOSED flag must be always protected by unit's LOCK.
-   After finding a unit in UNIT_CACHE or UNIT_ROOT with UNIT_LOCK held,
+   After finding a unit in UNIT_CACHE or UNIT_ROOT with UNIT_RWLOCK held,
    WAITERS must be incremented to avoid concurrent close from freeing
-   the unit between unlocking UNIT_LOCK and acquiring unit's LOCK.
-   Unit freeing is always done under UNIT_LOCK.  If close_unit sees any
+   the unit between unlocking UNIT_RWLOCK and acquiring unit's LOCK.
+   Unit freeing is always done under UNIT_RWLOCK.  If close_unit sees any
    WAITERS, it doesn't free the unit but instead sets the CLOSED flag
    and the thread that decrements WAITERS to zero while CLOSED flag is
-   set is responsible for freeing it (while holding UNIT_LOCK).
+   set is responsible for freeing it (while holding UNIT_RWLOCK).
    flush_all_units operation is iterating over the unit tree with
-   increasing UNIT_NUMBER while holding UNIT_LOCK and attempting to
+   increasing UNIT_NUMBER while holding UNIT_RWLOCK and attempting to
    flush each unit (and therefore needs the unit's LOCK held as well).
    To avoid deadlocks, it just trylocks the LOCK and if unsuccessful,
-   remembers the current unit's UNIT_NUMBER, unlocks UNIT_LOCK, acquires
-   unit's LOCK and after flushing reacquires UNIT_LOCK and restarts with
+   remembers the current unit's UNIT_NUMBER, unlocks UNIT_RWLOCK, acquires
+   unit's LOCK and after flushing reacquires UNIT_RWLOCK and restarts with
    the smallest UNIT_NUMBER above the last one flushed.
 
    If find_unit/find_or_create_unit/find_file/get_unit routines return
@@ -101,10 +103,14 @@ gfc_offset max_offset;
 gfc_offset default_recl;
 
 gfc_unit *unit_root;
+#ifdef __GTHREAD_RWLOCK_INIT
+__gthread_rwlock_t unit_rwlock = __GTHREAD_RWLOCK_INIT;
+#else
 #ifdef __GTHREAD_MUTEX_INIT
-__gthread_mutex_t unit_lock = __GTHREAD_MUTEX_INIT;
+__gthread_mutex_t unit_rwlock = __GTHREAD_MUTEX_INIT;
 #else
-__gthread_mutex_t unit_lock;
+__gthread_mutex_t unit_rwlock;
+#endif
 #endif
 
 /* We use these filenames for error reporting.  */
@@ -329,7 +335,7 @@ get_gfc_unit (int n, int do_create)
   int c, created = 0;
 
   NOTE ("Unit n=%d, do_create = %d", n, do_create);
-  LOCK (&unit_lock);
+  RDLOCK (&unit_rwlock);
 
 retry:
   for (c = 0; c < CACHE_SIZE; c++)
@@ -350,6 +356,7 @@ retry:
       if (c == 0)
 	break;
     }
+  RD_TO_WRLOCK (&unit_rwlock);
 
   if (p == NULL && do_create)
     {
@@ -368,8 +375,8 @@ retry:
   if (created)
     {
       /* Newly created units have their lock held already
-	 from insert_unit.  Just unlock UNIT_LOCK and return.  */
-      UNLOCK (&unit_lock);
+	 from insert_unit.  Just unlock UNIT_RWLOCK and return.  */
+      RWUNLOCK (&unit_rwlock);
       return p;
     }
 
@@ -380,7 +387,7 @@ found:
       if (! TRYLOCK (&p->lock))
 	{
 	  /* assert (p->closed == 0); */
-	  UNLOCK (&unit_lock);
+	  RWUNLOCK (&unit_rwlock);
 	  return p;
 	}
 
@@ -388,14 +395,14 @@ found:
     }
 
 
-  UNLOCK (&unit_lock);
+  RWUNLOCK (&unit_rwlock);
 
   if (p != NULL && (p->child_dtio == 0))
     {
       LOCK (&p->lock);
       if (p->closed)
 	{
-	  LOCK (&unit_lock);
+	  WRLOCK (&unit_rwlock);
 	  UNLOCK (&p->lock);
 	  if (predec_waiting_locked (p) == 0)
 	    destroy_unit_mutex (p);
@@ -593,8 +600,8 @@ init_units (void)
 #endif
 #endif
 
-#ifndef __GTHREAD_MUTEX_INIT
-  __GTHREAD_MUTEX_INIT_FUNCTION (&unit_lock);
+#if (!defined(__GTHREAD_RWLOCK_INIT) && !defined(__GTHREAD_MUTEX_INIT))
+  __GTHREAD_MUTEX_INIT_FUNCTION (&unit_rwlock);
 #endif
 
   if (sizeof (max_offset) == 8)
@@ -731,7 +738,7 @@ close_unit_1 (gfc_unit *u, int locked)
 
   u->closed = 1;
   if (!locked)
-    LOCK (&unit_lock);
+    WRLOCK (&unit_rwlock);
 
   for (i = 0; i < CACHE_SIZE; i++)
     if (unit_cache[i] == u)
@@ -758,7 +765,7 @@ close_unit_1 (gfc_unit *u, int locked)
     destroy_unit_mutex (u);
 
   if (!locked)
-    UNLOCK (&unit_lock);
+    RWUNLOCK (&unit_rwlock);
 
   return rc;
 }
@@ -795,10 +802,10 @@ close_unit (gfc_unit *u)
 void
 close_units (void)
 {
-  LOCK (&unit_lock);
+  WRLOCK (&unit_rwlock);
   while (unit_root != NULL)
     close_unit_1 (unit_root, 1);
-  UNLOCK (&unit_lock);
+  RWUNLOCK (&unit_rwlock);
 
   free (newunits);
 
@@ -905,7 +912,7 @@ finish_last_advance_record (gfc_unit *u)
 int
 newunit_alloc (void)
 {
-  LOCK (&unit_lock);
+  WRLOCK (&unit_rwlock);
   if (!newunits)
     {
       newunits = xcalloc (16, 1);
@@ -919,7 +926,7 @@ newunit_alloc (void)
         {
           newunits[ii] = true;
           newunit_lwi = ii + 1;
-	  UNLOCK (&unit_lock);
+	  RWUNLOCK (&unit_rwlock);
           return -ii + NEWUNIT_START;
         }
     }
@@ -932,12 +939,12 @@ newunit_alloc (void)
   memset (newunits + old_size, 0, old_size);
   newunits[old_size] = true;
   newunit_lwi = old_size + 1;
-    UNLOCK (&unit_lock);
+    RWUNLOCK (&unit_rwlock);
   return -old_size + NEWUNIT_START;
 }
 
 
-/* Free a previously allocated newunit= unit number.  unit_lock must
+/* Free a previously allocated newunit= unit number.  unit_rwlock must
    be held when calling.  */
 
 void
diff --git a/libgfortran/io/unix.c b/libgfortran/io/unix.c
index 616c1aab166..38eef76a8db 100644
--- a/libgfortran/io/unix.c
+++ b/libgfortran/io/unix.c
@@ -1774,7 +1774,7 @@ find_file (const char *file, gfc_charlen_type file_len)
   id = id_from_path (path);
 #endif
 
-  LOCK (&unit_lock);
+  RDLOCK (&unit_rwlock);
 retry:
   u = find_file0 (unit_root, FIND_FILE0_ARGS);
   if (u != NULL)
@@ -1783,19 +1783,19 @@ retry:
       if (! __gthread_mutex_trylock (&u->lock))
 	{
 	  /* assert (u->closed == 0); */
-	  UNLOCK (&unit_lock);
+	  RWUNLOCK (&unit_rwlock);
 	  goto done;
 	}
 
       inc_waiting_locked (u);
     }
-  UNLOCK (&unit_lock);
+  RWUNLOCK (&unit_rwlock);
   if (u != NULL)
     {
       LOCK (&u->lock);
       if (u->closed)
 	{
-	  LOCK (&unit_lock);
+	  RDLOCK (&unit_rwlock);
 	  UNLOCK (&u->lock);
 	  if (predec_waiting_locked (u) == 0)
 	    free (u);
@@ -1839,13 +1839,13 @@ flush_all_units (void)
   gfc_unit *u;
   int min_unit = 0;
 
-  LOCK (&unit_lock);
+  WRLOCK (&unit_rwlock);
   do
     {
       u = flush_all_units_1 (unit_root, min_unit);
       if (u != NULL)
 	inc_waiting_locked (u);
-      UNLOCK (&unit_lock);
+      RWUNLOCK (&unit_rwlock);
       if (u == NULL)
 	return;
 
@@ -1856,13 +1856,13 @@ flush_all_units (void)
       if (u->closed == 0)
 	{
 	  sflush (u->s);
-	  LOCK (&unit_lock);
+	  WRLOCK (&unit_rwlock);
 	  UNLOCK (&u->lock);
 	  (void) predec_waiting_locked (u);
 	}
       else
 	{
-	  LOCK (&unit_lock);
+	  WRLOCK (&unit_rwlock);
 	  UNLOCK (&u->lock);
 	  if (predec_waiting_locked (u) == 0)
 	    free (u);
-- 
2.31.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-12-28 10:01 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-22  2:19 [PATCH] libgfortran: Replace mutex with rwlock Lipeng Zhu
  -- strict thread matches above, loose matches on Subject: below --
2022-12-22  0:27 Lipeng Zhu
2022-12-26  0:57 ` Steve Kargl
2022-12-27 16:33   ` H.J. Lu
2022-12-28 10:00     ` Zhu, Lipeng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).