public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [patch, libfortran] Fix race condition in asynchronous I/O
@ 2020-02-17 22:02 Thomas Koenig
  2020-02-18 20:02 ` Thomas Koenig
  0 siblings, 1 reply; 2+ messages in thread
From: Thomas Koenig @ 2020-02-17 22:02 UTC (permalink / raw)
  To: fortran, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1719 bytes --]

Hello world,

as you can see from the PR, async_io_4.f90 exhibited a race condition
every few thousands to tens of thousands of cases.

This was difficult to track down, and took some thinking, discussion
with Nicolas and enabling of the debugging feature of the async I/O,
which finally produced a failure which could then be analyzed.

The solution was to remove the mutexes for the different conditions that
can happen in the async library, relying on the lock that protects the
async I/O queue instead.

Tested by Bill Seurer (thanks!) with a previous version of the patch,
and with valgrind --tool=helgrind and valgrind --tool=drd, both of
which showed no (new) failures for all of the async_io_*.f90 tests.

So, OK for trunk and gcc-9?

Regards

	Thomas

2020-02-17  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/93599
	* io/async.c (destroy_adv_cond): Do not destroy lock.
	(async_io): Make sure au->lock is locked for finishing of thread.
	Do not lock/unlock around signalling emptysignal. Unlock au->lock
	before return.
	(init_adv_cond): Do not initialize lock.
	(enqueue_transfer): Unlock after signal.
	(enqueue_done_id): Likewise.
	(enqueue_done): Likewise.
	(enqueue_close): Likewise.
	(enqueue_data_transfer): Likewise.
	(async_wait_id): Do not lock/unlock around signalling au->work.
	(async_wait): Unlock after signal.
	* io/async.h (SIGNAL): Add comment about needed au->lock.
	Remove locking/unlocking of advcond->lock.
	(WAIT_SIGNAL_MUTEX): Add comment. Remove locking/unlocking of
	advcond->lock.  Unlock mutex only at the end.  Loop on
	__ghread_cond_wait returning zero.
	(REVOKE_SIGNAL): Add comment. Remove locking/unlocking of
	advcond->lock.
	(struct adv_cond): Remove mutex from struct.

[-- Attachment #2: p5.diff --]
[-- Type: text/x-patch, Size: 5845 bytes --]

diff --git a/libgfortran/io/async.c b/libgfortran/io/async.c
index ab214af8a66..63b9158c0ba 100644
--- a/libgfortran/io/async.c
+++ b/libgfortran/io/async.c
@@ -80,7 +80,6 @@ update_pdt (st_parameter_dt **old, st_parameter_dt *new) {
 static void
 destroy_adv_cond (struct adv_cond *ac)
 {
-  T_ERROR (__gthread_mutex_destroy, &ac->lock);
   T_ERROR (__gthread_cond_destroy, &ac->signal);
 }
 
@@ -156,6 +155,7 @@ async_io (void *arg)
 
 		case AIO_CLOSE:
 		  NOTE ("Received AIO_CLOSE");
+		  LOCK (&au->lock);
 		  goto finish_thread;
 
 		default:
@@ -175,7 +175,6 @@ async_io (void *arg)
 	      else if (ctq->type == AIO_CLOSE)
 		{
 		  NOTE ("Received AIO_CLOSE during error condition");
-		  UNLOCK (&au->lock);
 		  goto finish_thread;
 		}
 	    }
@@ -189,9 +188,7 @@ async_io (void *arg)
       au->tail = NULL;
       au->head = NULL;
       au->empty = 1;
-      UNLOCK (&au->lock);
       SIGNAL (&au->emptysignal);
-      LOCK (&au->lock);
     }
  finish_thread:
   au->tail = NULL;
@@ -199,6 +196,7 @@ async_io (void *arg)
   au->empty = 1;
   SIGNAL (&au->emptysignal);
   free (ctq);
+  UNLOCK (&au->lock);
   return NULL;
 }
 
@@ -223,7 +221,6 @@ static void
 init_adv_cond (struct adv_cond *ac)
 {
   ac->pending = 0;
-  __GTHREAD_MUTEX_INIT_FUNCTION (&ac->lock);
   __GTHREAD_COND_INIT_FUNCTION (&ac->signal);
 }
 
@@ -279,8 +276,8 @@ enqueue_transfer (async_unit *au, transfer_args *arg, enum aio_do type)
   au->tail = tq;
   REVOKE_SIGNAL (&(au->emptysignal));
   au->empty = false;
-  UNLOCK (&au->lock);
   SIGNAL (&au->work);
+  UNLOCK (&au->lock);
 }
 
 /* Enqueue an st_write_done or st_read_done which contains an ID.  */
@@ -303,8 +300,8 @@ enqueue_done_id (async_unit *au, enum aio_do type)
   au->empty = false;
   ret = au->id.high++;
   NOTE ("Enqueue id: %d", ret);
-  UNLOCK (&au->lock);
   SIGNAL (&au->work);
+  UNLOCK (&au->lock);
   return ret;
 }
 
@@ -324,8 +321,8 @@ enqueue_done (async_unit *au, enum aio_do type)
   au->tail = tq;
   REVOKE_SIGNAL (&(au->emptysignal));
   au->empty = false;
-  UNLOCK (&au->lock);
   SIGNAL (&au->work);
+  UNLOCK (&au->lock);
 }
 
 /* Enqueue a CLOSE statement.  */
@@ -344,8 +341,8 @@ enqueue_close (async_unit *au)
   au->tail = tq;
   REVOKE_SIGNAL (&(au->emptysignal));
   au->empty = false;
-  UNLOCK (&au->lock);
   SIGNAL (&au->work);
+  UNLOCK (&au->lock);
 }
 
 /* The asynchronous unit keeps the currently active PDT around.
@@ -374,9 +371,9 @@ enqueue_data_transfer_init (async_unit *au, st_parameter_dt *dt, int read_flag)
     au->tail->next = tq;
   au->tail = tq;
   REVOKE_SIGNAL (&(au->emptysignal));
-  au->empty = 0;
-  UNLOCK (&au->lock);
+  au->empty = false;
   SIGNAL (&au->work);
+  UNLOCK (&au->lock);
 }
 
 /* Collect the errors that may have happened asynchronously.  Return true if
@@ -430,9 +427,7 @@ async_wait_id (st_parameter_common *cmp, async_unit *au, int i)
   NOTE ("Waiting for id %d", i);
   if (au->id.waiting < i)
     au->id.waiting = i;
-  UNLOCK (&au->lock);
   SIGNAL (&(au->work));
-  LOCK (&au->lock);
   WAIT_SIGNAL_MUTEX (&(au->id.done),
 		     (au->id.low >= au->id.waiting || au->empty), &au->lock);
   LOCK (&au->lock);
@@ -454,8 +449,8 @@ async_wait (st_parameter_common *cmp, async_unit *au)
   if (cmp == NULL)
     cmp = au->error.cmp;
 
-  SIGNAL (&(au->work));
   LOCK (&(au->lock));
+  SIGNAL (&(au->work));
 
   if (au->empty)
     {
diff --git a/libgfortran/io/async.h b/libgfortran/io/async.h
index c6b2e0f94bd..17d303c127b 100644
--- a/libgfortran/io/async.h
+++ b/libgfortran/io/async.h
@@ -229,44 +229,44 @@
 
 #if ASYNC_IO
 
+/* au->lock has to be held when calling this macro.  */
+
 #define SIGNAL(advcond) do{						\
-    INTERN_LOCK (&(advcond)->lock);					\
     (advcond)->pending = 1;						\
     DEBUG_PRINTF ("%s%-75s %20s():%-5d %18p\n", aio_prefix, DEBUG_ORANGE "SIGNAL: " DEBUG_NORM \
 		 #advcond, __FUNCTION__, __LINE__, (void *) advcond);	\
     T_ERROR (__gthread_cond_broadcast, &(advcond)->signal);			\
-    INTERN_UNLOCK (&(advcond)->lock);					\
   } while (0)
 
+/* Has to be entered with mutex locked.  */
+
 #define WAIT_SIGNAL_MUTEX(advcond, condition, mutex) do{		\
     __label__ finish;		       					\
-    INTERN_LOCK (&((advcond)->lock));					\
     DEBUG_PRINTF ("%s%-75s %20s():%-5d %18p\n", aio_prefix, DEBUG_BLUE "WAITING: " DEBUG_NORM \
 		 #advcond, __FUNCTION__, __LINE__, (void *) advcond);	\
-    if ((advcond)->pending || (condition)){				\
-      UNLOCK (mutex);							\
+    if ((advcond)->pending || (condition))				\
       goto finish;							\
-    }									\
-    UNLOCK (mutex);							\
-     while (!__gthread_cond_wait(&(advcond)->signal, &(advcond)->lock)) {	\
-       { int cond;							\
-	 LOCK (mutex); cond = condition; UNLOCK (mutex);	\
-	   if (cond){							\
-	     DEBUG_PRINTF ("%s%-75s %20s():%-5d %18p\n", aio_prefix, DEBUG_ORANGE "REC: " DEBUG_NORM \
+    while (1)								\
+      {									\
+	int err_ret = __gthread_cond_wait(&(advcond)->signal, mutex);	\
+	if (err_ret) internal_error (NULL, "WAIT_SIGNAL_MUTEX failed");	\
+	if (condition)							\
+	  {								\
+	    DEBUG_PRINTF ("%s%-75s %20s():%-5d %18p\n", aio_prefix, DEBUG_ORANGE \
+			  "REC: " DEBUG_NORM				\
 			  #advcond,  __FUNCTION__, __LINE__, (void *)advcond); \
 	    break;				      			\
 	  }								\
       }									\
-    }									\
   finish:								\
     (advcond)->pending = 0;						\
-		 INTERN_UNLOCK (&((advcond)->lock));			\
+    UNLOCK (mutex);							\
   } while (0)
 
+/* au->lock has to be held when calling this macro.  */
+
 #define REVOKE_SIGNAL(advcond) do{		\
-    INTERN_LOCK (&(advcond)->lock);		\
     (advcond)->pending = 0;			\
-    INTERN_UNLOCK (&(advcond)->lock);		\
   } while (0)
 
 #else
@@ -330,7 +330,6 @@ struct adv_cond
 {
 #if ASYNC_IO
   int pending;
-  __gthread_mutex_t lock;
   __gthread_cond_t signal;
 #endif
 };

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [patch, libfortran] Fix race condition in asynchronous I/O
  2020-02-17 22:02 [patch, libfortran] Fix race condition in asynchronous I/O Thomas Koenig
@ 2020-02-18 20:02 ` Thomas Koenig
  0 siblings, 0 replies; 2+ messages in thread
From: Thomas Koenig @ 2020-02-18 20:02 UTC (permalink / raw)
  To: fortran, gcc-patches

Now committed to trunk after Jerry's OK, as
https://gcc.gnu.org/g:3fe1910509e32d611b3a7b8503502103bc53b5e4

I have to say I still have trouble establishing a git workflow
which works smoothly.  You will notice an "asdf" in the commit
message which was not really meant to be there...

Regards

	Thomas

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2020-02-18 20:02 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-17 22:02 [patch, libfortran] Fix race condition in asynchronous I/O Thomas Koenig
2020-02-18 20:02 ` Thomas Koenig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).