public inbox for libc-ports@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 2/3, MIPS] Rewrite MIPS' pthread_spin_[try]lock using __atomic_* builtins.
@ 2012-06-14  4:36 Maxim Kuvyrkov
  2012-06-28 23:10 ` Joseph S. Myers
  0 siblings, 1 reply; 34+ messages in thread
From: Maxim Kuvyrkov @ 2012-06-14  4:36 UTC (permalink / raw)
  To: Joseph S. Myers; +Cc: Richard Sandiford, libc-ports

This is a follow-up patch to convert assembly implementations of MIPS' pthread_spin_[try]lock routines using __atomic_* builtins.  Using the builtins the compiler can generate optimal code for a particular MIPS processor for which GLIBC is being built.

OK to apply once 2.16 branches?

Thank you,

--
Maxim Kuvyrkov
CodeSourcery / Mentor Graphics


2012-06-13  Tom de Vries  <vries@codesourcery.com>
	    Maxim Kuvyrkov  <maxim@codesourcery.com>

	* sysdeps/mips/nptl/pthread_spin_[try]lock.S: Remove.
	* sysdeps/mips/nptl/pthread_spin_[try]lock.c: New files.
---
 sysdeps/mips/nptl/pthread_spin_lock.S    |   36 -------------------------
 sysdeps/mips/nptl/pthread_spin_lock.c    |   43 ++++++++++++++++++++++++++++++
 sysdeps/mips/nptl/pthread_spin_trylock.S |   40 ---------------------------
 sysdeps/mips/nptl/pthread_spin_trylock.c |   26 ++++++++++++++++++
 4 files changed, 69 insertions(+), 76 deletions(-)
 delete mode 100644 sysdeps/mips/nptl/pthread_spin_lock.S
 create mode 100644 sysdeps/mips/nptl/pthread_spin_lock.c
 delete mode 100644 sysdeps/mips/nptl/pthread_spin_trylock.S
 create mode 100644 sysdeps/mips/nptl/pthread_spin_trylock.c

diff --git a/sysdeps/mips/nptl/pthread_spin_lock.S b/sysdeps/mips/nptl/pthread_spin_lock.S
deleted file mode 100644
index a8504f1..0000000
--- a/sysdeps/mips/nptl/pthread_spin_lock.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sys/asm.h>
-#include <sysdep.h>
-#include <sgidefs.h>
-
-ENTRY (pthread_spin_lock)
-	.set	push
-#if _MIPS_SIM == _ABIO32
-	.set	mips2
-#endif
-1:	ll	a2, 0(a0)
-	li	a1, 1
-	bnez	a2, 1b
-	sc	a1, 0(a0)
-	beqz	a1, 1b
-	MIPS_SYNC
-	.set	pop
-	li	v0, 0
-	ret
-PSEUDO_END (pthread_spin_lock)
diff --git a/sysdeps/mips/nptl/pthread_spin_lock.c b/sysdeps/mips/nptl/pthread_spin_lock.c
new file mode 100644
index 0000000..42dcb8d
--- /dev/null
+++ b/sysdeps/mips/nptl/pthread_spin_lock.c
@@ -0,0 +1,43 @@
+/* Copyright (C) 2012 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+#include <atomic.h>
+#include "pthreadP.h"
+
+int
+pthread_spin_lock (pthread_spinlock_t *lock)
+{
+
+  /* atomic_exchange takes less instructions than atomic_compare_and_exchange
+     for MIPS, in particular when the MIPS ISA supports an atomic exchange
+     instruction.  On the other hand, atomic_exchange potentially generates more
+     bus traffic in case the value already present is written back.
+     We assume that the first try mostly will be successful, so we use
+     atomic_exchange.  For the other tries, we use
+     atomic_compare_and_exchange.  */
+  if (atomic_exchange_acq (lock, 1) == 0)
+    return 0;
+
+  do
+    {
+      while (*lock != 0)
+	;
+    }
+  while (atomic_compare_and_exchange_val_acq (lock, 1, 0) != 0);
+
+  return 0;
+}
diff --git a/sysdeps/mips/nptl/pthread_spin_trylock.S b/sysdeps/mips/nptl/pthread_spin_trylock.S
deleted file mode 100644
index 95b55c3..0000000
--- a/sysdeps/mips/nptl/pthread_spin_trylock.S
+++ /dev/null
@@ -1,40 +0,0 @@
-/* Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sys/asm.h>
-#include <sysdep.h>
-#define _ERRNO_H 1
-#include <bits/errno.h>
-#include <sgidefs.h>
-
-ENTRY (pthread_spin_trylock)
-	.set	push
-#if _MIPS_SIM == _ABIO32
-	.set	mips2
-#endif
-	ll	a2, 0(a0)
-	li	a1, 1
-	bnez	a2, 1f
-	sc	a1, 0(a0)
-	beqz	a1, 1f
-	MIPS_SYNC
-	.set	pop
-	li	v0, 0
-	ret
-1:	li	v0, EBUSY
-	ret
-PSEUDO_END (pthread_spin_trylock)
diff --git a/sysdeps/mips/nptl/pthread_spin_trylock.c b/sysdeps/mips/nptl/pthread_spin_trylock.c
new file mode 100644
index 0000000..ed75813
--- /dev/null
+++ b/sysdeps/mips/nptl/pthread_spin_trylock.c
@@ -0,0 +1,26 @@
+/* Copyright (C) 2012 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+#include <errno.h>
+#include <atomic.h>
+#include "pthreadP.h"
+
+int
+pthread_spin_trylock (pthread_spinlock_t *lock)
+{
+  return atomic_compare_and_exchange_val_acq (lock, 1, 0) ? EBUSY : 0;
+}
-- 
1.7.4.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 2/3, MIPS] Rewrite MIPS' pthread_spin_[try]lock using __atomic_* builtins.
  2012-06-14  4:36 [PATCH 2/3, MIPS] Rewrite MIPS' pthread_spin_[try]lock using __atomic_* builtins Maxim Kuvyrkov
@ 2012-06-28 23:10 ` Joseph S. Myers
  2012-07-11  5:58   ` [PATCH] Unify pthread_spin_[try]lock implementations Maxim Kuvyrkov
  0 siblings, 1 reply; 34+ messages in thread
From: Joseph S. Myers @ 2012-06-28 23:10 UTC (permalink / raw)
  To: Maxim Kuvyrkov; +Cc: Richard Sandiford, libc-ports

On Thu, 14 Jun 2012, Maxim Kuvyrkov wrote:

> 2012-06-13  Tom de Vries  <vries@codesourcery.com>
> 	    Maxim Kuvyrkov  <maxim@codesourcery.com>
> 
> 	* sysdeps/mips/nptl/pthread_spin_[try]lock.S: Remove.
> 	* sysdeps/mips/nptl/pthread_spin_[try]lock.c: New files.

You need to list each file individually rather than writing 
"pthread_spin_[try]lock.S".  See the GNU Coding Standards.

> diff --git a/sysdeps/mips/nptl/pthread_spin_trylock.c b/sysdeps/mips/nptl/pthread_spin_trylock.c
> new file mode 100644
> index 0000000..ed75813
> --- /dev/null
> +++ b/sysdeps/mips/nptl/pthread_spin_trylock.c

This file looks generic and essentially the same as the ARM and M68K 
versions.  I think it would be better to put such a generic version in a 
generic directory (maybe directly in nptl/), and remove the ARM and M68K 
copies, rather than adding a third copy of the same generic 
implementation.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-06-28 23:10 ` Joseph S. Myers
@ 2012-07-11  5:58   ` Maxim Kuvyrkov
  2012-07-11  8:15     ` Roland McGrath
  0 siblings, 1 reply; 34+ messages in thread
From: Maxim Kuvyrkov @ 2012-07-11  5:58 UTC (permalink / raw)
  To: Joseph S.Myers; +Cc: Richard Sandiford, libc-ports, libc-alpha Devel

On 29/06/2012, at 11:10 AM, Joseph S. Myers wrote:

>> diff --git a/sysdeps/mips/nptl/pthread_spin_trylock.c b/sysdeps/mips/nptl/pthread_spin_trylock.c
>> new file mode 100644
>> index 0000000..ed75813
>> --- /dev/null
>> +++ b/sysdeps/mips/nptl/pthread_spin_trylock.c
> 
> This file looks generic and essentially the same as the ARM and M68K 
> versions.  I think it would be better to put such a generic version in a 
> generic directory (maybe directly in nptl/), and remove the ARM and M68K 
> copies, rather than adding a third copy of the same generic 
> implementation.

Indeed.  The attached patches for glibc proper and ports unify pthread_spin_lock and pthread_spin_trylock for ARM, HPPA, M68K and MIPS.  I've tested these on MIPS, and the changes for other targets are only mechanical.

Any comments?

--
Maxim Kuvyrkov
CodeSourcery / Mentor Graphics

Add generic versions of pthread_spin_lock and pthread_spin_trylock.

	2012-07-09  Maxim Kuvyrkov  <maxim@codesourcery.com>

	* nptl/pthread_spin_lock.c: New file.
	* nptl/pthread_spin_trylock.c: New file.
---
 nptl/pthread_spin_lock.c    |   30 ++++++++++++++++++++++++++++++
 nptl/pthread_spin_trylock.c |   27 +++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 0 deletions(-)
 create mode 100644 nptl/pthread_spin_lock.c
 create mode 100644 nptl/pthread_spin_trylock.c

diff --git a/nptl/pthread_spin_lock.c b/nptl/pthread_spin_lock.c
new file mode 100644
index 0000000..1daa43d
--- /dev/null
+++ b/nptl/pthread_spin_lock.c
@@ -0,0 +1,30 @@
+/* pthread_spin_lock -- lock a spin lock.  Generic version.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <atomic.h>
+#include "pthreadP.h"
+
+int
+pthread_spin_lock (pthread_spinlock_t *lock)
+{
+  while (atomic_compare_and_exchange_val_acq (lock, 1, 0) != 0)
+    while (*lock != 0)
+      ;
+
+  return 0;
+}
diff --git a/nptl/pthread_spin_trylock.c b/nptl/pthread_spin_trylock.c
new file mode 100644
index 0000000..a097a86
--- /dev/null
+++ b/nptl/pthread_spin_trylock.c
@@ -0,0 +1,27 @@
+/* pthread_spin_trylock -- trylock a spin lock.  Generic version.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <errno.h>
+#include <atomic.h>
+#include "pthreadP.h"
+
+int
+pthread_spin_trylock (pthread_spinlock_t *lock)
+{
+  return atomic_compare_and_exchange_val_acq (lock, 1, 0) ? EBUSY : 0;
+}
-- 
1.7.4.1

Unify ARM, HPPA, M68K and MIPS pthread_spin_lock and pthread_spin_trylock.

2012-06-13  Maxim Kuvyrkov  <maxim@codesourcery.com>

	* sysdeps/arm/nptl/pthread_spin_lock.c: Remove, use generic version.
	* sysdeps/arm/nptl/pthread_spin_trylock.c: Same.
	* sysdeps/hppa/nptl/pthread_spin_lock.c: Same.
	* sysdeps/hppa/nptl/pthread_spin_trylock.c: Same.
	* sysdeps/m68k/nptl/pthread_spin_lock.c: Same.
	* sysdeps/m68k/nptl/pthread_spin_trylock.c: Same.
	* sysdeps/mips/nptl/pthread_spin_lock.S: Same.
	* sysdeps/mips/nptl/pthread_spin_trylock.S: Same.
---
 sysdeps/arm/nptl/pthread_spin_lock.c     |   29 ---------------------
 sysdeps/arm/nptl/pthread_spin_trylock.c  |   26 -------------------
 sysdeps/hppa/nptl/pthread_spin_lock.c    |   37 ---------------------------
 sysdeps/hppa/nptl/pthread_spin_trylock.c |   33 ------------------------
 sysdeps/m68k/nptl/pthread_spin_lock.c    |   30 ----------------------
 sysdeps/m68k/nptl/pthread_spin_trylock.c |   27 --------------------
 sysdeps/mips/nptl/pthread_spin_lock.S    |   36 ---------------------------
 sysdeps/mips/nptl/pthread_spin_trylock.S |   40 ------------------------------
 8 files changed, 0 insertions(+), 258 deletions(-)
 delete mode 100644 sysdeps/arm/nptl/pthread_spin_lock.c
 delete mode 100644 sysdeps/arm/nptl/pthread_spin_trylock.c
 delete mode 100644 sysdeps/hppa/nptl/pthread_spin_lock.c
 delete mode 100644 sysdeps/hppa/nptl/pthread_spin_trylock.c
 delete mode 100644 sysdeps/m68k/nptl/pthread_spin_lock.c
 delete mode 100644 sysdeps/m68k/nptl/pthread_spin_trylock.c
 delete mode 100644 sysdeps/mips/nptl/pthread_spin_lock.S
 delete mode 100644 sysdeps/mips/nptl/pthread_spin_trylock.S

diff --git a/sysdeps/arm/nptl/pthread_spin_lock.c b/sysdeps/arm/nptl/pthread_spin_lock.c
deleted file mode 100644
index 3a23bd3..0000000
--- a/sysdeps/arm/nptl/pthread_spin_lock.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/* Copyright (C) 2008 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <atomic.h>
-#include "pthreadP.h"
-
-int
-pthread_spin_lock (pthread_spinlock_t *lock)
-{
-  while (atomic_compare_and_exchange_val_acq (lock, 1, 0) != 0)
-   while (*lock != 0)
-    ;
-
-  return 0;
-}
diff --git a/sysdeps/arm/nptl/pthread_spin_trylock.c b/sysdeps/arm/nptl/pthread_spin_trylock.c
deleted file mode 100644
index 7d31180..0000000
--- a/sysdeps/arm/nptl/pthread_spin_trylock.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/* Copyright (C) 2008 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <errno.h>
-#include <atomic.h>
-#include "pthreadP.h"
-
-int
-pthread_spin_trylock (pthread_spinlock_t *lock)
-{
-  return atomic_compare_and_exchange_val_acq (lock, 1, 0) ? EBUSY : 0;
-}
diff --git a/sysdeps/hppa/nptl/pthread_spin_lock.c b/sysdeps/hppa/nptl/pthread_spin_lock.c
deleted file mode 100644
index bcf2240..0000000
--- a/sysdeps/hppa/nptl/pthread_spin_lock.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <atomic.h>
-#include "pthreadP.h"
-
-int
-pthread_spin_lock (pthread_spinlock_t *lock)
-{
-#if 0
-  volatile unsigned int *addr = __ldcw_align (lock);
-
-  while (__ldcw (addr) == 0)
-    while (*addr == 0) ;
-
-  return 0;
-#endif
-
-  while (atomic_compare_and_exchange_val_acq(lock, 1, 0) == 1)
-    while (*lock == 1);
-  
-  return 0;
-}
diff --git a/sysdeps/hppa/nptl/pthread_spin_trylock.c b/sysdeps/hppa/nptl/pthread_spin_trylock.c
deleted file mode 100644
index a8028610..0000000
--- a/sysdeps/hppa/nptl/pthread_spin_trylock.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/* Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <errno.h>
-#include <atomic.h>
-#include "pthreadP.h"
-
-int
-pthread_spin_trylock (pthread_spinlock_t *lock)
-{
-#if 0
-  volatile unsigned int *a = __ldcw_align (lock);
-
-  return __ldcw (a) ? 0 : EBUSY;
-#endif
-
-  return atomic_compare_and_exchange_val_acq(lock, 1, 0) ? EBUSY : 0;
-
-}
diff --git a/sysdeps/m68k/nptl/pthread_spin_lock.c b/sysdeps/m68k/nptl/pthread_spin_lock.c
deleted file mode 100644
index 90a8262..0000000
--- a/sysdeps/m68k/nptl/pthread_spin_lock.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Copyright (C) 2010 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <atomic.h>
-#include "pthreadP.h"
-
-int
-pthread_spin_lock (pthread_spinlock_t *lock)
-{
-  while (atomic_compare_and_exchange_val_acq(lock, 1, 0) != 0)
-   while (*lock != 0)
-    ;
-
-  return 0;
-}
diff --git a/sysdeps/m68k/nptl/pthread_spin_trylock.c b/sysdeps/m68k/nptl/pthread_spin_trylock.c
deleted file mode 100644
index f4b0c0d..0000000
--- a/sysdeps/m68k/nptl/pthread_spin_trylock.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/* Copyright (C) 2010 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <errno.h>
-#include <atomic.h>
-#include "pthreadP.h"
-
-int
-pthread_spin_trylock (pthread_spinlock_t *lock)
-{
-  return atomic_compare_and_exchange_val_acq(lock, 1, 0) ? EBUSY : 0;
-}
diff --git a/sysdeps/mips/nptl/pthread_spin_lock.S b/sysdeps/mips/nptl/pthread_spin_lock.S
deleted file mode 100644
index a8504f1..0000000
--- a/sysdeps/mips/nptl/pthread_spin_lock.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sys/asm.h>
-#include <sysdep.h>
-#include <sgidefs.h>
-
-ENTRY (pthread_spin_lock)
-	.set	push
-#if _MIPS_SIM == _ABIO32
-	.set	mips2
-#endif
-1:	ll	a2, 0(a0)
-	li	a1, 1
-	bnez	a2, 1b
-	sc	a1, 0(a0)
-	beqz	a1, 1b
-	MIPS_SYNC
-	.set	pop
-	li	v0, 0
-	ret
-PSEUDO_END (pthread_spin_lock)
diff --git a/sysdeps/mips/nptl/pthread_spin_trylock.S b/sysdeps/mips/nptl/pthread_spin_trylock.S
deleted file mode 100644
index 95b55c3..0000000
--- a/sysdeps/mips/nptl/pthread_spin_trylock.S
+++ /dev/null
@@ -1,40 +0,0 @@
-/* Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sys/asm.h>
-#include <sysdep.h>
-#define _ERRNO_H 1
-#include <bits/errno.h>
-#include <sgidefs.h>
-
-ENTRY (pthread_spin_trylock)
-	.set	push
-#if _MIPS_SIM == _ABIO32
-	.set	mips2
-#endif
-	ll	a2, 0(a0)
-	li	a1, 1
-	bnez	a2, 1f
-	sc	a1, 0(a0)
-	beqz	a1, 1f
-	MIPS_SYNC
-	.set	pop
-	li	v0, 0
-	ret
-1:	li	v0, EBUSY
-	ret
-PSEUDO_END (pthread_spin_trylock)
-- 
1.7.4.1



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-07-11  5:58   ` [PATCH] Unify pthread_spin_[try]lock implementations Maxim Kuvyrkov
@ 2012-07-11  8:15     ` Roland McGrath
  2012-07-11  8:25       ` David Miller
  2012-07-11  8:26       ` Maxim Kuvyrkov
  0 siblings, 2 replies; 34+ messages in thread
From: Roland McGrath @ 2012-07-11  8:15 UTC (permalink / raw)
  To: Maxim Kuvyrkov
  Cc: Joseph S.Myers, Richard Sandiford, libc-ports, libc-alpha Devel

> +int
> +pthread_spin_lock (pthread_spinlock_t *lock)
> +{
> +  while (atomic_compare_and_exchange_val_acq (lock, 1, 0) != 0)
> +    while (*lock != 0)
> +      ;

What's the inner loop for?

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-07-11  8:15     ` Roland McGrath
@ 2012-07-11  8:25       ` David Miller
  2012-07-11  8:44         ` Andrew Haley
  2012-07-11  8:26       ` Maxim Kuvyrkov
  1 sibling, 1 reply; 34+ messages in thread
From: David Miller @ 2012-07-11  8:25 UTC (permalink / raw)
  To: roland; +Cc: maxim, joseph, rdsandiford, libc-ports, libc-alpha

From: Roland McGrath <roland@hack.frob.com>
Date: Wed, 11 Jul 2012 01:14:41 -0700 (PDT)

>> +int
>> +pthread_spin_lock (pthread_spinlock_t *lock)
>> +{
>> +  while (atomic_compare_and_exchange_val_acq (lock, 1, 0) != 0)
>> +    while (*lock != 0)
>> +      ;
> 
> What's the inner loop for?

I guess the idea is to spin with non-atomic reads when the lock is
contended so we don't do expensive bus cycles grabbing the cache line
in exclusive state over and over again.

If we spun using only the atomic it would be very expensive.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-07-11  8:15     ` Roland McGrath
  2012-07-11  8:25       ` David Miller
@ 2012-07-11  8:26       ` Maxim Kuvyrkov
  1 sibling, 0 replies; 34+ messages in thread
From: Maxim Kuvyrkov @ 2012-07-11  8:26 UTC (permalink / raw)
  To: Roland McGrath
  Cc: Joseph S.Myers, Richard Sandiford, libc-ports, libc-alpha Devel

On 11/07/2012, at 8:14 PM, Roland McGrath wrote:

>> +int
>> +pthread_spin_lock (pthread_spinlock_t *lock)
>> +{
>> +  while (atomic_compare_and_exchange_val_acq (lock, 1, 0) != 0)
>> +    while (*lock != 0)
>> +      ;
> 
> What's the inner loop for?

My understanding is that this is an optimization.  On many architectures atomic_compare_and_exchange will synchronize memory across all CPUs, which will penalize other running threads.  Spinning in a simple (*lock != 0) loop will allow those threads to finish whatever they are doing faster and release the lock.

--
Maxim Kuvyrkov
CodeSourcery / Mentor Graphics

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-07-11  8:25       ` David Miller
@ 2012-07-11  8:44         ` Andrew Haley
  2012-07-11  8:54           ` Maxim Kuvyrkov
  0 siblings, 1 reply; 34+ messages in thread
From: Andrew Haley @ 2012-07-11  8:44 UTC (permalink / raw)
  To: David Miller; +Cc: roland, maxim, joseph, rdsandiford, libc-ports, libc-alpha

On 07/11/2012 09:25 AM, David Miller wrote:
> From: Roland McGrath <roland@hack.frob.com>
> Date: Wed, 11 Jul 2012 01:14:41 -0700 (PDT)
> 
>>> +int
>>> +pthread_spin_lock (pthread_spinlock_t *lock)
>>> +{
>>> +  while (atomic_compare_and_exchange_val_acq (lock, 1, 0) != 0)
>>> +    while (*lock != 0)
>>> +      ;
>>
>> What's the inner loop for?
> 
> I guess the idea is to spin with non-atomic reads when the lock is
> contended so we don't do expensive bus cycles grabbing the cache line
> in exclusive state over and over again.
> 
> If we spun using only the atomic it would be very expensive.

Sure, but on ARM at least there's no guarantee that the local processor
will see changes to the state of the lock when another processor frees
it.

Andrew.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-07-11  8:44         ` Andrew Haley
@ 2012-07-11  8:54           ` Maxim Kuvyrkov
  2012-07-11  9:02             ` Andrew Haley
  0 siblings, 1 reply; 34+ messages in thread
From: Maxim Kuvyrkov @ 2012-07-11  8:54 UTC (permalink / raw)
  To: Andrew Haley
  Cc: David Miller, roland, joseph, rdsandiford, libc-ports, libc-alpha

On 11/07/2012, at 8:44 PM, Andrew Haley wrote:

> On 07/11/2012 09:25 AM, David Miller wrote:
>> From: Roland McGrath <roland@hack.frob.com>
>> Date: Wed, 11 Jul 2012 01:14:41 -0700 (PDT)
>> 
>>>> +int
>>>> +pthread_spin_lock (pthread_spinlock_t *lock)
>>>> +{
>>>> +  while (atomic_compare_and_exchange_val_acq (lock, 1, 0) != 0)
>>>> +    while (*lock != 0)
>>>> +      ;
>>> 
>>> What's the inner loop for?
>> 
>> I guess the idea is to spin with non-atomic reads when the lock is
>> contended so we don't do expensive bus cycles grabbing the cache line
>> in exclusive state over and over again.
>> 
>> If we spun using only the atomic it would be very expensive.
> 
> Sure, but on ARM at least there's no guarantee that the local processor
> will see changes to the state of the lock when another processor frees
> it.

Hm, but this is exactly what ARM port does and did for a while.  I guess the memory on the local processor eventually gets updated and the loop breaks.

This is an interesting point and maybe introducing a counter like below will improve spinlocks for ARM.

+  int counter = 123456;
+  while (atomic_compare_and_exchange_val_acq (lock, 1, 0) != 0)
+    while (*lock != 0 && --counter)
+      ;

--
Maxim Kuvyrkov
CodeSourcery / Mentor Graphics

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-07-11  8:54           ` Maxim Kuvyrkov
@ 2012-07-11  9:02             ` Andrew Haley
  2012-07-11  9:05               ` Maxim Kuvyrkov
  0 siblings, 1 reply; 34+ messages in thread
From: Andrew Haley @ 2012-07-11  9:02 UTC (permalink / raw)
  To: Maxim Kuvyrkov
  Cc: David Miller, roland, joseph, rdsandiford, libc-ports, libc-alpha

On 07/11/2012 09:53 AM, Maxim Kuvyrkov wrote:
> On 11/07/2012, at 8:44 PM, Andrew Haley wrote:
> 
>> On 07/11/2012 09:25 AM, David Miller wrote:
>>> From: Roland McGrath <roland@hack.frob.com>
>>> Date: Wed, 11 Jul 2012 01:14:41 -0700 (PDT)
>>>
>>>>> +int
>>>>> +pthread_spin_lock (pthread_spinlock_t *lock)
>>>>> +{
>>>>> +  while (atomic_compare_and_exchange_val_acq (lock, 1, 0) != 0)
>>>>> +    while (*lock != 0)
>>>>> +      ;
>>>>
>>>> What's the inner loop for?
>>>
>>> I guess the idea is to spin with non-atomic reads when the lock is
>>> contended so we don't do expensive bus cycles grabbing the cache line
>>> in exclusive state over and over again.
>>>
>>> If we spun using only the atomic it would be very expensive.
>>
>> Sure, but on ARM at least there's no guarantee that the local processor
>> will see changes to the state of the lock when another processor frees
>> it.
> 
> Hm, but this is exactly what ARM port does and did for a while.  I
> guess the memory on the local processor eventually gets updated and
> the loop breaks.

Maybe the thread ends its time slice and gets interrupted.  There
aren't any guarantees.

> This is an interesting point and maybe introducing a counter like
> below will improve spinlocks for ARM.
> 
> +  int counter = 123456;
> +  while (atomic_compare_and_exchange_val_acq (lock, 1, 0) != 0)
> +    while (*lock != 0 && --counter)
> +      ;

This is a much better idea than an unbounded spin.

Andrew.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-07-11  9:02             ` Andrew Haley
@ 2012-07-11  9:05               ` Maxim Kuvyrkov
  2012-07-11 11:22                 ` Roland McGrath
  0 siblings, 1 reply; 34+ messages in thread
From: Maxim Kuvyrkov @ 2012-07-11  9:05 UTC (permalink / raw)
  To: Andrew Haley
  Cc: David Miller, roland, joseph, rdsandiford, libc-ports, libc-alpha

On 11/07/2012, at 9:02 PM, Andrew Haley wrote:

> On 07/11/2012 09:53 AM, Maxim Kuvyrkov wrote:
> 
...
>> This is an interesting point and maybe introducing a counter like
>> below will improve spinlocks for ARM.
>> 
>> +  int counter = 123456;
>> +  while (atomic_compare_and_exchange_val_acq (lock, 1, 0) != 0)
>> +    while (*lock != 0 && --counter)
>> +      ;
> 
> This is a much better idea than an unbounded spin.

OK, but this will be a change on its own in a follow up patch.  I want to keep this patch just a mechanical change.

--
Maxim Kuvyrkov
CodeSourcery / Mentor Graphics

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-07-11  9:05               ` Maxim Kuvyrkov
@ 2012-07-11 11:22                 ` Roland McGrath
  2012-07-11 14:52                   ` Chris Metcalf
  2012-07-11 22:16                   ` Maxim Kuvyrkov
  0 siblings, 2 replies; 34+ messages in thread
From: Roland McGrath @ 2012-07-11 11:22 UTC (permalink / raw)
  To: Maxim Kuvyrkov
  Cc: Andrew Haley, David Miller, joseph, rdsandiford, libc-ports, libc-alpha

> OK, but this will be a change on its own in a follow up patch.  I want to
> keep this patch just a mechanical change.

I don't want to put anything in the generic code that isn't clearly correct
or isn't clearly justified or doesn't have thorough comments explaining any
nonobvious issues.  So get it right first before the generic code goes in.
Each machine dropping its old code in favor of the new generic code can
wait as long as each machine maintainer wants.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-07-11 11:22                 ` Roland McGrath
@ 2012-07-11 14:52                   ` Chris Metcalf
  2012-07-11 22:16                   ` Maxim Kuvyrkov
  1 sibling, 0 replies; 34+ messages in thread
From: Chris Metcalf @ 2012-07-11 14:52 UTC (permalink / raw)
  To: Roland McGrath
  Cc: Maxim Kuvyrkov, Andrew Haley, David Miller, joseph, rdsandiford,
	libc-ports, libc-alpha

On 7/11/2012 7:22 AM, Roland McGrath wrote:
>> OK, but this will be a change on its own in a follow up patch.  I want to
>> keep this patch just a mechanical change.
> I don't want to put anything in the generic code that isn't clearly correct
> or isn't clearly justified or doesn't have thorough comments explaining any
> nonobvious issues.  So get it right first before the generic code goes in.
> Each machine dropping its old code in favor of the new generic code can
> wait as long as each machine maintainer wants.

And this kind of thing definitely varies by architecture.  For example, on
TILE-Gx the atomic operation is almost as fast as a regular store or load,
since there is no bus locking involved: the atomic instruction is
dispatched over the memory interconnect to the home cache for the address
in question, and returns a result.  However, you definitely don't want to
have dozens and dozens of cores all hammering compare-exchanges into the
memory network, so some kind of backoff turns out to be critical.

In addition, the "test and test-and-set" idiom [1] that is being proposed
here is actually an anti-pattern for Tilera (and likely for any other
cache-coherent mesh-based memory interconnect).  The problem is that each
"test" instruction (i.e. a load) pulls a shared copy of the cache line into
the tester's local cache, and then when some other core does the
test-and-set operation, the first thing done by the home cache is to
invalidate all the shared copies, which is slow and also loads the memory
network unnecessarily.  So plain test-and-set in a loop, with some kind of
backoff (we use bounded exponential backoff) ends up being the best thing
to do.

This isn't to say that the generic version might not be plausible for many
existing architectures, just that it certainly isn't going to be right for
all of them.

[1] http://en.wikipedia.org/wiki/Test_and_Test-and-set

-- 
Chris Metcalf, Tilera Corp.
http://www.tilera.com

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-07-11 11:22                 ` Roland McGrath
  2012-07-11 14:52                   ` Chris Metcalf
@ 2012-07-11 22:16                   ` Maxim Kuvyrkov
  2012-07-25 18:13                     ` Roland McGrath
  1 sibling, 1 reply; 34+ messages in thread
From: Maxim Kuvyrkov @ 2012-07-11 22:16 UTC (permalink / raw)
  To: Roland McGrath
  Cc: Andrew Haley, David Miller, joseph, rdsandiford, libc-ports, libc-alpha

On 11/07/2012, at 11:22 PM, Roland McGrath wrote:

>> OK, but this will be a change on its own in a follow up patch.  I want to
>> keep this patch just a mechanical change.
> 
> I don't want to put anything in the generic code that isn't clearly correct
> or isn't clearly justified or doesn't have thorough comments explaining any
> nonobvious issues.  So get it right first before the generic code goes in.

This code _is_ right.  It may not be optimal for all machines, but it is correct.  I agree that the code is not obvious and the attached patch clears it up.

> Each machine dropping its old code in favor of the new generic code can
> wait as long as each machine maintainer wants.

This is not new code.  It is the exactly same code that ARM, MIPS, HPPA and M68K have all been using for several years, it is just moved into the generic directory.

Attached is the patch based on Andrew's and Chris's comments.  It bounds the wait loop by a reasonable constant and makes atomic_compare_and_exchange update memory on the local processor.  Assuming that cmpxchg is about 100 times more expensive than a simple load, we spend 90% of the wait cycle in "while (*lock != 0)" loop and 10% in atomic_compare_and_exchange to update the memory.

--
Maxim Kuvyrkov
CodeSourcery / Mentor Graphics


Bound wait loop in pthread_spin_lock.

	2012-07-12  Maxim Kuvyrkov  <maxim@codesourcery.com>

	* nptl/pthread_spin_lock.c (PTHREAD_SPIN_LOCK_WAIT): New macro.
	(pthread_spin_lock): Bound wait cycle with PTHREAD_SPIN_LOCK_WAIT.
---
 nptl/pthread_spin_lock.c |   27 +++++++++++++++++++++++++--
 1 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/nptl/pthread_spin_lock.c b/nptl/pthread_spin_lock.c
index 1daa43d..ac59117 100644
--- a/nptl/pthread_spin_lock.c
+++ b/nptl/pthread_spin_lock.c
@@ -19,12 +19,35 @@
 #include <atomic.h>
 #include "pthreadP.h"
 
+#ifndef PTHREAD_SPIN_LOCK_WAIT
+# define PTHREAD_SPIN_LOCK_WAIT 1000
+#endif
+
 int
 pthread_spin_lock (pthread_spinlock_t *lock)
 {
   while (atomic_compare_and_exchange_val_acq (lock, 1, 0) != 0)
-    while (*lock != 0)
-      ;
+    /* The lock is contended and we need to wait.  Going straight back
+       to cmpxchg is not a good idea on many targets as that will force
+       expensive memory synchronizations among processors and penalize other
+       running threads.
+       On the other hand, we do want to update memory state on the local core
+       once in a while to avoid spinning indefinitely until some event that
+       will happen to update local memory as a side-effect.  */
+    {
+      if (PTHREAD_SPIN_LOCK_WAIT)
+	{
+	  int wait = PTHREAD_SPIN_LOCK_WAIT;
+
+	  while (*lock != 0 && --wait)
+	    ;
+	}
+      else
+	{
+	  while (*lock != 0)
+	    ;
+	}
+    }
 
   return 0;
 }
-- 
1.7.4.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-07-11 22:16                   ` Maxim Kuvyrkov
@ 2012-07-25 18:13                     ` Roland McGrath
  2012-07-25 19:04                       ` Chris Metcalf
  2012-08-15  3:17                       ` Maxim Kuvyrkov
  0 siblings, 2 replies; 34+ messages in thread
From: Roland McGrath @ 2012-07-25 18:13 UTC (permalink / raw)
  To: Maxim Kuvyrkov
  Cc: Andrew Haley, David Miller, joseph, rdsandiford, libc-ports, libc-alpha

> This code _is_ right.  It may not be optimal for all machines, but it is
> correct.  I agree that the code is not obvious and the attached patch
> clears it up.

It is sufficiently suboptimal on some machines that it does not qualify as
generically correct for a synchronization primitive.

> This is not new code.  It is the exactly same code that ARM, MIPS, HPPA
> and M68K have all been using for several years, it is just moved into the
> generic directory.

Sharing code is good.  I'm all for it.  But generic is not for code that
happens to be about right on several machines.  It's for code that is truly
generic--either it's a stub with a link warning, or it's adequate for any
configuration, only to be overridden by sysdeps versions that are optimized
for a particular configuration.

Here I think the reasonable thing to do is:

/* A machine-specific version can define SPIN_LOCK_READS_BETWEEN_CMPXCHG
   to the number of plain reads that it's optimal to spin on between uses
   of atomic_compare_and_exchange_val_acq.  If spinning forever is optimal
   then use -1.  If no plain reads here would ever be optimal, use 0.  */
#ifndef SPIN_LOCK_READS_BETWEEN_CMPXCHG
# warning machine-dependent file should define SPIN_LOCK_READS_BETWEEN_CMPXCHG
# define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
#endif

Then ARM et al can do:

/* Machine-dependent rationale about the selection of this value.  */
#define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
#include <nptl/pthread_spin_lock.c>

while Teil will use -1.

> +      if (PTHREAD_SPIN_LOCK_WAIT)

Don't use implicit boolean coercion.
Use "if (SPIN_LOCK_READS_BETWEEN_CMPXCHG >= 0)".

> +	{
> +	  int wait = PTHREAD_SPIN_LOCK_WAIT;
> +
> +	  while (*lock != 0 && --wait)
> +	    ;

Write it:
	while (wait > 0 && *lock != 0)
	  --wait;

That handles the SPIN_LOCK_READS_BETWEEN_CMPXCHG==0 case implicitly,
avoids the ugly empty statement, and doesn't use implicit coercion.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-07-25 18:13                     ` Roland McGrath
@ 2012-07-25 19:04                       ` Chris Metcalf
  2012-07-25 20:22                         ` Roland McGrath
  2012-08-15  3:17                       ` Maxim Kuvyrkov
  1 sibling, 1 reply; 34+ messages in thread
From: Chris Metcalf @ 2012-07-25 19:04 UTC (permalink / raw)
  To: Roland McGrath
  Cc: Maxim Kuvyrkov, Andrew Haley, David Miller, joseph, rdsandiford,
	libc-ports, libc-alpha

On 7/25/2012 2:13 PM, Roland McGrath wrote:
> Here I think the reasonable thing to do is:
>
> /* A machine-specific version can define SPIN_LOCK_READS_BETWEEN_CMPXCHG
>    to the number of plain reads that it's optimal to spin on between uses
>    of atomic_compare_and_exchange_val_acq.  If spinning forever is optimal
>    then use -1.  If no plain reads here would ever be optimal, use 0.  */
> #ifndef SPIN_LOCK_READS_BETWEEN_CMPXCHG
> # warning machine-dependent file should define SPIN_LOCK_READS_BETWEEN_CMPXCHG
> # define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
> #endif
>
> Then ARM et al can do:
>
> /* Machine-dependent rationale about the selection of this value.  */
> #define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
> #include <nptl/pthread_spin_lock.c>
>
> while Teil will use -1.

The tile architecture is unlikely to use this generic version no matter
what; see http://sourceware.org/ml/libc-ports/2012-07/msg00030.html for the
details, but the primary point is that in a mesh-based architecture it's a
bad idea to ever end up in a situation where all the cores can be spinning
issues loads or cmpxchg as fast as they can, so some kind of backoff is
necessary.

-- 
Chris Metcalf, Tilera Corp.
http://www.tilera.com

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-07-25 19:04                       ` Chris Metcalf
@ 2012-07-25 20:22                         ` Roland McGrath
  2012-07-25 20:29                           ` Chris Metcalf
  0 siblings, 1 reply; 34+ messages in thread
From: Roland McGrath @ 2012-07-25 20:22 UTC (permalink / raw)
  To: Chris Metcalf
  Cc: Maxim Kuvyrkov, Andrew Haley, David Miller, joseph, rdsandiford,
	libc-ports, libc-alpha

> The tile architecture is unlikely to use this generic version no matter
> what; see http://sourceware.org/ml/libc-ports/2012-07/msg00030.html for the
> details, but the primary point is that in a mesh-based architecture it's a
> bad idea to ever end up in a situation where all the cores can be spinning
> issues loads or cmpxchg as fast as they can, so some kind of backoff is
> necessary.

I had read that before but only noticed the explanation that the plain
reads were bad.  (Hence in my suggestion you'd share the code but with a
#define that means the loop of plain reads would be elided entirely at
compile time by constant folding.)  What kind of "backoff" do you mean?
It's probably appropriate on every machine to use "atomic_delay ();" inside
such loops.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-07-25 20:22                         ` Roland McGrath
@ 2012-07-25 20:29                           ` Chris Metcalf
  2012-07-25 20:43                             ` Roland McGrath
  0 siblings, 1 reply; 34+ messages in thread
From: Chris Metcalf @ 2012-07-25 20:29 UTC (permalink / raw)
  To: Roland McGrath
  Cc: Maxim Kuvyrkov, Andrew Haley, David Miller, joseph, rdsandiford,
	libc-ports, libc-alpha

On 7/25/2012 4:22 PM, Roland McGrath wrote:
>> The tile architecture is unlikely to use this generic version no matter
>> what; see http://sourceware.org/ml/libc-ports/2012-07/msg00030.html for the
>> details, but the primary point is that in a mesh-based architecture it's a
>> bad idea to ever end up in a situation where all the cores can be spinning
>> issues loads or cmpxchg as fast as they can, so some kind of backoff is
>> necessary.
> I had read that before but only noticed the explanation that the plain
> reads were bad.  (Hence in my suggestion you'd share the code but with a
> #define that means the loop of plain reads would be elided entirely at
> compile time by constant folding.)  What kind of "backoff" do you mean?
> It's probably appropriate on every machine to use "atomic_delay ();" inside
> such loops.

Some work we did a while back found that bounded exponential delay tended
to work best for any kind of spinlock.  So if we fail to acquire the lock
the first time around, we wait a few cycles and try again, then keep
doubling the wait time up to some ceiling (around 1000 cycles or so); see
ports/sysdeps/tile/nptl/pthread_spin_lock.c.

This way in worst-case situation when you have (say) 100 cores all trying
to acquire the lock at once, you don't hose the memory network with
traffic.  This also helps somewhat to avoid unfairness where closer cores
have a dramatically better chance of acquiring the lock due to how wormhole
routing allocates links in the mesh to memory messages.

Of course, the real answer tends to be "don't use simple spinlocks", so in
the kernel, for example, we use ticket locks instead.  But with pthread
spinlocks that's not a great option since if any thread waiting for the
lock is scheduled out for a while, no later thread can acquire the lock either.

-- 
Chris Metcalf, Tilera Corp.
http://www.tilera.com

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-07-25 20:29                           ` Chris Metcalf
@ 2012-07-25 20:43                             ` Roland McGrath
  0 siblings, 0 replies; 34+ messages in thread
From: Roland McGrath @ 2012-07-25 20:43 UTC (permalink / raw)
  To: Chris Metcalf
  Cc: Maxim Kuvyrkov, Andrew Haley, David Miller, joseph, rdsandiford,
	libc-ports, libc-alpha

> Some work we did a while back found that bounded exponential delay tended
> to work best for any kind of spinlock.  So if we fail to acquire the lock
> the first time around, we wait a few cycles and try again, then keep
> doubling the wait time up to some ceiling (around 1000 cycles or so); see
> ports/sysdeps/tile/nptl/pthread_spin_lock.c.

I see.  Some sort of exponential backoff like that is probably a good idea
generally.  But in generic code it certainly wouldn't be one that's based
on taking cycle counter samples.  It would have to be just counting
iterations of calling atomic_delay.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-07-25 18:13                     ` Roland McGrath
  2012-07-25 19:04                       ` Chris Metcalf
@ 2012-08-15  3:17                       ` Maxim Kuvyrkov
  2012-08-15 16:27                         ` Roland McGrath
                                           ` (4 more replies)
  1 sibling, 5 replies; 34+ messages in thread
From: Maxim Kuvyrkov @ 2012-08-15  3:17 UTC (permalink / raw)
  To: Roland McGrath
  Cc: Andrew Haley, David Miller, Joseph S. Myers, Richard Sandiford,
	libc-ports, GLIBC Devel, Chris Metcalf

On 26/07/2012, at 6:13 AM, Roland McGrath wrote:

> Here I think the reasonable thing to do is:
> 
> /* A machine-specific version can define SPIN_LOCK_READS_BETWEEN_CMPXCHG
>   to the number of plain reads that it's optimal to spin on between uses
>   of atomic_compare_and_exchange_val_acq.  If spinning forever is optimal
>   then use -1.  If no plain reads here would ever be optimal, use 0.  */
> #ifndef SPIN_LOCK_READS_BETWEEN_CMPXCHG
> # warning machine-dependent file should define SPIN_LOCK_READS_BETWEEN_CMPXCHG
> # define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
> #endif
> 
> Then ARM et al can do:
> 
> /* Machine-dependent rationale about the selection of this value.  */
> #define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
> #include <nptl/pthread_spin_lock.c>
> 
> while Teil will use -1.
> 
>> +      if (PTHREAD_SPIN_LOCK_WAIT)
> 
> Don't use implicit boolean coercion.
> Use "if (SPIN_LOCK_READS_BETWEEN_CMPXCHG >= 0)".
> 
>> +	{
>> +	  int wait = PTHREAD_SPIN_LOCK_WAIT;
>> +
>> +	  while (*lock != 0 && --wait)
>> +	    ;
> 
> Write it:
> 	while (wait > 0 && *lock != 0)
> 	  --wait;
> 
> That handles the SPIN_LOCK_READS_BETWEEN_CMPXCHG==0 case implicitly,
> avoids the ugly empty statement, and doesn't use implicit coercion.

OK, the updated patch attached.  Is this what you had in mind?

As before the patch touches ARM, HPPA, M68K and MIPS.

The patch also adds another optimization to use atomic_exchange_acq for the first attempt at acquiring the lock.  For a free lock atomic_exchange is, generally, faster; while atomic_compare_and_exchange is better for waiting on a contended lock.  Same rationale applies to pthread_spin_trylock.

This patch builds on MIPS and regression testing is in progress.  OK to apply is tests are fine?

Thank you,

--
Maxim Kuvyrkov
CodeSourcery / Mentor Graphics


Add generic versions of pthread_spin_lock and pthread_spin_trylock.

	2012-07-09  Maxim Kuvyrkov  <maxim@codesourcery.com>

	* nptl/pthread_spin_lock.c: New file.
	* nptl/pthread_spin_trylock.c: New file.

	ports/
	* sysdeps/arm/nptl/pthread_spin_lock.c: Use generic code.
	* sysdeps/arm/nptl/pthread_spin_trylock.c: Remove, use generic version.

	* sysdeps/hppa/nptl/pthread_spin_lock.c: Use generic code.
	* sysdeps/hppa/nptl/pthread_spin_trylock.c: Remove, use generic version.

	* sysdeps/m68k/nptl/pthread_spin_lock.c: Use generic code.
	* sysdeps/m68k/nptl/pthread_spin_trylock.c: Remove, use generic version.

	* sysdeps/mips/nptl/pthread_spin_lock.S: Remove, use generic version.
	* sysdeps/mips/nptl/pthread_spin_lock.c: New file.
	* sysdeps/mips/nptl/pthread_spin_trylock.S: Remove, use generic version.
---
 nptl/pthread_spin_lock.c                       |   69 ++++++++++++++++++++++++
 nptl/pthread_spin_trylock.c                    |   27 +++++++++
 ports/sysdeps/arm/nptl/pthread_spin_lock.c     |   16 +-----
 ports/sysdeps/arm/nptl/pthread_spin_trylock.c  |   26 ---------
 ports/sysdeps/hppa/nptl/pthread_spin_lock.c    |   24 +-------
 ports/sysdeps/hppa/nptl/pthread_spin_trylock.c |   33 -----------
 ports/sysdeps/m68k/nptl/pthread_spin_lock.c    |   16 +-----
 ports/sysdeps/m68k/nptl/pthread_spin_trylock.c |   27 ---------
 ports/sysdeps/mips/nptl/pthread_spin_lock.S    |   36 ------------
 ports/sysdeps/mips/nptl/pthread_spin_lock.c    |   19 +++++++
 ports/sysdeps/mips/nptl/pthread_spin_trylock.S |   40 --------------
 11 files changed, 124 insertions(+), 209 deletions(-)
 create mode 100644 nptl/pthread_spin_lock.c
 create mode 100644 nptl/pthread_spin_trylock.c
 delete mode 100644 ports/sysdeps/arm/nptl/pthread_spin_trylock.c
 delete mode 100644 ports/sysdeps/hppa/nptl/pthread_spin_trylock.c
 delete mode 100644 ports/sysdeps/m68k/nptl/pthread_spin_trylock.c
 delete mode 100644 ports/sysdeps/mips/nptl/pthread_spin_lock.S
 create mode 100644 ports/sysdeps/mips/nptl/pthread_spin_lock.c
 delete mode 100644 ports/sysdeps/mips/nptl/pthread_spin_trylock.S

diff --git a/nptl/pthread_spin_lock.c b/nptl/pthread_spin_lock.c
new file mode 100644
index 0000000..91733fb
--- /dev/null
+++ b/nptl/pthread_spin_lock.c
@@ -0,0 +1,69 @@
+/* pthread_spin_lock -- lock a spin lock.  Generic version.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <atomic.h>
+#include "pthreadP.h"
+
+/* A machine-specific version can define SPIN_LOCK_READS_BETWEEN_CMPXCHG
+  to the number of plain reads that it's optimal to spin on between uses
+  of atomic_compare_and_exchange_val_acq.  If spinning forever is optimal
+  then use -1.  If no plain reads here would ever be optimal, use 0.  */
+#ifndef SPIN_LOCK_READS_BETWEEN_CMPXCHG
+# warning machine-dependent file should define SPIN_LOCK_READS_BETWEEN_CMPXCHG
+# define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
+#endif
+
+int
+pthread_spin_lock (pthread_spinlock_t *lock)
+{
+  /* atomic_exchange usually takes less instructions than
+     atomic_compare_and_exchange.  On the other hand,
+     atomic_compare_and_exchange potentially generates less bus traffic
+     when the lock is locked.
+     We assume that the first try mostly will be successful, and we use
+     atomic_exchange.  For the subsequent tries we use
+     atomic_compare_and_exchange.  */
+  if (atomic_exchange_acq (lock, 1) == 0)
+    return 0;
+
+  do
+    {
+      /* The lock is contended and we need to wait.  Going straight back
+	 to cmpxchg is not a good idea on many targets as that will force
+	 expensive memory synchronizations among processors and penalize other
+	 running threads.
+	 On the other hand, we do want to update memory state on the local core
+	 once in a while to avoid spinning indefinitely until some event that
+	 will happen to update local memory as a side-effect.  */
+      if (SPIN_LOCK_READS_BETWEEN_CMPXCHG >= 0)
+	{
+	  int wait = SPIN_LOCK_READS_BETWEEN_CMPXCHG;
+
+	  while (*lock != 0 && wait > 0)
+	    --wait;
+	}
+      else
+	{
+	  while (*lock != 0)
+	    ;
+	}
+    }
+  while (atomic_compare_and_exchange_val_acq (lock, 1, 0) != 0);
+
+  return 0;
+}
diff --git a/nptl/pthread_spin_trylock.c b/nptl/pthread_spin_trylock.c
new file mode 100644
index 0000000..db9372c
--- /dev/null
+++ b/nptl/pthread_spin_trylock.c
@@ -0,0 +1,27 @@
+/* pthread_spin_trylock -- trylock a spin lock.  Generic version.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <errno.h>
+#include <atomic.h>
+#include "pthreadP.h"
+
+int
+pthread_spin_trylock (pthread_spinlock_t *lock)
+{
+  return atomic_exchange_acq (lock, 1) ? EBUSY : 0;
+}
diff --git a/ports/sysdeps/arm/nptl/pthread_spin_lock.c b/ports/sysdeps/arm/nptl/pthread_spin_lock.c
index 3a23bd3..7ccfbfa 100644
--- a/ports/sysdeps/arm/nptl/pthread_spin_lock.c
+++ b/ports/sysdeps/arm/nptl/pthread_spin_lock.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2008 Free Software Foundation, Inc.
+/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -15,15 +15,5 @@
    License along with the GNU C Library.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <atomic.h>
-#include "pthreadP.h"
-
-int
-pthread_spin_lock (pthread_spinlock_t *lock)
-{
-  while (atomic_compare_and_exchange_val_acq (lock, 1, 0) != 0)
-   while (*lock != 0)
-    ;
-
-  return 0;
-}
+#define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
+#include_next <nptl/pthread_spin_lock.c>
diff --git a/ports/sysdeps/arm/nptl/pthread_spin_trylock.c b/ports/sysdeps/arm/nptl/pthread_spin_trylock.c
deleted file mode 100644
index 7d31180..0000000
--- a/ports/sysdeps/arm/nptl/pthread_spin_trylock.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/* Copyright (C) 2008 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <errno.h>
-#include <atomic.h>
-#include "pthreadP.h"
-
-int
-pthread_spin_trylock (pthread_spinlock_t *lock)
-{
-  return atomic_compare_and_exchange_val_acq (lock, 1, 0) ? EBUSY : 0;
-}
diff --git a/ports/sysdeps/hppa/nptl/pthread_spin_lock.c b/ports/sysdeps/hppa/nptl/pthread_spin_lock.c
index bcf2240..7c86768 100644
--- a/ports/sysdeps/hppa/nptl/pthread_spin_lock.c
+++ b/ports/sysdeps/hppa/nptl/pthread_spin_lock.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2005 Free Software Foundation, Inc.
+/* Copyright (C) 2005-2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -15,23 +15,5 @@
    License along with the GNU C Library.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <atomic.h>
-#include "pthreadP.h"
-
-int
-pthread_spin_lock (pthread_spinlock_t *lock)
-{
-#if 0
-  volatile unsigned int *addr = __ldcw_align (lock);
-
-  while (__ldcw (addr) == 0)
-    while (*addr == 0) ;
-
-  return 0;
-#endif
-
-  while (atomic_compare_and_exchange_val_acq(lock, 1, 0) == 1)
-    while (*lock == 1);
-  
-  return 0;
-}
+#define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
+#include_next <nptl/pthread_spin_lock.c>
diff --git a/ports/sysdeps/hppa/nptl/pthread_spin_trylock.c b/ports/sysdeps/hppa/nptl/pthread_spin_trylock.c
deleted file mode 100644
index a802861..0000000
--- a/ports/sysdeps/hppa/nptl/pthread_spin_trylock.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/* Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <errno.h>
-#include <atomic.h>
-#include "pthreadP.h"
-
-int
-pthread_spin_trylock (pthread_spinlock_t *lock)
-{
-#if 0
-  volatile unsigned int *a = __ldcw_align (lock);
-
-  return __ldcw (a) ? 0 : EBUSY;
-#endif
-
-  return atomic_compare_and_exchange_val_acq(lock, 1, 0) ? EBUSY : 0;
-
-}
diff --git a/ports/sysdeps/m68k/nptl/pthread_spin_lock.c b/ports/sysdeps/m68k/nptl/pthread_spin_lock.c
index 90a8262..710e48b 100644
--- a/ports/sysdeps/m68k/nptl/pthread_spin_lock.c
+++ b/ports/sysdeps/m68k/nptl/pthread_spin_lock.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2010 Free Software Foundation, Inc.
+/* Copyright (C) 2010-2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
 
@@ -16,15 +16,5 @@
    License along with the GNU C Library.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <atomic.h>
-#include "pthreadP.h"
-
-int
-pthread_spin_lock (pthread_spinlock_t *lock)
-{
-  while (atomic_compare_and_exchange_val_acq(lock, 1, 0) != 0)
-   while (*lock != 0)
-    ;
-
-  return 0;
-}
+#define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
+#include_next <nptl/pthread_spin_lock.c>
diff --git a/ports/sysdeps/m68k/nptl/pthread_spin_trylock.c b/ports/sysdeps/m68k/nptl/pthread_spin_trylock.c
deleted file mode 100644
index f4b0c0d..0000000
--- a/ports/sysdeps/m68k/nptl/pthread_spin_trylock.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/* Copyright (C) 2010 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <errno.h>
-#include <atomic.h>
-#include "pthreadP.h"
-
-int
-pthread_spin_trylock (pthread_spinlock_t *lock)
-{
-  return atomic_compare_and_exchange_val_acq(lock, 1, 0) ? EBUSY : 0;
-}
diff --git a/ports/sysdeps/mips/nptl/pthread_spin_lock.S b/ports/sysdeps/mips/nptl/pthread_spin_lock.S
deleted file mode 100644
index a8504f1..0000000
--- a/ports/sysdeps/mips/nptl/pthread_spin_lock.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sys/asm.h>
-#include <sysdep.h>
-#include <sgidefs.h>
-
-ENTRY (pthread_spin_lock)
-	.set	push
-#if _MIPS_SIM == _ABIO32
-	.set	mips2
-#endif
-1:	ll	a2, 0(a0)
-	li	a1, 1
-	bnez	a2, 1b
-	sc	a1, 0(a0)
-	beqz	a1, 1b
-	MIPS_SYNC
-	.set	pop
-	li	v0, 0
-	ret
-PSEUDO_END (pthread_spin_lock)
diff --git a/ports/sysdeps/mips/nptl/pthread_spin_lock.c b/ports/sysdeps/mips/nptl/pthread_spin_lock.c
new file mode 100644
index 0000000..83ade5f
--- /dev/null
+++ b/ports/sysdeps/mips/nptl/pthread_spin_lock.c
@@ -0,0 +1,19 @@
+/* Copyright (C) 2012 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
+#include_next <nptl/pthread_spin_lock.c>
diff --git a/ports/sysdeps/mips/nptl/pthread_spin_trylock.S b/ports/sysdeps/mips/nptl/pthread_spin_trylock.S
deleted file mode 100644
index 95b55c3..0000000
--- a/ports/sysdeps/mips/nptl/pthread_spin_trylock.S
+++ /dev/null
@@ -1,40 +0,0 @@
-/* Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sys/asm.h>
-#include <sysdep.h>
-#define _ERRNO_H 1
-#include <bits/errno.h>
-#include <sgidefs.h>
-
-ENTRY (pthread_spin_trylock)
-	.set	push
-#if _MIPS_SIM == _ABIO32
-	.set	mips2
-#endif
-	ll	a2, 0(a0)
-	li	a1, 1
-	bnez	a2, 1f
-	sc	a1, 0(a0)
-	beqz	a1, 1f
-	MIPS_SYNC
-	.set	pop
-	li	v0, 0
-	ret
-1:	li	v0, EBUSY
-	ret
-PSEUDO_END (pthread_spin_trylock)
-- 
1.7.0.4


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-08-15  3:17                       ` Maxim Kuvyrkov
@ 2012-08-15 16:27                         ` Roland McGrath
  2012-08-15 16:39                           ` Maxim Kuvyrkov
  2012-08-15 16:40                         ` Joseph S. Myers
                                           ` (3 subsequent siblings)
  4 siblings, 1 reply; 34+ messages in thread
From: Roland McGrath @ 2012-08-15 16:27 UTC (permalink / raw)
  To: Maxim Kuvyrkov
  Cc: Andrew Haley, David Miller, Joseph S. Myers, Richard Sandiford,
	libc-ports, GLIBC Devel, Chris Metcalf

> 	2012-07-09  Maxim Kuvyrkov  <maxim@codesourcery.com>

The indentation here is funny so I don't know if that's just how you were
quoting the fragment in the message or if you meant to indent the header
line in ChangeLog (which you should not).  Remember to use the date of
commit in the header line.

> 	* nptl/pthread_spin_lock.c: New file.
> 	* nptl/pthread_spin_trylock.c: New file.

These go in nptl/ChangeLog and don't get the "nptl/" prefix.

> 	ports/

These go in the separate ChangeLog.{arm,hppa,m68k,mips} files.
From the way you quoted the fragments it wasn't clear if that's
what you meant.

> +#define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
> +#include_next <nptl/pthread_spin_lock.c>

You don't use #include_next when you're giving the exact full file name
like this.  Just use #include.


Aside from the log nits, the generic additions look fine to me.  You can
commit those as soon as one of the affected machine's maintainers agrees.
For each machine, the changes need to be approved by that machine's
maintainer.  I'm not the maintainer for any those machines, but I would be
inclined to insist that each definition of SPIN_LOCK_READS_BETWEEN_CMPXCHG
have a comment giving the machine-specific rationale for the choice of value.
I wouldn't wait for all the machine maintainers before committing the
generic parts and whichever machine(s) are approved first.  Each machine's
bits can come along later as those approvals arrive.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-08-15 16:27                         ` Roland McGrath
@ 2012-08-15 16:39                           ` Maxim Kuvyrkov
  2012-08-15 16:44                             ` Roland McGrath
  2012-08-15 17:05                             ` Jeff Law
  0 siblings, 2 replies; 34+ messages in thread
From: Maxim Kuvyrkov @ 2012-08-15 16:39 UTC (permalink / raw)
  To: Roland McGrath
  Cc: Andrew Haley, David Miller, Joseph S. Myers, Richard Sandiford,
	libc-ports, GLIBC Devel, Chris Metcalf

On 16/08/2012, at 4:26 AM, Roland McGrath wrote:

>> 	2012-07-09  Maxim Kuvyrkov  <maxim@codesourcery.com>
> 
> The indentation here is funny so I don't know if that's just how you were
> quoting the fragment in the message or if you meant to indent the header
> line in ChangeLog (which you should not).  Remember to use the date of
> commit in the header line.

This is just an indentation artifact in git commit log.  It will be properly formatted in the actual ChangeLog file.

> 
>> 	* nptl/pthread_spin_lock.c: New file.
>> 	* nptl/pthread_spin_trylock.c: New file.
> 
> These go in nptl/ChangeLog and don't get the "nptl/" prefix.

This I didn't notice.  Thanks.

> 
>> 	ports/
> 
> These go in the separate ChangeLog.{arm,hppa,m68k,mips} files.
> From the way you quoted the fragments it wasn't clear if that's
> what you meant.

This I know.

> 
>> +#define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
>> +#include_next <nptl/pthread_spin_lock.c>
> 
> You don't use #include_next when you're giving the exact full file name
> like this.  Just use #include.

The machine-specific pthread_spin_lock.c files go to ports/sysdeps/<machine>/nptl/pthread_spin_lock.c, which comes first in sysdeps search path before the generic nptl/pthread_spin_lock.c.  So it is either '#include_next <nptl/pthread_spin_lock.c>' or '#include "../../../../nptl/pthread_spin_lock.c"'.  The former looks less ugly than the later.

> 
> 
> Aside from the log nits, the generic additions look fine to me.  You can
> commit those as soon as one of the affected machine's maintainers agrees.
> For each machine, the changes need to be approved by that machine's
> maintainer.  I'm not the maintainer for any those machines, but I would be
> inclined to insist that each definition of SPIN_LOCK_READS_BETWEEN_CMPXCHG
> have a comment giving the machine-specific rationale for the choice of value.
> I wouldn't wait for all the machine maintainers before committing the
> generic parts and whichever machine(s) are approved first.  Each machine's
> bits can come along later as those approvals arrive.

OK.

Thanks for the review,

--
Maxim Kuvyrkov
CodeSourcery / Mentor Graphics



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-08-15  3:17                       ` Maxim Kuvyrkov
  2012-08-15 16:27                         ` Roland McGrath
@ 2012-08-15 16:40                         ` Joseph S. Myers
  2012-08-15 16:43                         ` Carlos O'Donell
                                           ` (2 subsequent siblings)
  4 siblings, 0 replies; 34+ messages in thread
From: Joseph S. Myers @ 2012-08-15 16:40 UTC (permalink / raw)
  To: Maxim Kuvyrkov
  Cc: Roland McGrath, Andrew Haley, David Miller, Richard Sandiford,
	libc-ports, GLIBC Devel, Chris Metcalf

On Wed, 15 Aug 2012, Maxim Kuvyrkov wrote:

> As before the patch touches ARM, HPPA, M68K and MIPS.

The ARM and MIPS changes are OK.  (ARM and MIPS systems are too diverse 
for it to be particularly meaningful to give a rationale for a particular 
arbitrary choice of SPIN_LOCK_READS_BETWEEN_CMPXCHG.)

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-08-15  3:17                       ` Maxim Kuvyrkov
  2012-08-15 16:27                         ` Roland McGrath
  2012-08-15 16:40                         ` Joseph S. Myers
@ 2012-08-15 16:43                         ` Carlos O'Donell
  2012-08-15 22:00                         ` Andreas Schwab
  2012-08-16 10:21                         ` Torvald Riegel
  4 siblings, 0 replies; 34+ messages in thread
From: Carlos O'Donell @ 2012-08-15 16:43 UTC (permalink / raw)
  To: Maxim Kuvyrkov
  Cc: Roland McGrath, Andrew Haley, David Miller, Joseph S. Myers,
	Richard Sandiford, libc-ports, GLIBC Devel, Chris Metcalf

On 8/14/2012 11:16 PM, Maxim Kuvyrkov wrote:
> On 26/07/2012, at 6:13 AM, Roland McGrath wrote:
> 
>> Here I think the reasonable thing to do is:
>>
>> /* A machine-specific version can define SPIN_LOCK_READS_BETWEEN_CMPXCHG
>>   to the number of plain reads that it's optimal to spin on between uses
>>   of atomic_compare_and_exchange_val_acq.  If spinning forever is optimal
>>   then use -1.  If no plain reads here would ever be optimal, use 0.  */
>> #ifndef SPIN_LOCK_READS_BETWEEN_CMPXCHG
>> # warning machine-dependent file should define SPIN_LOCK_READS_BETWEEN_CMPXCHG
>> # define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
>> #endif
>>
>> Then ARM et al can do:
>>
>> /* Machine-dependent rationale about the selection of this value.  */
>> #define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
>> #include <nptl/pthread_spin_lock.c>
>>
>> while Teil will use -1.
>>
>>> +      if (PTHREAD_SPIN_LOCK_WAIT)
>>
>> Don't use implicit boolean coercion.
>> Use "if (SPIN_LOCK_READS_BETWEEN_CMPXCHG >= 0)".
>>
>>> +	{
>>> +	  int wait = PTHREAD_SPIN_LOCK_WAIT;
>>> +
>>> +	  while (*lock != 0 && --wait)
>>> +	    ;
>>
>> Write it:
>> 	while (wait > 0 && *lock != 0)
>> 	  --wait;
>>
>> That handles the SPIN_LOCK_READS_BETWEEN_CMPXCHG==0 case implicitly,
>> avoids the ugly empty statement, and doesn't use implicit coercion.
> 
> OK, the updated patch attached.  Is this what you had in mind?
> 
> As before the patch touches ARM, HPPA, M68K and MIPS.
> 
> The patch also adds another optimization to use atomic_exchange_acq for the first attempt at acquiring the lock.  For a free lock atomic_exchange is, generally, faster; while atomic_compare_and_exchange is better for waiting on a contended lock.  Same rationale applies to pthread_spin_trylock.
> 
> This patch builds on MIPS and regression testing is in progress.  OK to apply is tests are fine?
> 
> Thank you,
> 
> --
> Maxim Kuvyrkov
> CodeSourcery / Mentor Graphics
> 
> 
> Add generic versions of pthread_spin_lock and pthread_spin_trylock.
> 
> 	2012-07-09  Maxim Kuvyrkov  <maxim@codesourcery.com>
> 
> 	* nptl/pthread_spin_lock.c: New file.
> 	* nptl/pthread_spin_trylock.c: New file.
> 
> 	ports/
> 	* sysdeps/arm/nptl/pthread_spin_lock.c: Use generic code.
> 	* sysdeps/arm/nptl/pthread_spin_trylock.c: Remove, use generic version.
> 
> 	* sysdeps/hppa/nptl/pthread_spin_lock.c: Use generic code.
> 	* sysdeps/hppa/nptl/pthread_spin_trylock.c: Remove, use generic version.
> 
> 	* sysdeps/m68k/nptl/pthread_spin_lock.c: Use generic code.
> 	* sysdeps/m68k/nptl/pthread_spin_trylock.c: Remove, use generic version.
> 
> 	* sysdeps/mips/nptl/pthread_spin_lock.S: Remove, use generic version.
> 	* sysdeps/mips/nptl/pthread_spin_lock.c: New file.
> 	* sysdeps/mips/nptl/pthread_spin_trylock.S: Remove, use generic version.
> ---
>  nptl/pthread_spin_lock.c                       |   69 ++++++++++++++++++++++++
>  nptl/pthread_spin_trylock.c                    |   27 +++++++++
>  ports/sysdeps/arm/nptl/pthread_spin_lock.c     |   16 +-----
>  ports/sysdeps/arm/nptl/pthread_spin_trylock.c  |   26 ---------
>  ports/sysdeps/hppa/nptl/pthread_spin_lock.c    |   24 +-------
>  ports/sysdeps/hppa/nptl/pthread_spin_trylock.c |   33 -----------
>  ports/sysdeps/m68k/nptl/pthread_spin_lock.c    |   16 +-----
>  ports/sysdeps/m68k/nptl/pthread_spin_trylock.c |   27 ---------
>  ports/sysdeps/mips/nptl/pthread_spin_lock.S    |   36 ------------
>  ports/sysdeps/mips/nptl/pthread_spin_lock.c    |   19 +++++++
>  ports/sysdeps/mips/nptl/pthread_spin_trylock.S |   40 --------------
>  11 files changed, 124 insertions(+), 209 deletions(-)
>  create mode 100644 nptl/pthread_spin_lock.c
>  create mode 100644 nptl/pthread_spin_trylock.c
>  delete mode 100644 ports/sysdeps/arm/nptl/pthread_spin_trylock.c
>  delete mode 100644 ports/sysdeps/hppa/nptl/pthread_spin_trylock.c
>  delete mode 100644 ports/sysdeps/m68k/nptl/pthread_spin_trylock.c
>  delete mode 100644 ports/sysdeps/mips/nptl/pthread_spin_lock.S
>  create mode 100644 ports/sysdeps/mips/nptl/pthread_spin_lock.c
>  delete mode 100644 ports/sysdeps/mips/nptl/pthread_spin_trylock.S
> 
> diff --git a/ports/sysdeps/hppa/nptl/pthread_spin_lock.c b/ports/sysdeps/hppa/nptl/pthread_spin_lock.c
> index bcf2240..7c86768 100644
> --- a/ports/sysdeps/hppa/nptl/pthread_spin_lock.c
> +++ b/ports/sysdeps/hppa/nptl/pthread_spin_lock.c
> @@ -1,4 +1,4 @@
> -/* Copyright (C) 2005 Free Software Foundation, Inc.
> +/* Copyright (C) 2005-2012 Free Software Foundation, Inc.
>     This file is part of the GNU C Library.
>  
>     The GNU C Library is free software; you can redistribute it and/or
> @@ -15,23 +15,5 @@
>     License along with the GNU C Library.  If not, see
>     <http://www.gnu.org/licenses/>.  */
>  
> -#include <atomic.h>
> -#include "pthreadP.h"
> -
> -int
> -pthread_spin_lock (pthread_spinlock_t *lock)
> -{
> -#if 0
> -  volatile unsigned int *addr = __ldcw_align (lock);
> -
> -  while (__ldcw (addr) == 0)
> -    while (*addr == 0) ;
> -
> -  return 0;
> -#endif
> -
> -  while (atomic_compare_and_exchange_val_acq(lock, 1, 0) == 1)
> -    while (*lock == 1);
> -  
> -  return 0;
> -}
> +#define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
> +#include_next <nptl/pthread_spin_lock.c>

OK for hppa.

> diff --git a/ports/sysdeps/hppa/nptl/pthread_spin_trylock.c b/ports/sysdeps/hppa/nptl/pthread_spin_trylock.c
> deleted file mode 100644
> index a802861..0000000
> --- a/ports/sysdeps/hppa/nptl/pthread_spin_trylock.c
> +++ /dev/null
> @@ -1,33 +0,0 @@
> -/* Copyright (C) 2005 Free Software Foundation, Inc.
> -   This file is part of the GNU C Library.
> -
> -   The GNU C Library is free software; you can redistribute it and/or
> -   modify it under the terms of the GNU Lesser General Public
> -   License as published by the Free Software Foundation; either
> -   version 2.1 of the License, or (at your option) any later version.
> -
> -   The GNU C Library is distributed in the hope that it will be useful,
> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   Lesser General Public License for more details.
> -
> -   You should have received a copy of the GNU Lesser General Public
> -   License along with the GNU C Library.  If not, see
> -   <http://www.gnu.org/licenses/>.  */
> -
> -#include <errno.h>
> -#include <atomic.h>
> -#include "pthreadP.h"
> -
> -int
> -pthread_spin_trylock (pthread_spinlock_t *lock)
> -{
> -#if 0
> -  volatile unsigned int *a = __ldcw_align (lock);
> -
> -  return __ldcw (a) ? 0 : EBUSY;
> -#endif
> -
> -  return atomic_compare_and_exchange_val_acq(lock, 1, 0) ? EBUSY : 0;
> -
> -}

OK for hppa.

Cheers,
Carlos.
-- 
Carlos O'Donell
Mentor Graphics / CodeSourcery
carlos_odonell@mentor.com
carlos@codesourcery.com
+1 (613) 963 1026

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-08-15 16:39                           ` Maxim Kuvyrkov
@ 2012-08-15 16:44                             ` Roland McGrath
  2012-08-15 16:53                               ` Maxim Kuvyrkov
  2012-08-15 17:05                             ` Jeff Law
  1 sibling, 1 reply; 34+ messages in thread
From: Roland McGrath @ 2012-08-15 16:44 UTC (permalink / raw)
  To: Maxim Kuvyrkov
  Cc: Andrew Haley, David Miller, Joseph S. Myers, Richard Sandiford,
	libc-ports, GLIBC Devel, Chris Metcalf

> This is just an indentation artifact in git commit log.  It will be
> properly formatted in the actual ChangeLog file.

OK.  Note that we don't use long things like ChangeLog fragments for the
git commit messages, just a short subject line.

> The machine-specific pthread_spin_lock.c files go to
> ports/sysdeps/<machine>/nptl/pthread_spin_lock.c, which comes first in
> sysdeps search path before the generic nptl/pthread_spin_lock.c.  So it
> is either '#include_next <nptl/pthread_spin_lock.c>' or '#include
> "../../../../nptl/pthread_spin_lock.c"'.  The former looks less ugly than
> the later.

Hmm.  Last I knew #include_next from a main source file didn't work as you
expect.  Did GCC change?


Thanks,
Roland

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-08-15 16:44                             ` Roland McGrath
@ 2012-08-15 16:53                               ` Maxim Kuvyrkov
  2012-08-15 16:56                                 ` Roland McGrath
  0 siblings, 1 reply; 34+ messages in thread
From: Maxim Kuvyrkov @ 2012-08-15 16:53 UTC (permalink / raw)
  To: Roland McGrath
  Cc: Andrew Haley, David Miller, Joseph S. Myers, Richard Sandiford,
	libc-ports, GLIBC Devel, Chris Metcalf

On 16/08/2012, at 4:43 AM, Roland McGrath wrote:

>> This is just an indentation artifact in git commit log.  It will be
>> properly formatted in the actual ChangeLog file.
> 
> OK.  Note that we don't use long things like ChangeLog fragments for the
> git commit messages, just a short subject line.
> 
>> The machine-specific pthread_spin_lock.c files go to
>> ports/sysdeps/<machine>/nptl/pthread_spin_lock.c, which comes first in
>> sysdeps search path before the generic nptl/pthread_spin_lock.c.  So it
>> is either '#include_next <nptl/pthread_spin_lock.c>' or '#include
>> "../../../../nptl/pthread_spin_lock.c"'.  The former looks less ugly than
>> the later.
> 
> Hmm.  Last I knew #include_next from a main source file didn't work as you
> expect.  Did GCC change?

This works at least with GCC 4.4 and GCC 4.8, albeit with a warning:

../ports/sysdeps/mips/nptl/pthread_spin_lock.c:19:2: warning: #include_next in primary source file [enabled by default]
 #include_next <nptl/pthread_spin_lock.c>
  ^

Given that previous versions of GCC can, potentially, fail to compile this, I would rather use the "../../../../nptl/pthread_spin_lock.c" version.  Any alternative suggestions?

Thanks,

--
Maxim Kuvyrkov
CodeSourcery / Mentor Graphics

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-08-15 16:53                               ` Maxim Kuvyrkov
@ 2012-08-15 16:56                                 ` Roland McGrath
  2012-08-15 17:05                                   ` Maxim Kuvyrkov
  0 siblings, 1 reply; 34+ messages in thread
From: Roland McGrath @ 2012-08-15 16:56 UTC (permalink / raw)
  To: Maxim Kuvyrkov
  Cc: Andrew Haley, David Miller, Joseph S. Myers, Richard Sandiford,
	libc-ports, GLIBC Devel, Chris Metcalf

> This works at least with GCC 4.4 and GCC 4.8, albeit with a warning:
> 
> ../ports/sysdeps/mips/nptl/pthread_spin_lock.c:19:2: warning: #include_next in primary source file [enabled by default]
>  #include_next <nptl/pthread_spin_lock.c>
>   ^

Right.  We don't want that.

> Given that previous versions of GCC can, potentially, fail to compile
> this, I would rather use the "../../../../nptl/pthread_spin_lock.c"
> version.  Any alternative suggestions?

Anything that depends on the location of the source file is a bad idea.
It will break if things get moved around, and perhaps nonobviously.
You could use <sysdeps/../nptl/foo.c> but it merits a comment about why
the more common and simple <nptl/foo.c> is not being used.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-08-15 16:39                           ` Maxim Kuvyrkov
  2012-08-15 16:44                             ` Roland McGrath
@ 2012-08-15 17:05                             ` Jeff Law
  2012-08-15 17:11                               ` Roland McGrath
  1 sibling, 1 reply; 34+ messages in thread
From: Jeff Law @ 2012-08-15 17:05 UTC (permalink / raw)
  To: Maxim Kuvyrkov
  Cc: Roland McGrath, Andrew Haley, David Miller, Joseph S. Myers,
	Richard Sandiford, libc-ports, GLIBC Devel, Chris Metcalf

On 08/15/2012 10:39 AM, Maxim Kuvyrkov wrote:
>>
>>> +#define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
>>> +#include_next <nptl/pthread_spin_lock.c>
I haven't followed this discussion closely,so if I'm bringing up an 
issue which you're already dealing with or have plans to deal with, my 
apologies.

I'm aware these constants can vary on a per-architecture basis; however, 
is any provision made for varying them within a particular architecture.

The reason I ask is once of Red Hat's partners raised an issue around 
adaptive mutexes where the upper bound for the spin count was less than 
a cache round trip on that partner's modern processors.

It seems to me that SPIN_LOCK_READS_BETWEEN_CMPXCHG may have the 
property that varying it at runtime based on the characteristics of the 
particular chip running the application might be worthwhile.

And, yes, there are some vague plans for Red Hat to write and submit 
code for the adaptive mutex issue :-)

jeff


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-08-15 16:56                                 ` Roland McGrath
@ 2012-08-15 17:05                                   ` Maxim Kuvyrkov
  0 siblings, 0 replies; 34+ messages in thread
From: Maxim Kuvyrkov @ 2012-08-15 17:05 UTC (permalink / raw)
  To: Roland McGrath
  Cc: Andrew Haley, David Miller, Joseph S. Myers, Richard Sandiford,
	libc-ports, GLIBC Devel, Chris Metcalf

On 16/08/2012, at 4:56 AM, Roland McGrath wrote:

>> This works at least with GCC 4.4 and GCC 4.8, albeit with a warning:
>> 
>> ../ports/sysdeps/mips/nptl/pthread_spin_lock.c:19:2: warning: #include_next in primary source file [enabled by default]
>> #include_next <nptl/pthread_spin_lock.c>
>>  ^
> 
> Right.  We don't want that.
> 
>> Given that previous versions of GCC can, potentially, fail to compile
>> this, I would rather use the "../../../../nptl/pthread_spin_lock.c"
>> version.  Any alternative suggestions?
> 
> Anything that depends on the location of the source file is a bad idea.
> It will break if things get moved around, and perhaps nonobviously.
> You could use <sysdeps/../nptl/foo.c> but it merits a comment about why
> the more common and simple <nptl/foo.c> is not being used.

Yep, this looks good.  I'll test this and add comments to all affected places.

--
Maxim Kuvyrkov
CodeSourcery / Mentor Graphics

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-08-15 17:05                             ` Jeff Law
@ 2012-08-15 17:11                               ` Roland McGrath
  2012-08-15 17:24                                 ` Jeff Law
  0 siblings, 1 reply; 34+ messages in thread
From: Roland McGrath @ 2012-08-15 17:11 UTC (permalink / raw)
  To: Jeff Law
  Cc: Maxim Kuvyrkov, Andrew Haley, David Miller, Joseph S. Myers,
	Richard Sandiford, libc-ports, GLIBC Devel, Chris Metcalf

> It seems to me that SPIN_LOCK_READS_BETWEEN_CMPXCHG may have the 
> property that varying it at runtime based on the characteristics of the 
> particular chip running the application might be worthwhile.

Nothing prevents a machine from defining the macro to use a global variable
that is set by some machine-specific initialization code.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-08-15 17:11                               ` Roland McGrath
@ 2012-08-15 17:24                                 ` Jeff Law
  0 siblings, 0 replies; 34+ messages in thread
From: Jeff Law @ 2012-08-15 17:24 UTC (permalink / raw)
  To: Roland McGrath
  Cc: Maxim Kuvyrkov, Andrew Haley, David Miller, Joseph S. Myers,
	Richard Sandiford, libc-ports, GLIBC Devel, Chris Metcalf

On 08/15/2012 11:10 AM, Roland McGrath wrote:
>> It seems to me that SPIN_LOCK_READS_BETWEEN_CMPXCHG may have the
>> property that varying it at runtime based on the characteristics of the
>> particular chip running the application might be worthwhile.
>
> Nothing prevents a machine from defining the macro to use a global variable
> that is set by some machine-specific initialization code.
Understood.  Just wanted to raise the issue.  No pressing need to 
resolve it now, but it's on Red Hat's radar and now it's on the 
community's radar :-)

jeff

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-08-15  3:17                       ` Maxim Kuvyrkov
                                           ` (2 preceding siblings ...)
  2012-08-15 16:43                         ` Carlos O'Donell
@ 2012-08-15 22:00                         ` Andreas Schwab
  2012-08-16 10:21                         ` Torvald Riegel
  4 siblings, 0 replies; 34+ messages in thread
From: Andreas Schwab @ 2012-08-15 22:00 UTC (permalink / raw)
  To: Maxim Kuvyrkov
  Cc: Roland McGrath, Andrew Haley, David Miller, Joseph S. Myers,
	Richard Sandiford, libc-ports, GLIBC Devel, Chris Metcalf

Maxim Kuvyrkov <maxim@codesourcery.com> writes:

> 	* sysdeps/m68k/nptl/pthread_spin_lock.c: Use generic code.
> 	* sysdeps/m68k/nptl/pthread_spin_trylock.c: Remove, use generic version.

OK.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-08-15  3:17                       ` Maxim Kuvyrkov
                                           ` (3 preceding siblings ...)
  2012-08-15 22:00                         ` Andreas Schwab
@ 2012-08-16 10:21                         ` Torvald Riegel
  2012-08-16 12:40                           ` Chris Metcalf
  2012-08-22 23:16                           ` Maxim Kuvyrkov
  4 siblings, 2 replies; 34+ messages in thread
From: Torvald Riegel @ 2012-08-16 10:21 UTC (permalink / raw)
  To: Maxim Kuvyrkov
  Cc: Roland McGrath, Andrew Haley, David Miller, Joseph S. Myers,
	Richard Sandiford, libc-ports, GLIBC Devel, Chris Metcalf

On Wed, 2012-08-15 at 15:16 +1200, Maxim Kuvyrkov wrote:
> On 26/07/2012, at 6:13 AM, Roland McGrath wrote: 
> > /* Machine-dependent rationale about the selection of this value.  */
> > #define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000

This looks like an arbitrary choice.  I don't want to complain about
this patch (whose goal is to just unify similar code), but let me use it
as an example.
Elsewhere in the thread, you (IIRC) mentioned that the assumption is
that a CAS is 100x slower than a load. IMO, this is a flawed assumption.
First, this has more dimensions than one instruction being slower than
another one: cache architecture, what other threads are doing and where
in the cache hierarchy/graph they are, the CAS HW implementation, etc.
Second, it's not really about the slow-down for the current thread when
executing a CAS; it's about what the CAS might do in terms of caching
and the latency at which you detect a free lock on ARM, as Andrew Haley
pointed out.  Third, not all machines in an architecture are similar; a
P4 cmpxchg performs much differently to a cmpxchg on a recent x86 CPU.
Fourth, there's no test for this assumption.

So, we will have to make such assumptions, but how do we make sure that
they are reasonable, and remain reasonable over time?  If we don't,
these will bit-rot, and performance might degrade over time (assuming
that the assumptions were initially correct, which might be hard in the
first place).  Is there a plan for this yet, or discussion about this?

> > while Teil will use -1.

Is there a plan to include a back-off component in this generic spin
lock?  (-1 would spin forever, but not do back-off.)


Torvald

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-08-16 10:21                         ` Torvald Riegel
@ 2012-08-16 12:40                           ` Chris Metcalf
  2012-08-22 23:16                           ` Maxim Kuvyrkov
  1 sibling, 0 replies; 34+ messages in thread
From: Chris Metcalf @ 2012-08-16 12:40 UTC (permalink / raw)
  To: Torvald Riegel
  Cc: Maxim Kuvyrkov, Roland McGrath, Andrew Haley, David Miller,
	Joseph S. Myers, Richard Sandiford, libc-ports, GLIBC Devel

On 8/16/2012 6:20 AM, Torvald Riegel wrote:
>> while Teil will use -1.
> Is there a plan to include a back-off component in this generic spin
> lock?  (-1 would spin forever, but not do back-off.)

Certainly one could imagine parameterizing a back-off loop with starting
point and ending point for a bounded exponential backoff calling
atomic_delay() in the loop (as Roland proposed a while back).  I expect
something like that would be usable for the tile architecture.

But my instinct is that it is too architecture-dependent to try to get this
right enough for all platforms, and the basic idea of a spinlock is simple
enough for each architecture to express that it's probably not a good
tradeoff.  In fact, I'm a little concerned that the proposed spinlock is
overdesigned as it is, with every platform providing the same macro
parameter to conditionalize it such that it looks exactly the same.  I'd
probably rather see a straight-up ARM implementation, and then just write
it such that HPPA, M68K, and MIPS can #include the ARM version if they want
to, rather than trying to make something that seems more complex than
necessary for the truly generic implementation, but not complex enough to
really be usable by every implementation.

But: bike-shedding.  If the maintainers of those platforms are happy,
that's fine with me.  :-)

-- 
Chris Metcalf, Tilera Corp.
http://www.tilera.com

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] Unify pthread_spin_[try]lock implementations.
  2012-08-16 10:21                         ` Torvald Riegel
  2012-08-16 12:40                           ` Chris Metcalf
@ 2012-08-22 23:16                           ` Maxim Kuvyrkov
  1 sibling, 0 replies; 34+ messages in thread
From: Maxim Kuvyrkov @ 2012-08-22 23:16 UTC (permalink / raw)
  To: Torvald Riegel
  Cc: Roland McGrath, Andrew Haley, David Miller, Joseph S. Myers,
	Richard Sandiford, libc-ports, GLIBC Devel, Chris Metcalf

On 16/08/2012, at 10:20 PM, Torvald Riegel wrote:

> On Wed, 2012-08-15 at 15:16 +1200, Maxim Kuvyrkov wrote:
>> On 26/07/2012, at 6:13 AM, Roland McGrath wrote: 
>>> /* Machine-dependent rationale about the selection of this value.  */
>>> #define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
> 
> This looks like an arbitrary choice.

It is.

>  I don't want to complain about
> this patch (whose goal is to just unify similar code), but let me use it
> as an example.
> Elsewhere in the thread, you (IIRC) mentioned that the assumption is
> that a CAS is 100x slower than a load. IMO, this is a flawed assumption.
> First, this has more dimensions than one instruction being slower than
> another one: cache architecture, what other threads are doing and where
> in the cache hierarchy/graph they are, the CAS HW implementation, etc.
> Second, it's not really about the slow-down for the current thread when
> executing a CAS; it's about what the CAS might do in terms of caching
> and the latency at which you detect a free lock on ARM, as Andrew Haley
> pointed out.  Third, not all machines in an architecture are similar; a
> P4 cmpxchg performs much differently to a cmpxchg on a recent x86 CPU.
> Fourth, there's no test for this assumption.

This are good arguments.

> 
> So, we will have to make such assumptions, but how do we make sure that
> they are reasonable, and remain reasonable over time?  If we don't,
> these will bit-rot, and performance might degrade over time (assuming
> that the assumptions were initially correct, which might be hard in the
> first place).  Is there a plan for this yet, or discussion about this?

The usual way how this kind of problem is solved in a free software project is that the current [arbitrary] constant causes a bottleneck on some application.  Then someone who cares about that application will investigate the problem and come to the mailing list with specific measurements and specific arguments why this constant should change.  If those arguments are reasonable and general enough, then the community will accept the change.  Then the cycle will continue.

In my opinion, really solving the heck out of a problem such as this one is not worthwhile.  I'm happy with the approximate solution that we arrived to, but I certainly encourage anyone who thinks that this is not good enough to step up and fix it for real and forever.

> 
>>> while Teil will use -1.
> 
> Is there a plan to include a back-off component in this generic spin
> lock?  (-1 would spin forever, but not do back-off.)

No.

Thank you,

--
Maxim Kuvyrkov
CodeSourcery / Mentor Graphics

^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2012-08-22 23:16 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-06-14  4:36 [PATCH 2/3, MIPS] Rewrite MIPS' pthread_spin_[try]lock using __atomic_* builtins Maxim Kuvyrkov
2012-06-28 23:10 ` Joseph S. Myers
2012-07-11  5:58   ` [PATCH] Unify pthread_spin_[try]lock implementations Maxim Kuvyrkov
2012-07-11  8:15     ` Roland McGrath
2012-07-11  8:25       ` David Miller
2012-07-11  8:44         ` Andrew Haley
2012-07-11  8:54           ` Maxim Kuvyrkov
2012-07-11  9:02             ` Andrew Haley
2012-07-11  9:05               ` Maxim Kuvyrkov
2012-07-11 11:22                 ` Roland McGrath
2012-07-11 14:52                   ` Chris Metcalf
2012-07-11 22:16                   ` Maxim Kuvyrkov
2012-07-25 18:13                     ` Roland McGrath
2012-07-25 19:04                       ` Chris Metcalf
2012-07-25 20:22                         ` Roland McGrath
2012-07-25 20:29                           ` Chris Metcalf
2012-07-25 20:43                             ` Roland McGrath
2012-08-15  3:17                       ` Maxim Kuvyrkov
2012-08-15 16:27                         ` Roland McGrath
2012-08-15 16:39                           ` Maxim Kuvyrkov
2012-08-15 16:44                             ` Roland McGrath
2012-08-15 16:53                               ` Maxim Kuvyrkov
2012-08-15 16:56                                 ` Roland McGrath
2012-08-15 17:05                                   ` Maxim Kuvyrkov
2012-08-15 17:05                             ` Jeff Law
2012-08-15 17:11                               ` Roland McGrath
2012-08-15 17:24                                 ` Jeff Law
2012-08-15 16:40                         ` Joseph S. Myers
2012-08-15 16:43                         ` Carlos O'Donell
2012-08-15 22:00                         ` Andreas Schwab
2012-08-16 10:21                         ` Torvald Riegel
2012-08-16 12:40                           ` Chris Metcalf
2012-08-22 23:16                           ` Maxim Kuvyrkov
2012-07-11  8:26       ` Maxim Kuvyrkov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).