* [PATCH][BZ #19490] Add unwind descriptors to pthread_spin_init, etc. on i386
@ 2016-01-24 0:18 Paul Pluzhnikov
2016-01-25 13:06 ` Torvald Riegel
0 siblings, 1 reply; 7+ messages in thread
From: Paul Pluzhnikov @ 2016-01-24 0:18 UTC (permalink / raw)
To: GLIBC Devel
[-- Attachment #1: Type: text/plain, Size: 797 bytes --]
Greetings,
This is similar to
https://www.sourceware.org/ml/libc-alpha/2016-01/msg00605.html
https://www.sourceware.org/ml/libc-alpha/2016-01/msg00456.html
but for i386.
Tested on Linux/i686.
Assuming this is OK, should I wait for lifting of the freeze?
2016-01-23 Paul Pluzhnikov <ppluzhnikov@google.com>
[BZ #19490]
* sysdeps/i386/i386-mcount.S (_mcount): Add unwind descriptor
(__fentry__): Likewise
* sysdeps/unix/sysv/linux/i386/_exit.S (_exit): Use ENTRY/END
* sysdeps/i386/nptl/pthread_spin_lock.S (pthread_spin_lock):
Likewise
* sysdeps/i386/pthread_spin_trylock.S (pthread_spin_trylock):
Likewise
* sysdeps/i386/nptl/pthread_spin_unlock.S (pthread_spin_unlock):
Likewise
--
Paul Pluzhnikov
[-- Attachment #2: glibc-bz19490-i686-20160123.txt --]
[-- Type: text/plain, Size: 4399 bytes --]
diff --git a/sysdeps/i386/i386-mcount.S b/sysdeps/i386/i386-mcount.S
index 94fb95e..f92f3ff 100644
--- a/sysdeps/i386/i386-mcount.S
+++ b/sysdeps/i386/i386-mcount.S
@@ -30,10 +30,17 @@
.type C_SYMBOL_NAME(_mcount), @function
.align ALIGNARG(4)
C_LABEL(_mcount)
+ cfi_startproc
/* Save the caller-clobbered registers. */
pushl %eax
+ cfi_adjust_cfa_offset (4)
pushl %ecx
+ cfi_adjust_cfa_offset (4)
pushl %edx
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (eax, 8)
+ cfi_rel_offset (ecx, 4)
+ cfi_rel_offset (edx, 0)
movl 12(%esp), %edx
movl 4(%ebp), %eax
@@ -45,9 +52,16 @@ C_LABEL(_mcount)
/* Pop the saved registers. Please note that `mcount' has no
return value. */
popl %edx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edx)
popl %ecx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ecx)
popl %eax
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (eax)
ret
+ cfi_endproc
ASM_SIZE_DIRECTIVE(C_SYMBOL_NAME(_mcount))
#undef mcount
@@ -58,10 +72,17 @@ weak_alias (_mcount, mcount)
.type C_SYMBOL_NAME(__fentry__), @function
.align ALIGNARG(4)
C_LABEL(__fentry__)
+ cfi_startproc
/* Save the caller-clobbered registers. */
pushl %eax
+ cfi_adjust_cfa_offset (4)
pushl %ecx
+ cfi_adjust_cfa_offset (4)
pushl %edx
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (eax, 8)
+ cfi_rel_offset (ecx, 4)
+ cfi_rel_offset (edx, 0)
movl 12(%esp), %edx
movl 16(%esp), %eax
@@ -73,7 +94,14 @@ C_LABEL(__fentry__)
/* Pop the saved registers. Please note that `__fentry__' has no
return value. */
popl %edx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (edx)
popl %ecx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ecx)
popl %eax
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (eax)
ret
+ cfi_endproc
ASM_SIZE_DIRECTIVE(C_SYMBOL_NAME(__fentry__))
diff --git a/sysdeps/i386/nptl/pthread_spin_lock.S b/sysdeps/i386/nptl/pthread_spin_lock.S
index e311d95..ea552da 100644
--- a/sysdeps/i386/nptl/pthread_spin_lock.S
+++ b/sysdeps/i386/nptl/pthread_spin_lock.S
@@ -16,11 +16,9 @@
<http://www.gnu.org/licenses/>. */
#include <lowlevellock.h>
+#include <sysdep.h>
- .globl pthread_spin_lock
- .type pthread_spin_lock,@function
- .align 16
-pthread_spin_lock:
+ENTRY(pthread_spin_lock)
mov 4(%esp), %eax
1: LOCK
decl 0(%eax)
@@ -34,4 +32,4 @@ pthread_spin_lock:
cmpl $0, 0(%eax)
jg 1b
jmp 2b
- .size pthread_spin_lock,.-pthread_spin_lock
+END(pthread_spin_lock)
diff --git a/sysdeps/i386/nptl/pthread_spin_unlock.S b/sysdeps/i386/nptl/pthread_spin_unlock.S
index a552cf9..ef75a50 100644
--- a/sysdeps/i386/nptl/pthread_spin_unlock.S
+++ b/sysdeps/i386/nptl/pthread_spin_unlock.S
@@ -16,15 +16,14 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
- .globl pthread_spin_unlock
- .type pthread_spin_unlock,@function
- .align 16
-pthread_spin_unlock:
+#include <sysdep.h>
+
+ENTRY(pthread_spin_unlock)
movl 4(%esp), %eax
movl $1, (%eax)
xorl %eax, %eax
ret
- .size pthread_spin_unlock,.-pthread_spin_unlock
+END(pthread_spin_unlock)
/* The implementation of pthread_spin_init is identical. */
.globl pthread_spin_init
diff --git a/sysdeps/i386/pthread_spin_trylock.S b/sysdeps/i386/pthread_spin_trylock.S
index 36979bd..7d3dabb 100644
--- a/sysdeps/i386/pthread_spin_trylock.S
+++ b/sysdeps/i386/pthread_spin_trylock.S
@@ -17,7 +17,7 @@
<http://www.gnu.org/licenses/>. */
#include <pthread-errnos.h>
-
+#include <sysdep.h>
#ifdef UP
# define LOCK
@@ -25,10 +25,7 @@
# define LOCK lock
#endif
- .globl pthread_spin_trylock
- .type pthread_spin_trylock,@function
- .align 16
-pthread_spin_trylock:
+ENTRY(pthread_spin_trylock)
movl 4(%esp), %edx
movl $1, %eax
xorl %ecx, %ecx
@@ -43,4 +40,4 @@ pthread_spin_trylock:
0:
#endif
ret
- .size pthread_spin_trylock,.-pthread_spin_trylock
+END(pthread_spin_trylock)
diff --git a/sysdeps/unix/sysv/linux/i386/_exit.S b/sysdeps/unix/sysv/linux/i386/_exit.S
index e1550e6..6193ff2 100644
--- a/sysdeps/unix/sysv/linux/i386/_exit.S
+++ b/sysdeps/unix/sysv/linux/i386/_exit.S
@@ -17,10 +17,7 @@
#include <sysdep.h>
- .text
- .type _exit,@function
- .global _exit
-_exit:
+ENTRY(_exit)
movl 4(%esp), %ebx
/* Try the new syscall first. */
@@ -37,7 +34,7 @@ _exit:
/* This must not fail. Be sure we don't return. */
hlt
- .size _exit,.-_exit
+END(_exit)
libc_hidden_def (_exit)
rtld_hidden_def (_exit)
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH][BZ #19490] Add unwind descriptors to pthread_spin_init, etc. on i386
2016-01-24 0:18 [PATCH][BZ #19490] Add unwind descriptors to pthread_spin_init, etc. on i386 Paul Pluzhnikov
@ 2016-01-25 13:06 ` Torvald Riegel
2016-01-31 23:10 ` Paul Pluzhnikov
0 siblings, 1 reply; 7+ messages in thread
From: Torvald Riegel @ 2016-01-25 13:06 UTC (permalink / raw)
To: Paul Pluzhnikov; +Cc: GLIBC Devel
On Sat, 2016-01-23 at 16:18 -0800, Paul Pluzhnikov wrote:
> 2016-01-23 Paul Pluzhnikov <ppluzhnikov@google.com>
>
> [BZ #19490]
> * sysdeps/i386/i386-mcount.S (_mcount): Add unwind descriptor
> (__fentry__): Likewise
> * sysdeps/unix/sysv/linux/i386/_exit.S (_exit): Use ENTRY/END
> * sysdeps/i386/nptl/pthread_spin_lock.S (pthread_spin_lock):
> Likewise
> * sysdeps/i386/pthread_spin_trylock.S (pthread_spin_trylock):
> Likewise
> * sysdeps/i386/nptl/pthread_spin_unlock.S (pthread_spin_unlock):
> Likewise
For the spinlocks, I'd really prefer if we could just remove the asm
files. The generic implementation should basically produce the same
code; if not, we should try to fix that instead of keeping the asm
files.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH][BZ #19490] Add unwind descriptors to pthread_spin_init, etc. on i386
2016-01-25 13:06 ` Torvald Riegel
@ 2016-01-31 23:10 ` Paul Pluzhnikov
2016-02-01 11:37 ` Torvald Riegel
0 siblings, 1 reply; 7+ messages in thread
From: Paul Pluzhnikov @ 2016-01-31 23:10 UTC (permalink / raw)
To: Torvald Riegel; +Cc: GLIBC Devel
On Mon, Jan 25, 2016 at 5:06 AM, Torvald Riegel <triegel@redhat.com> wrote:
> For the spinlocks, I'd really prefer if we could just remove the asm
> files. The generic implementation should basically produce the same
> code; if not, we should try to fix that instead of keeping the asm
> files.
Using gcc-4.8.4 (4.8.4-2ubuntu1~14.04):
$ objdump -d nptl/pthread_spin_unlock.o
nptl/pthread_spin_unlock.o: file format elf32-i386
Disassembly of section .text:
00000000 <pthread_spin_unlock>:
0: f0 83 0c 24 00 lock orl $0x0,(%esp)
5: 8b 44 24 04 mov 0x4(%esp),%eax
9: c7 00 00 00 00 00 movl $0x0,(%eax)
f: 31 c0 xor %eax,%eax
11: c3 ret
This isn't quite the same as sysdeps/i386/nptl/pthread_spin_unlock.S
For pthread_spin_lock it's much worse:
$ objdump -d nptl/pthread_spin_lock.o
nptl/pthread_spin_lock.o: file format elf32-i386
Disassembly of section .text:
00000000 <pthread_spin_lock>:
0: 57 push %edi
1: b8 01 00 00 00 mov $0x1,%eax
6: 56 push %esi
7: 53 push %ebx
8: 83 ec 10 sub $0x10,%esp
b: 8b 5c 24 20 mov 0x20(%esp),%ebx
f: 87 03 xchg %eax,(%ebx)
11: 89 44 24 0c mov %eax,0xc(%esp)
15: 8b 44 24 0c mov 0xc(%esp),%eax
19: 31 ff xor %edi,%edi
1b: be 01 00 00 00 mov $0x1,%esi
20: 85 c0 test %eax,%eax
22: 74 29 je 4d <pthread_spin_lock+0x4d>
24: 8d 74 26 00 lea 0x0(%esi,%eiz,1),%esi
28: 8b 03 mov (%ebx),%eax
2a: 85 c0 test %eax,%eax
2c: 74 15 je 43 <pthread_spin_lock+0x43>
2e: ba e8 03 00 00 mov $0x3e8,%edx
33: eb 08 jmp 3d <pthread_spin_lock+0x3d>
35: 8d 76 00 lea 0x0(%esi),%esi
38: 83 ea 01 sub $0x1,%edx
3b: 74 06 je 43 <pthread_spin_lock+0x43>
3d: 8b 0b mov (%ebx),%ecx
3f: 85 c9 test %ecx,%ecx
41: 75 f5 jne 38 <pthread_spin_lock+0x38>
43: 89 f8 mov %edi,%eax
45: f0 0f b1 33 lock cmpxchg %esi,(%ebx)
49: 85 c0 test %eax,%eax
4b: 75 db jne 28 <pthread_spin_lock+0x28>
4d: 83 c4 10 add $0x10,%esp
50: 31 c0 xor %eax,%eax
52: 5b pop %ebx
53: 5e pop %esi
54: 5f pop %edi
55: c3 ret
So I am not sure we should be throwing ASM out just yet.
--
Paul Pluzhnikov
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH][BZ #19490] Add unwind descriptors to pthread_spin_init, etc. on i386
2016-01-31 23:10 ` Paul Pluzhnikov
@ 2016-02-01 11:37 ` Torvald Riegel
2016-02-01 12:04 ` Szabolcs Nagy
0 siblings, 1 reply; 7+ messages in thread
From: Torvald Riegel @ 2016-02-01 11:37 UTC (permalink / raw)
To: Paul Pluzhnikov; +Cc: GLIBC Devel
[-- Attachment #1: Type: text/plain, Size: 6191 bytes --]
On Sun, 2016-01-31 at 15:09 -0800, Paul Pluzhnikov wrote:
> On Mon, Jan 25, 2016 at 5:06 AM, Torvald Riegel <triegel@redhat.com> wrote:
>
> > For the spinlocks, I'd really prefer if we could just remove the asm
> > files. The generic implementation should basically produce the same
> > code; if not, we should try to fix that instead of keeping the asm
> > files.
>
> Using gcc-4.8.4 (4.8.4-2ubuntu1~14.04):
>
> $ objdump -d nptl/pthread_spin_unlock.o
>
> nptl/pthread_spin_unlock.o: file format elf32-i386
>
>
> Disassembly of section .text:
>
> 00000000 <pthread_spin_unlock>:
> 0: f0 83 0c 24 00 lock orl $0x0,(%esp)
> 5: 8b 44 24 04 mov 0x4(%esp),%eax
> 9: c7 00 00 00 00 00 movl $0x0,(%eax)
> f: 31 c0 xor %eax,%eax
> 11: c3 ret
>
> This isn't quite the same as sysdeps/i386/nptl/pthread_spin_unlock.S
This is because nptl/pthread_spin_unlock.c still issues a full barrier.
If this is changed to an atomic_store_release, one gets this on x86_64:
0000000000000000 <pthread_spin_unlock>:
0: c7 07 00 00 00 00 movl $0x0,(%rdi)
6: 31 c0 xor %eax,%eax
8: c3 retq
Perhaps now is a good time to finally get this done. Most archs are
already using acquire semantics, IIRC. I think aarch64 and arm are the
only major ones that happen to use the current generic unlock with full
barrier -- but they only use acquire MO on unlock, so there's really no
consistent pattern that would justify this.
Note that there's an ongoing debate about whether POSIX requires
pthread_spin_unlock to be a full barrier, whether it should or should
not do that, and whether that makes any difference for all "sane"
programs. But given that we never implemented full barriers on almost
all of the major archs and nobody complained about it, I think we should
continue to not slow down spinlocks just to make weird use cases work
(and the ones that are indeed correct under POSIX are pretty complex
pieces of code).
> For pthread_spin_lock it's much worse:
>
> $ objdump -d nptl/pthread_spin_lock.o
>
> nptl/pthread_spin_lock.o: file format elf32-i386
>
>
> Disassembly of section .text:
>
> 00000000 <pthread_spin_lock>:
> 0: 57 push %edi
> 1: b8 01 00 00 00 mov $0x1,%eax
> 6: 56 push %esi
> 7: 53 push %ebx
> 8: 83 ec 10 sub $0x10,%esp
> b: 8b 5c 24 20 mov 0x20(%esp),%ebx
> f: 87 03 xchg %eax,(%ebx)
> 11: 89 44 24 0c mov %eax,0xc(%esp)
> 15: 8b 44 24 0c mov 0xc(%esp),%eax
> 19: 31 ff xor %edi,%edi
> 1b: be 01 00 00 00 mov $0x1,%esi
> 20: 85 c0 test %eax,%eax
> 22: 74 29 je 4d <pthread_spin_lock+0x4d>
> 24: 8d 74 26 00 lea 0x0(%esi,%eiz,1),%esi
> 28: 8b 03 mov (%ebx),%eax
> 2a: 85 c0 test %eax,%eax
> 2c: 74 15 je 43 <pthread_spin_lock+0x43>
> 2e: ba e8 03 00 00 mov $0x3e8,%edx
> 33: eb 08 jmp 3d <pthread_spin_lock+0x3d>
> 35: 8d 76 00 lea 0x0(%esi),%esi
> 38: 83 ea 01 sub $0x1,%edx
> 3b: 74 06 je 43 <pthread_spin_lock+0x43>
> 3d: 8b 0b mov (%ebx),%ecx
> 3f: 85 c9 test %ecx,%ecx
> 41: 75 f5 jne 38 <pthread_spin_lock+0x38>
> 43: 89 f8 mov %edi,%eax
> 45: f0 0f b1 33 lock cmpxchg %esi,(%ebx)
> 49: 85 c0 test %eax,%eax
> 4b: 75 db jne 28 <pthread_spin_lock+0x28>
> 4d: 83 c4 10 add $0x10,%esp
> 50: 31 c0 xor %eax,%eax
> 52: 5b pop %ebx
> 53: 5e pop %esi
> 54: 5f pop %edi
> 55: c3 ret
I wouldn't say it's worse. It's mostly different, and the uncontended
path may be a little worse. In the generic version, we added spinning.
This isn't really well-tuned yet, but it's something we want to do
eventually. If we assume uncontended, the initial xchg should be fast;
maybe we need to add a glibc_likely here or such, to make GCC do what we
expect; outlining the contended path (ie, the spinning and cmpxchg)
could also help work around GCC codegen deficiencies.
However, on x86_64 I get the following (adding __glibc_likely to the
atomic_exchange_acq only moves the return up):
0000000000000000 <pthread_spin_lock>:
0: b8 01 00 00 00 mov $0x1,%eax
5: 87 07 xchg %eax,(%rdi)
7: 89 44 24 fc mov %eax,-0x4(%rsp)
b: 8b 44 24 fc mov -0x4(%rsp),%eax
f: 85 c0 test %eax,%eax
11: 75 03 jne 16 <pthread_spin_lock+0x16>
13: 31 c0 xor %eax,%eax
15: c3 retq
16: 45 31 c0 xor %r8d,%r8d
19: be 01 00 00 00 mov $0x1,%esi
1e: 8b 17 mov (%rdi),%edx
20: 85 d2 test %edx,%edx
22: 74 17 je 3b <pthread_spin_lock+0x3b>
24: ba e8 03 00 00 mov $0x3e8,%edx
29: eb 0a jmp 35 <pthread_spin_lock+0x35>
2b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
30: 83 ea 01 sub $0x1,%edx
33: 74 06 je 3b <pthread_spin_lock+0x3b>
35: 8b 0f mov (%rdi),%ecx
37: 85 c9 test %ecx,%ecx
39: 75 f5 jne 30 <pthread_spin_lock+0x30>
3b: 44 89 c0 mov %r8d,%eax
3e: f0 0f b1 37 lock cmpxchg %esi,(%rdi)
42: 85 c0 test %eax,%eax
44: 75 d8 jne 1e <pthread_spin_lock+0x1e>
46: eb cb jmp 13 <pthread_spin_lock+0x13>
The fastpath of this doesn't look bad to me (except at 7: and b:, for
which I don't see a reason).
See attached untested patch for what I played with.
[-- Attachment #2: spinlock.patch --]
[-- Type: text/x-patch, Size: 5282 bytes --]
commit f9a5437b0c0150bac4c5afd769dd6eba09fed1de
Author: Torvald Riegel <triegel@redhat.com>
Date: Mon Feb 1 12:35:50 2016 +0100
generic spinlock cleanup and x86_64 customization removal.
diff --git a/nptl/pthread_spin_lock.c b/nptl/pthread_spin_lock.c
index fb9bcc1..2209341 100644
--- a/nptl/pthread_spin_lock.c
+++ b/nptl/pthread_spin_lock.c
@@ -38,7 +38,7 @@ pthread_spin_lock (pthread_spinlock_t *lock)
We assume that the first try mostly will be successful, and we use
atomic_exchange. For the subsequent tries we use
atomic_compare_and_exchange. */
- if (atomic_exchange_acq (lock, 1) == 0)
+ if (__glibc_likely (atomic_exchange_acq (lock, 1) == 0))
return 0;
do
diff --git a/nptl/pthread_spin_unlock.c b/nptl/pthread_spin_unlock.c
index d4b63ac..ca534a0 100644
--- a/nptl/pthread_spin_unlock.c
+++ b/nptl/pthread_spin_unlock.c
@@ -23,7 +23,6 @@
int
pthread_spin_unlock (pthread_spinlock_t *lock)
{
- atomic_full_barrier ();
- *lock = 0;
+ atomic_store_release (lock, 0);
return 0;
}
diff --git a/sysdeps/x86_64/nptl/pthread_spin_init.c b/sysdeps/x86_64/nptl/pthread_spin_init.c
deleted file mode 100644
index f249c6f..0000000
--- a/sysdeps/x86_64/nptl/pthread_spin_init.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/i386/nptl/pthread_spin_init.c>
diff --git a/sysdeps/x86_64/nptl/pthread_spin_lock.S b/sysdeps/x86_64/nptl/pthread_spin_lock.S
deleted file mode 100644
index b871241..0000000
--- a/sysdeps/x86_64/nptl/pthread_spin_lock.S
+++ /dev/null
@@ -1,34 +0,0 @@
-/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <lowlevellock.h>
-#include <sysdep.h>
-
-ENTRY(pthread_spin_lock)
-1: LOCK
- decl 0(%rdi)
- jne 2f
- xor %eax, %eax
- ret
-
- .align 16
-2: rep
- nop
- cmpl $0, 0(%rdi)
- jg 1b
- jmp 2b
-END(pthread_spin_lock)
diff --git a/sysdeps/x86_64/nptl/pthread_spin_trylock.S b/sysdeps/x86_64/nptl/pthread_spin_trylock.S
deleted file mode 100644
index c9c5317..0000000
--- a/sysdeps/x86_64/nptl/pthread_spin_trylock.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <pthread-errnos.h>
-#include <sysdep.h>
-
-
-#ifdef UP
-# define LOCK
-#else
-# define LOCK lock
-#endif
-
-ENTRY(pthread_spin_trylock)
- movl $1, %eax
- xorl %ecx, %ecx
- LOCK
- cmpxchgl %ecx, (%rdi)
- movl $EBUSY, %eax
- cmovel %ecx, %eax
- retq
-END(pthread_spin_trylock)
diff --git a/sysdeps/x86_64/nptl/pthread_spin_unlock.S b/sysdeps/x86_64/nptl/pthread_spin_unlock.S
deleted file mode 100644
index 188de2e..0000000
--- a/sysdeps/x86_64/nptl/pthread_spin_unlock.S
+++ /dev/null
@@ -1,29 +0,0 @@
-/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-ENTRY(pthread_spin_unlock)
- movl $1, (%rdi)
- xorl %eax, %eax
- retq
-END(pthread_spin_unlock)
-
- /* The implementation of pthread_spin_init is identical. */
- .globl pthread_spin_init
-pthread_spin_init = pthread_spin_unlock
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH][BZ #19490] Add unwind descriptors to pthread_spin_init, etc. on i386
2016-02-01 11:37 ` Torvald Riegel
@ 2016-02-01 12:04 ` Szabolcs Nagy
2016-02-01 12:26 ` Szabolcs Nagy
2016-02-01 12:32 ` Torvald Riegel
0 siblings, 2 replies; 7+ messages in thread
From: Szabolcs Nagy @ 2016-02-01 12:04 UTC (permalink / raw)
To: Torvald Riegel, Paul Pluzhnikov; +Cc: GLIBC Devel, nd
On 01/02/16 11:36, Torvald Riegel wrote:
> On Sun, 2016-01-31 at 15:09 -0800, Paul Pluzhnikov wrote:
>> On Mon, Jan 25, 2016 at 5:06 AM, Torvald Riegel <triegel@redhat.com> wrote:
>>
>>> For the spinlocks, I'd really prefer if we could just remove the asm
>>> files. The generic implementation should basically produce the same
>>> code; if not, we should try to fix that instead of keeping the asm
>>> files.
>>
>> Using gcc-4.8.4 (4.8.4-2ubuntu1~14.04):
>>
>> $ objdump -d nptl/pthread_spin_unlock.o
>>
>> nptl/pthread_spin_unlock.o: file format elf32-i386
>>
>>
>> Disassembly of section .text:
>>
>> 00000000 <pthread_spin_unlock>:
>> 0: f0 83 0c 24 00 lock orl $0x0,(%esp)
>> 5: 8b 44 24 04 mov 0x4(%esp),%eax
>> 9: c7 00 00 00 00 00 movl $0x0,(%eax)
>> f: 31 c0 xor %eax,%eax
>> 11: c3 ret
>>
>> This isn't quite the same as sysdeps/i386/nptl/pthread_spin_unlock.S
>
> This is because nptl/pthread_spin_unlock.c still issues a full barrier.
> If this is changed to an atomic_store_release, one gets this on x86_64:
>
> 0000000000000000 <pthread_spin_unlock>:
> 0: c7 07 00 00 00 00 movl $0x0,(%rdi)
> 6: 31 c0 xor %eax,%eax
> 8: c3
>
> Perhaps now is a good time to finally get this done. Most archs are
> already using acquire semantics, IIRC. I think aarch64 and arm are the
> only major ones that happen to use the current generic unlock with full
> barrier -- but they only use acquire MO on unlock, so there's really no
> consistent pattern that would justify this.
i think mb(); store(); is actually *weaker* than store_release();
and thus on some hw it might be a bit faster, but i'm not against
changing to store_release (that's one step closer to posix semantics).
(full barrier is weaker here because store_release() has to
prevent reordering wrt load_acquire in *both* directions, so
it may be implemented by the hw like mb(); store(); mb();
although that's not the most efficient implementation..)
> Note that there's an ongoing debate about whether POSIX requires
> pthread_spin_unlock to be a full barrier, whether it should or should
the current unlock is not enough for posix if trylock is
acquire MO:
T1:
unlock(l1);
if (trylock(l2))...
T2:
unlock(l2);
if (trylock(l1))...
with one sided barrier, both trylock can fail to grab
the lock (the loads are not guaranteed to happen after
the stores) which is not seq cst, this does not happen
with release MO unlock.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH][BZ #19490] Add unwind descriptors to pthread_spin_init, etc. on i386
2016-02-01 12:04 ` Szabolcs Nagy
@ 2016-02-01 12:26 ` Szabolcs Nagy
2016-02-01 12:32 ` Torvald Riegel
1 sibling, 0 replies; 7+ messages in thread
From: Szabolcs Nagy @ 2016-02-01 12:26 UTC (permalink / raw)
To: Torvald Riegel, Paul Pluzhnikov; +Cc: GLIBC Devel, nd
On 01/02/16 12:03, Szabolcs Nagy wrote:
> the current unlock is not enough for posix if trylock is
> acquire MO:
>
> T1:
> unlock(l1);
> if (trylock(l2))...
>
> T2:
> unlock(l2);
> if (trylock(l1))...
>
> with one sided barrier, both trylock can fail to grab
> the lock (the loads are not guaranteed to happen after
> the stores) which is not seq cst, this does not happen
> with release MO unlock.
>
sorry, acquire/release MO is not enough to fix this
in c11, on the hw level i believe it is enough with
arm memory model.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH][BZ #19490] Add unwind descriptors to pthread_spin_init, etc. on i386
2016-02-01 12:04 ` Szabolcs Nagy
2016-02-01 12:26 ` Szabolcs Nagy
@ 2016-02-01 12:32 ` Torvald Riegel
1 sibling, 0 replies; 7+ messages in thread
From: Torvald Riegel @ 2016-02-01 12:32 UTC (permalink / raw)
To: Szabolcs Nagy; +Cc: Paul Pluzhnikov, GLIBC Devel, nd
On Mon, 2016-02-01 at 12:03 +0000, Szabolcs Nagy wrote:
> On 01/02/16 11:36, Torvald Riegel wrote:
> > On Sun, 2016-01-31 at 15:09 -0800, Paul Pluzhnikov wrote:
> >> On Mon, Jan 25, 2016 at 5:06 AM, Torvald Riegel <triegel@redhat.com> wrote:
> >>
> >>> For the spinlocks, I'd really prefer if we could just remove the asm
> >>> files. The generic implementation should basically produce the same
> >>> code; if not, we should try to fix that instead of keeping the asm
> >>> files.
> >>
> >> Using gcc-4.8.4 (4.8.4-2ubuntu1~14.04):
> >>
> >> $ objdump -d nptl/pthread_spin_unlock.o
> >>
> >> nptl/pthread_spin_unlock.o: file format elf32-i386
> >>
> >>
> >> Disassembly of section .text:
> >>
> >> 00000000 <pthread_spin_unlock>:
> >> 0: f0 83 0c 24 00 lock orl $0x0,(%esp)
> >> 5: 8b 44 24 04 mov 0x4(%esp),%eax
> >> 9: c7 00 00 00 00 00 movl $0x0,(%eax)
> >> f: 31 c0 xor %eax,%eax
> >> 11: c3 ret
> >>
> >> This isn't quite the same as sysdeps/i386/nptl/pthread_spin_unlock.S
> >
> > This is because nptl/pthread_spin_unlock.c still issues a full barrier.
> > If this is changed to an atomic_store_release, one gets this on x86_64:
> >
> > 0000000000000000 <pthread_spin_unlock>:
> > 0: c7 07 00 00 00 00 movl $0x0,(%rdi)
> > 6: 31 c0 xor %eax,%eax
> > 8: c3 retq
> >
> > Perhaps now is a good time to finally get this done. Most archs are
> > already using acquire semantics, IIRC. I think aarch64 and arm are the
> > only major ones that happen to use the current generic unlock with full
> > barrier -- but they only use acquire MO on unlock, so there's really no
> > consistent pattern that would justify this.
>
> i think mb(); store(); is actually *weaker* than store_release();
If that's indeed the case in the context of the C11 memory model, this
is a bug. But I would be surprised if that's the case. It would also
be a bug if the atomic_full_barrier implementation we have currently is
actually not implementing a C11 seq_cst barrier.
Also cross-check against the mappings here, which I trust to be correct:
http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
> and thus on some hw it might be a bit faster, but i'm not against
> changing to store_release (that's one step closer to posix semantics).
In the context of the memory model used in glibc, store_release is
weaker than a atomic_full_barrier (which is supposed to be at least as
strong as a C11 seq_cst fence).
> (full barrier is weaker here because store_release() has to
> prevent reordering wrt load_acquire in *both* directions, so
> it may be implemented by the hw like mb(); store(); mb();
> although that's not the most efficient implementation..)
I'm not an expert on the ARM memory model, but I believe your assumption
that the semantics we require for atomic_store_release has to prevent
reordering in both directions on ARM is wrong. Even a compiler can
often move stuff from after to before a store_release; the release MO
guarantee is, simplified, something like "if something was before the
release MO on the release side, it will not appear on the observer's
side as if after the release, provided the observer used an acquire load
to observe the release store".
> > Note that there's an ongoing debate about whether POSIX requires
> > pthread_spin_unlock to be a full barrier, whether it should or should
>
> the current unlock is not enough for posix if trylock is
> acquire MO:
>
> T1:
> unlock(l1);
> if (trylock(l2))...
>
> T2:
> unlock(l2);
> if (trylock(l1))...
>
> with one sided barrier, both trylock can fail to grab
> the lock (the loads are not guaranteed to happen after
> the stores) which is not seq cst, this does not happen
> with release MO unlock.
No. If unlock is a release MO store, and trylock is an acquire load,
then both trylocks can fail and both trylock's can succeed. Your
example is similar to Dekker synchronization, and Dekker synchronization
is never guaranteed to produce a winner, and release/acquire are not
sufficient to implement it. I suggest using the cppmem tool to play
around with it and have a look at the possible executions.
If unlock is a seq_cst store and trylock is a seq_cst acquire, this
Dekker implementation would work except that POSIX doesn't guarantee
"synchronizes memory" for a call that fails (so the trylock isn't
sufficient, and you have to assume something like that it can fail
spuriously).
If unlock were an at-least-release MO fence followed by a relaxed MO
store to the lock followed by a seq_cst fence, this example would work.
But this shows, in turn, that (a) "synchronizes memory" can be costly to
implement and (b) POSIX shouldn't try to support hacks that emulate
proper atomics (ie, trylock in the example above).
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2016-02-01 12:32 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-01-24 0:18 [PATCH][BZ #19490] Add unwind descriptors to pthread_spin_init, etc. on i386 Paul Pluzhnikov
2016-01-25 13:06 ` Torvald Riegel
2016-01-31 23:10 ` Paul Pluzhnikov
2016-02-01 11:37 ` Torvald Riegel
2016-02-01 12:04 ` Szabolcs Nagy
2016-02-01 12:26 ` Szabolcs Nagy
2016-02-01 12:32 ` Torvald Riegel
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).