From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1895) id 342D03858D28; Fri, 24 Mar 2023 16:51:47 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 342D03858D28 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1679676707; bh=gbgkv9O/+JQurjZLZNaulzYLJn1DHVXq2xXQtjGEoJg=; h=From:To:Subject:Date:From; b=YEwfOR7mSUxQ49qA82jMAg1XZ2toXvYF/o2jZMvPe37VuYVC/HTANjRj1y9m77QeX EFj38QENHengkKinTVlFqlYA7qKWPWq0A6Hl+DlChZlj7Dmwln4xy72fevYpixE7bq ++o7rLHq+Yx2o9PJP01NAPTAnioIO7J7VqpwkLkQ= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Wilco Dijkstra To: gcc-cvs@gcc.gnu.org Subject: [gcc r13-6855] libatomic: Fix SEQ_CST 128-bit atomic load [PR108891] X-Act-Checkin: gcc X-Git-Author: Wilco Dijkstra X-Git-Refname: refs/heads/master X-Git-Oldrev: 243fa4883cf6fccaaafddcc82e6b58843c82fb30 X-Git-Newrev: 1f641d6aba284e0c277e6684cd6b2c73591cd14d Message-Id: <20230324165147.342D03858D28@sourceware.org> Date: Fri, 24 Mar 2023 16:51:47 +0000 (GMT) List-Id: https://gcc.gnu.org/g:1f641d6aba284e0c277e6684cd6b2c73591cd14d commit r13-6855-g1f641d6aba284e0c277e6684cd6b2c73591cd14d Author: Wilco Dijkstra Date: Fri Feb 10 17:41:05 2023 +0000 libatomic: Fix SEQ_CST 128-bit atomic load [PR108891] The LSE2 ifunc for 16-byte atomic load requires a barrier before the LDP - without it, it effectively has Load-AcquirePC semantics similar to LDAPR, which is less restrictive than what __ATOMIC_SEQ_CST requires. This patch fixes this and adds comments to make it easier to see which sequence is used for each case. Use a load/store exclusive loop for store to simplify testing that the memory ordering is correct (it is slightly faster too). libatomic/ PR libgcc/108891 * config/linux/aarch64/atomic_16.S: Fix libat_load_16_i1. Add comments describing the memory order. 
Diff: --- libatomic/config/linux/aarch64/atomic_16.S | 189 ++++++++++++++++++----------- 1 file changed, 115 insertions(+), 74 deletions(-) diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S index 732c3534a06..05439ce394b 100644 --- a/libatomic/config/linux/aarch64/atomic_16.S +++ b/libatomic/config/linux/aarch64/atomic_16.S @@ -72,33 +72,38 @@ name: \ ENTRY (libat_load_16_i1) cbnz w1, 1f + + /* RELAXED. */ ldp res0, res1, [x0] ret 1: - cmp w1, ACQUIRE - b.hi 2f + cmp w1, SEQ_CST + b.eq 2f + + /* ACQUIRE/CONSUME (Load-AcquirePC semantics). */ ldp res0, res1, [x0] dmb ishld ret -2: + + /* SEQ_CST. */ +2: ldar tmp0, [x0] /* Block reordering with Store-Release instr. */ ldp res0, res1, [x0] - dmb ish + dmb ishld ret END (libat_load_16_i1) ENTRY (libat_store_16_i1) cbnz w4, 1f + + /* RELAXED. */ stp in0, in1, [x0] ret -1: - dmb ish - stp in0, in1, [x0] - cmp w4, SEQ_CST - beq 2f - ret -2: - dmb ish + + /* RELEASE/SEQ_CST. */ +1: ldaxp xzr, tmp0, [x0] + stlxp w4, in0, in1, [x0] + cbnz w4, 1b ret END (libat_store_16_i1) @@ -106,29 +111,33 @@ END (libat_store_16_i1) ENTRY (libat_exchange_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] stxp w4, in0, in1, [x5] cbnz w4, 1b ret 2: cmp w4, ACQUIRE b.hi 4f -3: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME. */ +3: ldaxp res0, res1, [x5] stxp w4, in0, in1, [x5] cbnz w4, 3b ret 4: cmp w4, RELEASE b.ne 6f -5: - ldxp res0, res1, [x5] + + /* RELEASE. */ +5: ldxp res0, res1, [x5] stlxp w4, in0, in1, [x5] cbnz w4, 5b ret -6: - ldaxp res0, res1, [x5] + + /* ACQ_REL/SEQ_CST. */ +6: ldaxp res0, res1, [x5] stlxp w4, in0, in1, [x5] cbnz w4, 6b ret @@ -142,6 +151,8 @@ ENTRY (libat_compare_exchange_16_i1) cbz w4, 2f cmp w4, RELEASE b.hs 3f + + /* ACQUIRE/CONSUME. 
*/ caspa exp0, exp1, in0, in1, [x0] 0: cmp exp0, tmp0 @@ -153,15 +164,18 @@ ENTRY (libat_compare_exchange_16_i1) stp exp0, exp1, [x1] mov x0, 0 ret -2: - casp exp0, exp1, in0, in1, [x0] + + /* RELAXED. */ +2: casp exp0, exp1, in0, in1, [x0] b 0b -3: - b.hi 4f + + /* RELEASE. */ +3: b.hi 4f caspl exp0, exp1, in0, in1, [x0] b 0b -4: - caspal exp0, exp1, in0, in1, [x0] + + /* ACQ_REL/SEQ_CST. */ +4: caspal exp0, exp1, in0, in1, [x0] b 0b END (libat_compare_exchange_16_i1) @@ -169,15 +183,17 @@ END (libat_compare_exchange_16_i1) ENTRY (libat_fetch_add_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] adds tmplo, reslo, inlo adc tmphi, reshi, inhi stxp w4, tmp0, tmp1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] adds tmplo, reslo, inlo adc tmphi, reshi, inhi stlxp w4, tmp0, tmp1, [x5] @@ -189,15 +205,17 @@ END (libat_fetch_add_16_i1) ENTRY (libat_add_fetch_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] adds reslo, reslo, inlo adc reshi, reshi, inhi stxp w4, res0, res1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] adds reslo, reslo, inlo adc reshi, reshi, inhi stlxp w4, res0, res1, [x5] @@ -209,15 +227,17 @@ END (libat_add_fetch_16_i1) ENTRY (libat_fetch_sub_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] subs tmplo, reslo, inlo sbc tmphi, reshi, inhi stxp w4, tmp0, tmp1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] subs tmplo, reslo, inlo sbc tmphi, reshi, inhi stlxp w4, tmp0, tmp1, [x5] @@ -229,15 +249,17 @@ END (libat_fetch_sub_16_i1) ENTRY (libat_sub_fetch_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. 
*/ +1: ldxp res0, res1, [x5] subs reslo, reslo, inlo sbc reshi, reshi, inhi stxp w4, res0, res1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] subs reslo, reslo, inlo sbc reshi, reshi, inhi stlxp w4, res0, res1, [x5] @@ -249,15 +271,17 @@ END (libat_sub_fetch_16_i1) ENTRY (libat_fetch_or_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] orr tmp0, res0, in0 orr tmp1, res1, in1 stxp w4, tmp0, tmp1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] orr tmp0, res0, in0 orr tmp1, res1, in1 stlxp w4, tmp0, tmp1, [x5] @@ -269,15 +293,17 @@ END (libat_fetch_or_16_i1) ENTRY (libat_or_fetch_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] orr res0, res0, in0 orr res1, res1, in1 stxp w4, res0, res1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] orr res0, res0, in0 orr res1, res1, in1 stlxp w4, res0, res1, [x5] @@ -289,15 +315,17 @@ END (libat_or_fetch_16_i1) ENTRY (libat_fetch_and_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] and tmp0, res0, in0 and tmp1, res1, in1 stxp w4, tmp0, tmp1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] and tmp0, res0, in0 and tmp1, res1, in1 stlxp w4, tmp0, tmp1, [x5] @@ -309,15 +337,17 @@ END (libat_fetch_and_16_i1) ENTRY (libat_and_fetch_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] and res0, res0, in0 and res1, res1, in1 stxp w4, res0, res1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. 
*/ +2: ldaxp res0, res1, [x5] and res0, res0, in0 and res1, res1, in1 stlxp w4, res0, res1, [x5] @@ -329,15 +359,17 @@ END (libat_and_fetch_16_i1) ENTRY (libat_fetch_xor_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] eor tmp0, res0, in0 eor tmp1, res1, in1 stxp w4, tmp0, tmp1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] eor tmp0, res0, in0 eor tmp1, res1, in1 stlxp w4, tmp0, tmp1, [x5] @@ -349,15 +381,17 @@ END (libat_fetch_xor_16_i1) ENTRY (libat_xor_fetch_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] eor res0, res0, in0 eor res1, res1, in1 stxp w4, res0, res1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] eor res0, res0, in0 eor res1, res1, in1 stlxp w4, res0, res1, [x5] @@ -371,15 +405,17 @@ ENTRY (libat_fetch_nand_16_i1) mvn in0, in0 mvn in1, in1 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] orn tmp0, in0, res0 orn tmp1, in1, res1 stxp w4, tmp0, tmp1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] orn tmp0, in0, res0 orn tmp1, in1, res1 stlxp w4, tmp0, tmp1, [x5] @@ -393,15 +429,17 @@ ENTRY (libat_nand_fetch_16_i1) mvn in0, in0 mvn in1, in1 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] orn res0, in0, res0 orn res1, in1, res1 stxp w4, res0, res1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] orn res0, in0, res0 orn res1, in1, res1 stlxp w4, res0, res1, [x5] @@ -413,9 +451,12 @@ END (libat_nand_fetch_16_i1) ENTRY (libat_test_and_set_16_i1) mov w2, 1 cbnz w1, 2f + + /* RELAXED. */ swpb w0, w2, [x0] ret + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. 
*/ 2: swpalb w0, w2, [x0] ret END (libat_test_and_set_16_i1)