* [PATCH, AArch64, v3 1/6] aarch64: Extend %R for integer registers
2018-11-01 21:47 [PATCH, AArch64, v3 0/6] LSE atomics out-of-line Richard Henderson
@ 2018-11-01 21:46 ` Richard Henderson
2018-11-01 21:46 ` [PATCH, AArch64, v3 2/6] aarch64: Implement TImode compare-and-swap Richard Henderson
` (6 subsequent siblings)
7 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2018-11-01 21:46 UTC (permalink / raw)
To: gcc-patches
Cc: ramana.radhakrishnan, agraf, marcus.shawcroft, james.greenhalgh,
Richard Henderson
From: Richard Henderson <rth@twiddle.net>
* config/aarch64/aarch64.c (aarch64_print_operand): Allow integer
registers with %R.
---
gcc/config/aarch64/aarch64.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b44ee40115d..930f27d9bac 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7102,7 +7102,7 @@ sizetochar (int size)
'S/T/U/V': Print a FP/SIMD register name for a register list.
The register printed is the FP/SIMD register name
of X + 0/1/2/3 for S/T/U/V.
- 'R': Print a scalar FP/SIMD register name + 1.
+ 'R': Print a scalar Integer/FP/SIMD register name + 1.
'X': Print bottom 16 bits of integer constant in hex.
'w/x': Print a general register name or the zero register
(32-bit or 64-bit).
@@ -7294,12 +7294,13 @@ aarch64_print_operand (FILE *f, rtx x, int code)
break;
case 'R':
- if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
- {
- output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
- return;
- }
- asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
+ if (REG_P (x) && FP_REGNUM_P (REGNO (x)))
+ asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
+ else if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
+ asm_fprintf (f, "x%d", REGNO (x) - R0_REGNUM + 1);
+ else
+ output_operand_lossage ("incompatible register operand for '%%%c'",
+ code);
break;
case 'X':
--
2.17.2
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH, AArch64, v3 2/6] aarch64: Implement TImode compare-and-swap
2018-11-01 21:47 [PATCH, AArch64, v3 0/6] LSE atomics out-of-line Richard Henderson
2018-11-01 21:46 ` [PATCH, AArch64, v3 1/6] aarch64: Extend %R for integer registers Richard Henderson
@ 2018-11-01 21:46 ` Richard Henderson
2018-11-01 21:47 ` [PATCH, AArch64, v3 6/6] Enable -matomic-ool by default Richard Henderson
` (5 subsequent siblings)
7 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2018-11-01 21:46 UTC (permalink / raw)
To: gcc-patches
Cc: ramana.radhakrishnan, agraf, marcus.shawcroft, james.greenhalgh,
Richard Henderson
From: Richard Henderson <rth@twiddle.net>
This pattern will only be used with the __sync functions, because
we do not yet have a bare TImode atomic load.
* config/aarch64/aarch64.c (aarch64_gen_compare_reg): Add support
for NE comparison of TImode values.
(aarch64_emit_load_exclusive): Add support for TImode.
(aarch64_emit_store_exclusive): Likewise.
(aarch64_split_compare_and_swap): Disable strong_zero_p for TImode.
* config/aarch64/atomics.md (@atomic_compare_and_swap<ALLI_TI>):
Change iterator from ALLI to ALLI_TI.
(@atomic_compare_and_swap<JUST_TI>): New.
(@atomic_compare_and_swap<JUST_TI>_lse): New.
(aarch64_load_exclusive_pair): New.
(aarch64_store_exclusive_pair): New.
* config/aarch64/iterators.md (JUST_TI): New.
---
gcc/config/aarch64/aarch64.c | 55 +++++++++++++++++---
gcc/config/aarch64/atomics.md | 91 +++++++++++++++++++++++++++++++--
gcc/config/aarch64/iterators.md | 3 ++
3 files changed, 137 insertions(+), 12 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 930f27d9bac..942f2037235 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1611,10 +1611,40 @@ emit_set_insn (rtx x, rtx y)
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
- machine_mode mode = SELECT_CC_MODE (code, x, y);
- rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
+ machine_mode cmp_mode;
+ machine_mode cc_mode;
+ rtx cc_reg;
- emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
+ if (swap_commutative_operands_p (x, y))
+ {
+ code = swap_condition (code);
+ std::swap (x, y);
+ }
+ cmp_mode = GET_MODE (x);
+
+ if (cmp_mode == TImode)
+ {
+ gcc_assert (code == NE);
+
+ cc_mode = CCmode;
+ cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
+
+ rtx x_lo = operand_subword (x, 0, 0, TImode);
+ rtx y_lo = operand_subword (y, 0, 0, TImode);
+ emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo));
+
+ rtx x_hi = operand_subword (x, 1, 0, TImode);
+ rtx y_hi = operand_subword (y, 1, 0, TImode);
+ emit_insn (gen_ccmpdi (cc_reg, cc_reg, x_hi, y_hi,
+ gen_rtx_EQ (cc_mode, cc_reg, const0_rtx),
+ GEN_INT (AARCH64_EQ)));
+ }
+ else
+ {
+ cc_mode = SELECT_CC_MODE (code, x, y);
+ cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
+ emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y));
+ }
return cc_reg;
}
@@ -2162,7 +2192,6 @@ aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
gcc_assert (r != NULL);
return rtx_equal_p (x, r);
}
-
static rtx
aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
@@ -14619,16 +14648,26 @@ static void
aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
rtx mem, rtx model_rtx)
{
- emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx));
+ if (mode == TImode)
+ emit_insn (gen_aarch64_load_exclusive_pair (gen_lowpart (DImode, rval),
+ gen_highpart (DImode, rval),
+ mem, model_rtx));
+ else
+ emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx));
}
/* Emit store exclusive. */
static void
aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
- rtx rval, rtx mem, rtx model_rtx)
+ rtx mem, rtx rval, rtx model_rtx)
{
- emit_insn (gen_aarch64_store_exclusive (mode, bval, rval, mem, model_rtx));
+ if (mode == TImode)
+ emit_insn (gen_aarch64_store_exclusive_pair
+ (bval, mem, operand_subword (rval, 0, 0, TImode),
+ operand_subword (rval, 1, 0, TImode), model_rtx));
+ else
+ emit_insn (gen_aarch64_store_exclusive (mode, bval, mem, rval, model_rtx));
}
/* Mark the previous jump instruction as unlikely. */
@@ -14755,7 +14794,7 @@ aarch64_split_compare_and_swap (rtx operands[])
CBNZ scratch, .label1
.label2:
CMP rval, 0. */
- bool strong_zero_p = !is_weak && oldval == const0_rtx;
+ bool strong_zero_p = !is_weak && oldval == const0_rtx && mode != TImode;
label1 = NULL;
if (!is_weak)
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index 00f7af4e4ac..08a3a1ff955 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -22,10 +22,10 @@
(define_expand "@atomic_compare_and_swap<mode>"
[(match_operand:SI 0 "register_operand" "") ;; bool out
- (match_operand:ALLI 1 "register_operand" "") ;; val out
- (match_operand:ALLI 2 "aarch64_sync_memory_operand" "") ;; memory
- (match_operand:ALLI 3 "nonmemory_operand" "") ;; expected
- (match_operand:ALLI 4 "aarch64_reg_or_zero" "") ;; desired
+ (match_operand:ALLI_TI 1 "register_operand" "") ;; val out
+ (match_operand:ALLI_TI 2 "aarch64_sync_memory_operand" "") ;; memory
+ (match_operand:ALLI_TI 3 "nonmemory_operand" "") ;; expected
+ (match_operand:ALLI_TI 4 "aarch64_reg_or_zero" "") ;; desired
(match_operand:SI 5 "const_int_operand") ;; is_weak
(match_operand:SI 6 "const_int_operand") ;; mod_s
(match_operand:SI 7 "const_int_operand")] ;; mod_f
@@ -88,6 +88,30 @@
}
)
+(define_insn_and_split "@aarch64_compare_and_swap<mode>"
+ [(set (reg:CC CC_REGNUM) ;; bool out
+ (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
+ (set (match_operand:JUST_TI 0 "register_operand" "=&r") ;; val out
+ (match_operand:JUST_TI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
+ (set (match_dup 1)
+ (unspec_volatile:JUST_TI
+ [(match_operand:JUST_TI 2 "aarch64_reg_or_zero" "rZ") ;; expect
+ (match_operand:JUST_TI 3 "aarch64_reg_or_zero" "rZ") ;; desired
+ (match_operand:SI 4 "const_int_operand") ;; is_weak
+ (match_operand:SI 5 "const_int_operand") ;; mod_s
+ (match_operand:SI 6 "const_int_operand")] ;; mod_f
+ UNSPECV_ATOMIC_CMPSW))
+ (clobber (match_scratch:SI 7 "=&r"))]
+ ""
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ {
+ aarch64_split_compare_and_swap (operands);
+ DONE;
+ }
+)
+
(define_insn "@aarch64_compare_and_swap<mode>_lse"
[(set (match_operand:SI 0 "register_operand" "+r") ;; val out
(zero_extend:SI
@@ -133,6 +157,28 @@
return "casal<atomic_sfx>\t%<w>0, %<w>2, %1";
})
+(define_insn "@aarch64_compare_and_swap<mode>_lse"
+ [(set (match_operand:JUST_TI 0 "register_operand" "+r") ;; val out
+ (match_operand:JUST_TI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
+ (set (match_dup 1)
+ (unspec_volatile:JUST_TI
+ [(match_dup 0) ;; expect
+ (match_operand:JUST_TI 2 "register_operand" "r") ;; desired
+ (match_operand:SI 3 "const_int_operand")] ;; mod_s
+ UNSPECV_ATOMIC_CMPSW))]
+ "TARGET_LSE"
+{
+ enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
+ if (is_mm_relaxed (model))
+ return "casp\t%0, %R0, %2, %R2, %1";
+ else if (is_mm_acquire (model) || is_mm_consume (model))
+ return "caspa\t%0, %R0, %2, %R2, %1";
+ else if (is_mm_release (model))
+ return "caspl\t%0, %R0, %2, %R2, %1";
+ else
+ return "caspal\t%0, %R0, %2, %R2, %1";
+})
+
(define_expand "atomic_exchange<mode>"
[(match_operand:ALLI 0 "register_operand" "")
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "")
@@ -581,6 +627,24 @@
}
)
+(define_insn "aarch64_load_exclusive_pair"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec_volatile:DI
+ [(match_operand:TI 2 "aarch64_sync_memory_operand" "Q")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPECV_LX))
+ (set (match_operand:DI 1 "register_operand" "=r")
+ (unspec_volatile:DI [(match_dup 2) (match_dup 3)] UNSPECV_LX))]
+ ""
+ {
+ enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
+ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model))
+ return "ldxp\t%0, %1, %2";
+ else
+ return "ldaxp\t%0, %1, %2";
+ }
+)
+
(define_insn "@aarch64_store_exclusive<mode>"
[(set (match_operand:SI 0 "register_operand" "=&r")
(unspec_volatile:SI [(const_int 0)] UNSPECV_SX))
@@ -599,6 +663,25 @@
}
)
+(define_insn "aarch64_store_exclusive_pair"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_SX))
+ (set (match_operand:TI 1 "aarch64_sync_memory_operand" "=Q")
+ (unspec_volatile:TI
+ [(match_operand:DI 2 "aarch64_reg_or_zero" "rZ")
+ (match_operand:DI 3 "aarch64_reg_or_zero" "rZ")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPECV_SX))]
+ ""
+ {
+ enum memmodel model = memmodel_from_int (INTVAL (operands[4]));
+ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model))
+ return "stxp\t%w0, %x2, %x3, %1";
+ else
+ return "stlxp\t%w0, %x2, %x3, %1";
+ }
+)
+
(define_expand "mem_thread_fence"
[(match_operand:SI 0 "const_int_operand" "")]
""
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 524e4e6929b..dd26bdbbc6b 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -29,6 +29,9 @@
;; Iterator for HI, SI, DI, some instructions can only work on these modes.
(define_mode_iterator GPI_I16 [(HI "AARCH64_ISA_F16") SI DI])
+;; "Iterator" for just TI -- features like @pattern only work with iterators.
+(define_mode_iterator JUST_TI [TI])
+
;; Iterator for QI and HI modes
(define_mode_iterator SHORT [QI HI])
--
2.17.2
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH, AArch64, v3 6/6] Enable -matomic-ool by default
2018-11-01 21:47 [PATCH, AArch64, v3 0/6] LSE atomics out-of-line Richard Henderson
2018-11-01 21:46 ` [PATCH, AArch64, v3 1/6] aarch64: Extend %R for integer registers Richard Henderson
2018-11-01 21:46 ` [PATCH, AArch64, v3 2/6] aarch64: Implement TImode compare-and-swap Richard Henderson
@ 2018-11-01 21:47 ` Richard Henderson
2018-11-01 21:47 ` [PATCH, AArch64, v3 3/6] aarch64: Tidy aarch64_split_compare_and_swap Richard Henderson
` (4 subsequent siblings)
7 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2018-11-01 21:47 UTC (permalink / raw)
To: gcc-patches
Cc: ramana.radhakrishnan, agraf, marcus.shawcroft, james.greenhalgh
Do Not Merge Upstream.
This is for agraf and his testing within SLES.
---
gcc/common/config/aarch64/aarch64-common.c | 6 ++++--
gcc/config/aarch64/aarch64.c | 6 ++++--
2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/gcc/common/config/aarch64/aarch64-common.c b/gcc/common/config/aarch64/aarch64-common.c
index dd7d4267340..a916df3bcbe 100644
--- a/gcc/common/config/aarch64/aarch64-common.c
+++ b/gcc/common/config/aarch64/aarch64-common.c
@@ -32,9 +32,11 @@
#include "diagnostic.h"
#include "params.h"
-#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
-#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
+#ifdef TARGET_BIG_ENDIAN_DEFAULT
+#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END | MASK_ATOMIC_OOL)
+#else
+#define TARGET_DEFAULT_TARGET_FLAGS (MASK_ATOMIC_OOL)
#endif
#undef TARGET_HANDLE_OPTION
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 9ab8b95c344..b60f364799d 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -18043,9 +18043,11 @@ aarch64_run_selftests (void)
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
-#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
-#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
+#ifdef TARGET_BIG_ENDIAN_DEFAULT
+#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END | MASK_ATOMIC_OOL)
+#else
+#define TARGET_DEFAULT_TARGET_FLAGS (MASK_ATOMIC_OOL)
#endif
#undef TARGET_CLASS_MAX_NREGS
--
2.17.2
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH, AArch64, v3 3/6] aarch64: Tidy aarch64_split_compare_and_swap
2018-11-01 21:47 [PATCH, AArch64, v3 0/6] LSE atomics out-of-line Richard Henderson
` (2 preceding siblings ...)
2018-11-01 21:47 ` [PATCH, AArch64, v3 6/6] Enable -matomic-ool by default Richard Henderson
@ 2018-11-01 21:47 ` Richard Henderson
2018-11-01 21:47 ` [PATCH, AArch64, v3 5/6] aarch64: Implement -matomic-ool Richard Henderson
` (3 subsequent siblings)
7 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2018-11-01 21:47 UTC (permalink / raw)
To: gcc-patches
Cc: ramana.radhakrishnan, agraf, marcus.shawcroft, james.greenhalgh,
Richard Henderson
From: Richard Henderson <rth@twiddle.net>
With aarch64_track_speculation, we had extra code to do exactly what the
!strong_zero_p path already did. The rest is reducing code duplication.
* config/aarch64/aarch64.c (aarch64_split_compare_and_swap): Disable
strong_zero_p for aarch64_track_speculation; unify some code paths;
use aarch64_gen_compare_reg instead of open-coding.
---
gcc/config/aarch64/aarch64.c | 50 ++++++++++--------------------------
1 file changed, 14 insertions(+), 36 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 942f2037235..b29f437aeaf 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14767,13 +14767,11 @@ aarch64_emit_post_barrier (enum memmodel model)
void
aarch64_split_compare_and_swap (rtx operands[])
{
- rtx rval, mem, oldval, newval, scratch;
+ rtx rval, mem, oldval, newval, scratch, x, model_rtx;
machine_mode mode;
bool is_weak;
rtx_code_label *label1, *label2;
- rtx x, cond;
enum memmodel model;
- rtx model_rtx;
rval = operands[0];
mem = operands[1];
@@ -14794,7 +14792,8 @@ aarch64_split_compare_and_swap (rtx operands[])
CBNZ scratch, .label1
.label2:
CMP rval, 0. */
- bool strong_zero_p = !is_weak && oldval == const0_rtx && mode != TImode;
+ bool strong_zero_p = (!is_weak && !aarch64_track_speculation
+ && oldval == const0_rtx && mode != TImode);
label1 = NULL;
if (!is_weak)
@@ -14807,35 +14806,20 @@ aarch64_split_compare_and_swap (rtx operands[])
/* The initial load can be relaxed for a __sync operation since a final
barrier will be emitted to stop code hoisting. */
if (is_mm_sync (model))
- aarch64_emit_load_exclusive (mode, rval, mem,
- GEN_INT (MEMMODEL_RELAXED));
+ aarch64_emit_load_exclusive (mode, rval, mem, GEN_INT (MEMMODEL_RELAXED));
else
aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
if (strong_zero_p)
- {
- if (aarch64_track_speculation)
- {
- /* Emit an explicit compare instruction, so that we can correctly
- track the condition codes. */
- rtx cc_reg = aarch64_gen_compare_reg (NE, rval, const0_rtx);
- x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
- }
- else
- x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
-
- x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
- gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
- aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
- }
+ x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
else
{
- cond = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
- x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
- x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
- gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
- aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+ rtx cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
+ x = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx);
}
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+ gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
+ aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
@@ -14856,22 +14840,16 @@ aarch64_split_compare_and_swap (rtx operands[])
aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
}
else
- {
- cond = gen_rtx_REG (CCmode, CC_REGNUM);
- x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
- emit_insn (gen_rtx_SET (cond, x));
- }
+ aarch64_gen_compare_reg (NE, scratch, const0_rtx);
emit_label (label2);
+
/* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL
to set the condition flags. If this is not used it will be removed by
later passes. */
if (strong_zero_p)
- {
- cond = gen_rtx_REG (CCmode, CC_REGNUM);
- x = gen_rtx_COMPARE (CCmode, rval, const0_rtx);
- emit_insn (gen_rtx_SET (cond, x));
- }
+ aarch64_gen_compare_reg (NE, rval, const0_rtx);
+
/* Emit any final barrier needed for a __sync operation. */
if (is_mm_sync (model))
aarch64_emit_post_barrier (model);
--
2.17.2
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH, AArch64, v3 5/6] aarch64: Implement -matomic-ool
2018-11-01 21:47 [PATCH, AArch64, v3 0/6] LSE atomics out-of-line Richard Henderson
` (3 preceding siblings ...)
2018-11-01 21:47 ` [PATCH, AArch64, v3 3/6] aarch64: Tidy aarch64_split_compare_and_swap Richard Henderson
@ 2018-11-01 21:47 ` Richard Henderson
2019-09-05 9:56 ` Kyrill Tkachov
2018-11-01 21:47 ` [PATCH, AArch64, v3 4/6] aarch64: Add out-of-line functions for LSE atomics Richard Henderson
` (2 subsequent siblings)
7 siblings, 1 reply; 14+ messages in thread
From: Richard Henderson @ 2018-11-01 21:47 UTC (permalink / raw)
To: gcc-patches
Cc: ramana.radhakrishnan, agraf, marcus.shawcroft, james.greenhalgh
* config/aarch64/aarch64.opt (-matomic-ool): New.
* config/aarch64/aarch64.c (aarch64_atomic_ool_func): New.
(aarch64_ool_cas_names, aarch64_ool_swp_names): New.
(aarch64_ool_ldadd_names, aarch64_ool_ldset_names): New.
(aarch64_ool_ldclr_names, aarch64_ool_ldeor_names): New.
(aarch64_expand_compare_and_swap): Honor TARGET_ATOMIC_OOL.
* config/aarch64/atomics.md (atomic_exchange<ALLI>): Likewise.
(atomic_<atomic_op><ALLI>): Likewise.
(atomic_fetch_<atomic_op><ALLI>): Likewise.
(atomic_<atomic_op>_fetch<ALLI>): Likewise.
---
gcc/config/aarch64/aarch64-protos.h | 13 +++
gcc/config/aarch64/aarch64.c | 87 +++++++++++++++++
.../atomic-comp-swap-release-acquire.c | 2 +-
.../gcc.target/aarch64/atomic-op-acq_rel.c | 2 +-
.../gcc.target/aarch64/atomic-op-acquire.c | 2 +-
.../gcc.target/aarch64/atomic-op-char.c | 2 +-
.../gcc.target/aarch64/atomic-op-consume.c | 2 +-
.../gcc.target/aarch64/atomic-op-imm.c | 2 +-
.../gcc.target/aarch64/atomic-op-int.c | 2 +-
.../gcc.target/aarch64/atomic-op-long.c | 2 +-
.../gcc.target/aarch64/atomic-op-relaxed.c | 2 +-
.../gcc.target/aarch64/atomic-op-release.c | 2 +-
.../gcc.target/aarch64/atomic-op-seq_cst.c | 2 +-
.../gcc.target/aarch64/atomic-op-short.c | 2 +-
.../aarch64/atomic_cmp_exchange_zero_reg_1.c | 2 +-
.../atomic_cmp_exchange_zero_strong_1.c | 2 +-
.../gcc.target/aarch64/sync-comp-swap.c | 2 +-
.../gcc.target/aarch64/sync-op-acquire.c | 2 +-
.../gcc.target/aarch64/sync-op-full.c | 2 +-
gcc/config/aarch64/aarch64.opt | 4 +
gcc/config/aarch64/atomics.md | 94 +++++++++++++++++--
gcc/doc/invoke.texi | 14 ++-
22 files changed, 220 insertions(+), 26 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 1fe1a50d52a..1c1877cd200 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -630,4 +630,17 @@ poly_uint64 aarch64_regmode_natural_size (machine_mode);
bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT);
+struct atomic_ool_names
+{
+ const char *str[5][4];
+};
+
+rtx aarch64_atomic_ool_func (machine_mode mode, rtx model_rtx,
+ const atomic_ool_names *names);
+extern const atomic_ool_names aarch64_ool_swp_names;
+extern const atomic_ool_names aarch64_ool_ldadd_names;
+extern const atomic_ool_names aarch64_ool_ldset_names;
+extern const atomic_ool_names aarch64_ool_ldclr_names;
+extern const atomic_ool_names aarch64_ool_ldeor_names;
+
#endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b29f437aeaf..9ab8b95c344 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14679,6 +14679,82 @@ aarch64_emit_unlikely_jump (rtx insn)
add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
}
+/* We store the names of the various atomic helpers in a 5x4 array.
+ Return the libcall function given MODE, MODEL and NAMES. */
+
+rtx
+aarch64_atomic_ool_func (machine_mode mode, rtx model_rtx,
+ const atomic_ool_names *names)
+{
+ memmodel model = memmodel_base (INTVAL (model_rtx));
+ int mode_idx, model_idx;
+
+ switch (mode)
+ {
+ case E_QImode:
+ mode_idx = 0;
+ break;
+ case E_HImode:
+ mode_idx = 1;
+ break;
+ case E_SImode:
+ mode_idx = 2;
+ break;
+ case E_DImode:
+ mode_idx = 3;
+ break;
+ case E_TImode:
+ mode_idx = 4;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (model)
+ {
+ case MEMMODEL_RELAXED:
+ model_idx = 0;
+ break;
+ case MEMMODEL_CONSUME:
+ case MEMMODEL_ACQUIRE:
+ model_idx = 1;
+ break;
+ case MEMMODEL_RELEASE:
+ model_idx = 2;
+ break;
+ case MEMMODEL_ACQ_REL:
+ case MEMMODEL_SEQ_CST:
+ model_idx = 3;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ return init_one_libfunc_visibility (names->str[mode_idx][model_idx],
+ VISIBILITY_HIDDEN);
+}
+
+#define DEF0(B, N) \
+ { "__aa64_" #B #N "_relax", \
+ "__aa64_" #B #N "_acq", \
+ "__aa64_" #B #N "_rel", \
+ "__aa64_" #B #N "_acq_rel" }
+
+#define DEF4(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \
+ { NULL, NULL, NULL, NULL }
+#define DEF5(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), DEF0(B, 16)
+
+static const atomic_ool_names aarch64_ool_cas_names = { { DEF5(cas) } };
+const atomic_ool_names aarch64_ool_swp_names = { { DEF4(swp) } };
+const atomic_ool_names aarch64_ool_ldadd_names = { { DEF4(ldadd) } };
+const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } };
+const atomic_ool_names aarch64_ool_ldclr_names = { { DEF4(ldclr) } };
+const atomic_ool_names aarch64_ool_ldeor_names = { { DEF4(ldeor) } };
+
+#undef DEF0
+#undef DEF4
+#undef DEF5
+
/* Expand a compare and swap pattern. */
void
@@ -14725,6 +14801,17 @@ aarch64_expand_compare_and_swap (rtx operands[])
newval, mod_s));
cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
}
+ else if (TARGET_ATOMIC_OOL)
+ {
+ /* Oldval must satisfy compare afterward. */
+ if (!aarch64_plus_operand (oldval, mode))
+ oldval = force_reg (mode, oldval);
+ rtx func = aarch64_atomic_ool_func (mode, mod_s, &aarch64_ool_cas_names);
+ rval = emit_library_call_value (func, NULL_RTX, LCT_NORMAL, r_mode,
+ oldval, mode, newval, mode,
+ XEXP (mem, 0), ptr_mode);
+ cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
+ }
else
{
/* The oldval predicate varies by mode. Test it and force to reg. */
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c b/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
index 49ca5d0d09c..e92f205c3a8 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf" } */
+/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf -mno-atomic-ool" } */
#include "atomic-comp-swap-release-acquire.x"
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
index 74f26348e42..6965431f7d9 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=armv8-a+nolse -O2" } */
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
#include "atomic-op-acq_rel.x"
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
index 66c1b1efe20..07dbca49d56 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=armv8-a+nolse -O2" } */
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
#include "atomic-op-acquire.x"
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
index c09d0434ecf..73bfbb7afc9 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=armv8-a+nolse -O2" } */
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
#include "atomic-op-char.x"
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
index 5783ab84f5c..c7945b3a22d 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=armv8-a+nolse -O2" } */
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
#include "atomic-op-consume.x"
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c
index 18b8f0b04e9..e46bb3de7c1 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=armv8-a+nolse -O2" } */
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
int v = 0;
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
index 8520f0839ba..9b55deb5225 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=armv8-a+nolse -O2" } */
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
#include "atomic-op-int.x"
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c
index d011f8c5ce2..2622f75331f 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=armv8-a+nolse -O2" } */
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
long v = 0;
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
index ed96bfdb978..f118a37a352 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=armv8-a+nolse -O2" } */
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
#include "atomic-op-relaxed.x"
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
index fc4be17de89..579634b08e8 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=armv8-a+nolse -O2" } */
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
#include "atomic-op-release.x"
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
index 613000fe490..016b0d6619f 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=armv8-a+nolse -O2" } */
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
#include "atomic-op-seq_cst.x"
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
index e82c8118ece..978bd1d8377 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=armv8-a+nolse -O2" } */
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
#include "atomic-op-short.x"
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
index f2a21ddf2e1..77430ecdbce 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -march=armv8-a+nolse" } */
+/* { dg-options "-O2 -march=armv8-a+nolse -mno-atomic-ool" } */
/* { dg-skip-if "" { *-*-* } { "-mcpu=*" } { "" } } */
int
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
index 8d2ae67dfbe..7d58b2f6bd0 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -march=armv8-a+nolse" } */
+/* { dg-options "-O2 -march=armv8-a+nolse -mno-atomic-ool" } */
/* { dg-skip-if "" { *-*-* } { "-mcpu=*" } { "" } } */
int
diff --git a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c
index e571b2f13b3..7fc5885d0fd 100644
--- a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c
+++ b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf" } */
+/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf -mno-atomic-ool" } */
#include "sync-comp-swap.x"
diff --git a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c
index 357bf1be3b2..6ad0daa8998 100644
--- a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c
+++ b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=armv8-a+nolse -O2" } */
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
#include "sync-op-acquire.x"
diff --git a/gcc/testsuite/gcc.target/aarch64/sync-op-full.c b/gcc/testsuite/gcc.target/aarch64/sync-op-full.c
index c6ba1629965..9a7afeb70d3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sync-op-full.c
+++ b/gcc/testsuite/gcc.target/aarch64/sync-op-full.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=armv8-a+nolse -O2" } */
+/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
#include "sync-op-full.x"
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index b2e80cbf6f1..83166834165 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -218,3 +218,7 @@ Enables verbose cost model dumping in the debug dump files.
mtrack-speculation
Target Var(aarch64_track_speculation)
Generate code to track when the CPU might be speculating incorrectly.
+
+matomic-ool
+Target Report Mask(ATOMIC_OOL) Save
+Generate local calls to out-of-line atomic operations.
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index 08a3a1ff955..24c1fabee59 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -186,16 +186,27 @@
(match_operand:SI 3 "const_int_operand" "")]
""
{
- rtx (*gen) (rtx, rtx, rtx, rtx);
-
/* Use an atomic SWP when available. */
if (TARGET_LSE)
- gen = gen_aarch64_atomic_exchange<mode>_lse;
+ {
+ emit_insn (gen_aarch64_atomic_exchange<mode>_lse
+ (operands[0], operands[1], operands[2], operands[3]));
+ }
+ else if (TARGET_ATOMIC_OOL)
+ {
+ machine_mode mode = <MODE>mode;
+ rtx func = aarch64_atomic_ool_func (mode, operands[3],
+ &aarch64_ool_swp_names);
+ rtx rval = emit_library_call_value (func, operands[0], LCT_NORMAL,
+ mode, operands[2], mode,
+ XEXP (operands[1], 0), ptr_mode);
+ emit_move_insn (operands[0], rval);
+ }
else
- gen = gen_aarch64_atomic_exchange<mode>;
-
- emit_insn (gen (operands[0], operands[1], operands[2], operands[3]));
-
+ {
+ emit_insn (gen_aarch64_atomic_exchange<mode>
+ (operands[0], operands[1], operands[2], operands[3]));
+ }
DONE;
}
)
@@ -280,6 +291,39 @@
}
operands[1] = force_reg (<MODE>mode, operands[1]);
}
+ else if (TARGET_ATOMIC_OOL)
+ {
+ const atomic_ool_names *names;
+ switch (<CODE>)
+ {
+ case MINUS:
+ operands[1] = expand_simple_unop (<MODE>mode, NEG, operands[1],
+ NULL, 1);
+ /* fallthru */
+ case PLUS:
+ names = &aarch64_ool_ldadd_names;
+ break;
+ case IOR:
+ names = &aarch64_ool_ldset_names;
+ break;
+ case XOR:
+ names = &aarch64_ool_ldeor_names;
+ break;
+ case AND:
+ operands[1] = expand_simple_unop (<MODE>mode, NOT, operands[1],
+ NULL, 1);
+ names = &aarch64_ool_ldclr_names;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ machine_mode mode = <MODE>mode;
+ rtx func = aarch64_atomic_ool_func (mode, operands[2], names);
+ emit_library_call_value (func, NULL_RTX, LCT_NORMAL, mode,
+ operands[1], mode,
+ XEXP (operands[0], 0), ptr_mode);
+ DONE;
+ }
else
gen = gen_aarch64_atomic_<atomic_optab><mode>;
@@ -405,6 +449,40 @@
}
operands[2] = force_reg (<MODE>mode, operands[2]);
}
+ else if (TARGET_ATOMIC_OOL)
+ {
+ const atomic_ool_names *names;
+ switch (<CODE>)
+ {
+ case MINUS:
+ operands[2] = expand_simple_unop (<MODE>mode, NEG, operands[2],
+ NULL, 1);
+ /* fallthru */
+ case PLUS:
+ names = &aarch64_ool_ldadd_names;
+ break;
+ case IOR:
+ names = &aarch64_ool_ldset_names;
+ break;
+ case XOR:
+ names = &aarch64_ool_ldeor_names;
+ break;
+ case AND:
+ operands[2] = expand_simple_unop (<MODE>mode, NOT, operands[2],
+ NULL, 1);
+ names = &aarch64_ool_ldclr_names;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ machine_mode mode = <MODE>mode;
+ rtx func = aarch64_atomic_ool_func (mode, operands[3], names);
+ rtx rval = emit_library_call_value (func, operands[0], LCT_NORMAL, mode,
+ operands[2], mode,
+ XEXP (operands[1], 0), ptr_mode);
+ emit_move_insn (operands[0], rval);
+ DONE;
+ }
else
gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>;
@@ -494,7 +572,7 @@
{
/* Use an atomic load-operate instruction when possible. In this case
we will re-compute the result from the original mem value. */
- if (TARGET_LSE)
+ if (TARGET_LSE || TARGET_ATOMIC_OOL)
{
rtx tmp = gen_reg_rtx (<MODE>mode);
operands[2] = force_reg (<MODE>mode, operands[2]);
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 284594df010..70bd0d0a0a1 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -623,7 +623,7 @@ Objective-C and Objective-C++ Dialects}.
-mpc-relative-literal-loads @gol
-msign-return-address=@var{scope} @gol
-march=@var{name} -mcpu=@var{name} -mtune=@var{name} @gol
--moverride=@var{string} -mverbose-cost-dump -mtrack-speculation}
+-moverride=@var{string} -mverbose-cost-dump -mtrack-speculation -matomic-ool}
@emph{Adapteva Epiphany Options}
@gccoptlist{-mhalf-reg-file -mprefer-short-insn-regs @gol
@@ -15109,6 +15109,18 @@ be used by the compiler when expanding calls to
@code{__builtin_speculation_safe_copy} to permit a more efficient code
sequence to be generated.
+@item -matomic-ool
+@itemx -mno-atomic-ool
+Enable or disable calls to out-of-line helpers to implement atomic operations.
+These helpers will, at runtime, determine if ARMv8.1-Atomics instructions
+should be used; if not, they will use the load/store-exclusive instructions
+that are present in the base ARMv8.0 ISA.
+
+This option is only applicable when compiling for the base ARMv8.0
+instruction set. If using a later revision, e.g. @option{-march=armv8.1-a}
+or @option{-march=armv8-a+lse}, the ARMv8.1-Atomics instructions will be
+used directly.
+
@item -march=@var{name}
@opindex march
Specify the name of the target architecture and, optionally, one or
--
2.17.2
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH, AArch64, v3 5/6] aarch64: Implement -matomic-ool
2018-11-01 21:47 ` [PATCH, AArch64, v3 5/6] aarch64: Implement -matomic-ool Richard Henderson
@ 2019-09-05 9:56 ` Kyrill Tkachov
2019-09-05 12:17 ` Richard Henderson
0 siblings, 1 reply; 14+ messages in thread
From: Kyrill Tkachov @ 2019-09-05 9:56 UTC (permalink / raw)
To: Richard Henderson, gcc-patches
Cc: Ramana Radhakrishnan, agraf, Marcus Shawcroft, James Greenhalgh
Hi Richard,
On 11/1/18 9:46 PM, Richard Henderson wrote:
> 	* config/aarch64/aarch64.opt (-matomic-ool): New.
> 	* config/aarch64/aarch64.c (aarch64_atomic_ool_func): New.
> 	(aarch64_ool_cas_names, aarch64_ool_swp_names): New.
> 	(aarch64_ool_ldadd_names, aarch64_ool_ldset_names): New.
> 	(aarch64_ool_ldclr_names, aarch64_ool_ldeor_names): New.
> 	(aarch64_expand_compare_and_swap): Honor TARGET_ATOMIC_OOL.
> 	* config/aarch64/atomics.md (atomic_exchange<ALLI>): Likewise.
> 	(atomic_<atomic_op><ALLI>): Likewise.
> 	(atomic_fetch_<atomic_op><ALLI>): Likewise.
> 	(atomic_<atomic_op>_fetch<ALLI>): Likewise.
> ---
>  gcc/config/aarch64/aarch64-protos.h          | 13 +++
>  gcc/config/aarch64/aarch64.c                 | 87 +++++++++++++++++
>  .../atomic-comp-swap-release-acquire.c       | 2 +-
>  .../gcc.target/aarch64/atomic-op-acq_rel.c   | 2 +-
>  .../gcc.target/aarch64/atomic-op-acquire.c   | 2 +-
>  .../gcc.target/aarch64/atomic-op-char.c      | 2 +-
>  .../gcc.target/aarch64/atomic-op-consume.c   | 2 +-
>  .../gcc.target/aarch64/atomic-op-imm.c       | 2 +-
>  .../gcc.target/aarch64/atomic-op-int.c       | 2 +-
>  .../gcc.target/aarch64/atomic-op-long.c      | 2 +-
>  .../gcc.target/aarch64/atomic-op-relaxed.c   | 2 +-
>  .../gcc.target/aarch64/atomic-op-release.c   | 2 +-
>  .../gcc.target/aarch64/atomic-op-seq_cst.c   | 2 +-
>  .../gcc.target/aarch64/atomic-op-short.c     | 2 +-
>  .../aarch64/atomic_cmp_exchange_zero_reg_1.c | 2 +-
>  .../atomic_cmp_exchange_zero_strong_1.c      | 2 +-
>  .../gcc.target/aarch64/sync-comp-swap.c      | 2 +-
>  .../gcc.target/aarch64/sync-op-acquire.c     | 2 +-
>  .../gcc.target/aarch64/sync-op-full.c        | 2 +-
>  gcc/config/aarch64/aarch64.opt               | 4 +
>  gcc/config/aarch64/atomics.md                | 94 +++++++++++++++++--
>  gcc/doc/invoke.texi                          | 14 ++-
> Â 22 files changed, 220 insertions(+), 26 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h
> b/gcc/config/aarch64/aarch64-protos.h
> index 1fe1a50d52a..1c1877cd200 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -630,4 +630,17 @@ poly_uint64 aarch64_regmode_natural_size
> (machine_mode);
>
> Â bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT);
>
> +struct atomic_ool_names
> +{
> +  const char *str[5][4];
> +};
> +
> +rtx aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
> +			    const atomic_ool_names *names);
> +extern const atomic_ool_names aarch64_ool_swp_names;
> +extern const atomic_ool_names aarch64_ool_ldadd_names;
> +extern const atomic_ool_names aarch64_ool_ldset_names;
> +extern const atomic_ool_names aarch64_ool_ldclr_names;
> +extern const atomic_ool_names aarch64_ool_ldeor_names;
> +
> Â #endif /* GCC_AARCH64_PROTOS_H */
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index b29f437aeaf..9ab8b95c344 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -14679,6 +14679,82 @@ aarch64_emit_unlikely_jump (rtx insn)
> Â Â add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
> Â }
>
> +/* We store the names of the various atomic helpers in a 5x4 array.
> +  Return the libcall function given MODE, MODEL and NAMES. */
> +
> +rtx
> +aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
> +			const atomic_ool_names *names)
> +{
> +  memmodel model = memmodel_base (INTVAL (model_rtx));
> +  int mode_idx, model_idx;
> +
> +Â switch (mode)
> +Â Â Â {
> +Â Â Â case E_QImode:
> +Â Â Â Â Â mode_idx = 0;
> +Â Â Â Â Â break;
> +Â Â Â case E_HImode:
> +Â Â Â Â Â mode_idx = 1;
> +Â Â Â Â Â break;
> +Â Â Â case E_SImode:
> +Â Â Â Â Â mode_idx = 2;
> +Â Â Â Â Â break;
> +Â Â Â case E_DImode:
> +Â Â Â Â Â mode_idx = 3;
> +Â Â Â Â Â break;
> +Â Â Â case E_TImode:
> +Â Â Â Â Â mode_idx = 4;
> +Â Â Â Â Â break;
> +Â Â Â default:
> +Â Â Â Â Â gcc_unreachable ();
> +Â Â Â }
> +
> +Â switch (model)
> +Â Â Â {
> +Â Â Â case MEMMODEL_RELAXED:
> +Â Â Â Â Â model_idx = 0;
> +Â Â Â Â Â break;
> +Â Â Â case MEMMODEL_CONSUME:
> +Â Â Â case MEMMODEL_ACQUIRE:
> +Â Â Â Â Â model_idx = 1;
> +Â Â Â Â Â break;
> +Â Â Â case MEMMODEL_RELEASE:
> +Â Â Â Â Â model_idx = 2;
> +Â Â Â Â Â break;
> +Â Â Â case MEMMODEL_ACQ_REL:
> +Â Â Â case MEMMODEL_SEQ_CST:
> +Â Â Â Â Â model_idx = 3;
> +Â Â Â Â Â break;
> +Â Â Â default:
> +Â Â Â Â Â gcc_unreachable ();
> +Â Â Â }
> +
> +Â return init_one_libfunc_visibility (names->str[mode_idx][model_idx],
> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â VISIBILITY_HIDDEN);
> +}
> +
> +#define DEF0(B, N) \
> +  { "__aa64_" #B #N "_relax", \
> +    "__aa64_" #B #N "_acq", \
> +    "__aa64_" #B #N "_rel", \
> +    "__aa64_" #B #N "_acq_rel" }
> +
> +#define DEF4(B)  DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \
> +		 { NULL, NULL, NULL, NULL }
> +#define DEF5(B)  DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), DEF0(B, 16)
> +
> +static const atomic_ool_names aarch64_ool_cas_names = { { DEF5(cas) } };
> +const atomic_ool_names aarch64_ool_swp_names = { { DEF4(swp) } };
> +const atomic_ool_names aarch64_ool_ldadd_names = { { DEF4(ldadd) } };
> +const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } };
> +const atomic_ool_names aarch64_ool_ldclr_names = { { DEF4(ldclr) } };
> +const atomic_ool_names aarch64_ool_ldeor_names = { { DEF4(ldeor) } };
> +
> +#undef DEF0
> +#undef DEF4
> +#undef DEF5
> +
>  /* Expand a compare and swap pattern. */
>
> Â void
> @@ -14725,6 +14801,17 @@ aarch64_expand_compare_and_swap (rtx operands[])
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â newval, mod_s));
> Â Â Â Â Â Â cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
> Â Â Â Â }
> +Â else if (TARGET_ATOMIC_OOL)
> +Â Â Â {
> +     /* Oldval must satisfy compare afterward. */
> +Â Â Â Â Â if (!aarch64_plus_operand (oldval, mode))
> +Â Â Â Â Â Â oldval = force_reg (mode, oldval);
> +Â Â Â Â Â rtx func = aarch64_atomic_ool_func (mode, mod_s,
> &aarch64_ool_cas_names);
> +Â Â Â Â Â rval = emit_library_call_value (func, NULL_RTX, LCT_NORMAL, r_mode,
> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â oldval, mode, newval, mode,
> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â XEXP (mem, 0), ptr_mode);
As reported at https://gcc.gnu.org/ml/gcc-patches/2019-09/msg00118.html
I've encountered ICEs here with -mabi=ilp32 due to the mode of the
address of MEM.
rtx addr = XEXP (mem, 0);
if (Pmode != ptr_mode)
  addr = convert_memory_address (ptr_mode, addr);
above the emit_library_call_value to force the address into ptr_mode and
use addr as the argument fixed that for me.
I needed to do similar additions to the other places in the patch that
call emit_library_call_value.
This allowed an aarch64-none-elf build to succeed (which builds an
-mabi=ilp32 multilib) but I'm not sure if it's papering over a problem?
Thanks,
Kyrill
> +Â Â Â Â Â cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
> +Â Â Â }
> Â Â else
> Â Â Â Â {
>       /* The oldval predicate varies by mode. Test it and force to
> reg. */
> diff --git
> a/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
> b/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
> index 49ca5d0d09c..e92f205c3a8 100644
> --- a/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
> +++ b/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
> @@ -1,5 +1,5 @@
> Â /* { dg-do compile } */
> -/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf" } */
> +/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf
> -mno-atomic-ool" } */
>
> Â #include "atomic-comp-swap-release-acquire.x"
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
> b/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
> index 74f26348e42..6965431f7d9 100644
> --- a/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
> +++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
> @@ -1,5 +1,5 @@
> Â /* { dg-do compile } */
> -/* { dg-options "-march=armv8-a+nolse -O2" } */
> +/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
>
> Â #include "atomic-op-acq_rel.x"
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
> b/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
> index 66c1b1efe20..07dbca49d56 100644
> --- a/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
> +++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
> @@ -1,5 +1,5 @@
> Â /* { dg-do compile } */
> -/* { dg-options "-march=armv8-a+nolse -O2" } */
> +/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
>
> Â #include "atomic-op-acquire.x"
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
> b/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
> index c09d0434ecf..73bfbb7afc9 100644
> --- a/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
> +++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
> @@ -1,5 +1,5 @@
> Â /* { dg-do compile } */
> -/* { dg-options "-march=armv8-a+nolse -O2" } */
> +/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
>
> Â #include "atomic-op-char.x"
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
> b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
> index 5783ab84f5c..c7945b3a22d 100644
> --- a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
> +++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
> @@ -1,5 +1,5 @@
> Â /* { dg-do compile } */
> -/* { dg-options "-march=armv8-a+nolse -O2" } */
> +/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
>
> Â #include "atomic-op-consume.x"
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c
> b/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c
> index 18b8f0b04e9..e46bb3de7c1 100644
> --- a/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c
> +++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c
> @@ -1,5 +1,5 @@
> Â /* { dg-do compile } */
> -/* { dg-options "-march=armv8-a+nolse -O2" } */
> +/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
>
> Â int v = 0;
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
> b/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
> index 8520f0839ba..9b55deb5225 100644
> --- a/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
> +++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
> @@ -1,5 +1,5 @@
> Â /* { dg-do compile } */
> -/* { dg-options "-march=armv8-a+nolse -O2" } */
> +/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
>
> Â #include "atomic-op-int.x"
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c
> b/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c
> index d011f8c5ce2..2622f75331f 100644
> --- a/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c
> +++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c
> @@ -1,5 +1,5 @@
> Â /* { dg-do compile } */
> -/* { dg-options "-march=armv8-a+nolse -O2" } */
> +/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
>
> Â long v = 0;
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
> b/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
> index ed96bfdb978..f118a37a352 100644
> --- a/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
> +++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
> @@ -1,5 +1,5 @@
> Â /* { dg-do compile } */
> -/* { dg-options "-march=armv8-a+nolse -O2" } */
> +/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
>
> Â #include "atomic-op-relaxed.x"
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
> b/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
> index fc4be17de89..579634b08e8 100644
> --- a/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
> +++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
> @@ -1,5 +1,5 @@
> Â /* { dg-do compile } */
> -/* { dg-options "-march=armv8-a+nolse -O2" } */
> +/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
>
> Â #include "atomic-op-release.x"
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
> b/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
> index 613000fe490..016b0d6619f 100644
> --- a/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
> +++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
> @@ -1,5 +1,5 @@
> Â /* { dg-do compile } */
> -/* { dg-options "-march=armv8-a+nolse -O2" } */
> +/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
>
> Â #include "atomic-op-seq_cst.x"
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
> b/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
> index e82c8118ece..978bd1d8377 100644
> --- a/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
> +++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
> @@ -1,5 +1,5 @@
> Â /* { dg-do compile } */
> -/* { dg-options "-march=armv8-a+nolse -O2" } */
> +/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
>
> Â #include "atomic-op-short.x"
>
> diff --git
> a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
> b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
> index f2a21ddf2e1..77430ecdbce 100644
> --- a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
> @@ -1,5 +1,5 @@
> Â /* { dg-do compile } */
> -/* { dg-options "-O2 -march=armv8-a+nolse" } */
> +/* { dg-options "-O2 -march=armv8-a+nolse -mno-atomic-ool" } */
> Â /* { dg-skip-if "" { *-*-* } { "-mcpu=*" } { "" } } */
>
> Â int
> diff --git
> a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
> b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
> index 8d2ae67dfbe..7d58b2f6bd0 100644
> --- a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
> @@ -1,5 +1,5 @@
> Â /* { dg-do compile } */
> -/* { dg-options "-O2 -march=armv8-a+nolse" } */
> +/* { dg-options "-O2 -march=armv8-a+nolse -mno-atomic-ool" } */
> Â /* { dg-skip-if "" { *-*-* } { "-mcpu=*" } { "" } } */
>
> Â int
> diff --git a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c
> b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c
> index e571b2f13b3..7fc5885d0fd 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c
> @@ -1,5 +1,5 @@
> Â /* { dg-do compile } */
> -/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf" } */
> +/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf
> -mno-atomic-ool" } */
>
> Â #include "sync-comp-swap.x"
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c
> b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c
> index 357bf1be3b2..6ad0daa8998 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c
> @@ -1,5 +1,5 @@
> Â /* { dg-do compile } */
> -/* { dg-options "-march=armv8-a+nolse -O2" } */
> +/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
>
> Â #include "sync-op-acquire.x"
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sync-op-full.c
> b/gcc/testsuite/gcc.target/aarch64/sync-op-full.c
> index c6ba1629965..9a7afeb70d3 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sync-op-full.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sync-op-full.c
> @@ -1,5 +1,5 @@
> Â /* { dg-do compile } */
> -/* { dg-options "-march=armv8-a+nolse -O2" } */
> +/* { dg-options "-march=armv8-a+nolse -O2 -mno-atomic-ool" } */
>
> Â #include "sync-op-full.x"
>
> diff --git a/gcc/config/aarch64/aarch64.opt
> b/gcc/config/aarch64/aarch64.opt
> index b2e80cbf6f1..83166834165 100644
> --- a/gcc/config/aarch64/aarch64.opt
> +++ b/gcc/config/aarch64/aarch64.opt
> @@ -218,3 +218,7 @@ Enables verbose cost model dumping in the debug
> dump files.
> Â mtrack-speculation
> Â Target Var(aarch64_track_speculation)
> Â Generate code to track when the CPU might be speculating incorrectly.
> +
> +matomic-ool
> +Target Report Mask(ATOMIC_OOL) Save
> +Generate local calls to out-of-line atomic operations.
> diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
> index 08a3a1ff955..24c1fabee59 100644
> --- a/gcc/config/aarch64/atomics.md
> +++ b/gcc/config/aarch64/atomics.md
> @@ -186,16 +186,27 @@
> Â Â (match_operand:SI 3 "const_int_operand" "")]
> Â Â ""
> Â Â {
> -Â Â Â rtx (*gen) (rtx, rtx, rtx, rtx);
> -
>     /* Use an atomic SWP when available. */
> Â Â Â Â if (TARGET_LSE)
> -Â Â Â Â Â gen = gen_aarch64_atomic_exchange<mode>_lse;
> +Â Â Â Â Â {
> +Â Â Â Â Â Â emit_insn (gen_aarch64_atomic_exchange<mode>_lse
> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â (operands[0], operands[1], operands[2], operands[3]));
> +Â Â Â Â Â }
> +Â Â Â else if (TARGET_ATOMIC_OOL)
> +Â Â Â Â Â {
> +Â Â Â Â Â Â machine_mode mode = <MODE>mode;
> +Â Â Â Â Â Â rtx func = aarch64_atomic_ool_func (mode, operands[3],
> + &aarch64_ool_swp_names);
> +Â Â Â Â Â Â rtx rval = emit_library_call_value (func, operands[0], LCT_NORMAL,
> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â mode, operands[2], mode,
> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â XEXP (operands[1], 0),
> ptr_mode);
> +Â Â Â Â Â Â Â emit_move_insn (operands[0], rval);
> +Â Â Â Â Â }
> Â Â Â Â else
> -Â Â Â Â Â gen = gen_aarch64_atomic_exchange<mode>;
> -
> -Â Â Â emit_insn (gen (operands[0], operands[1], operands[2], operands[3]));
> -
> +Â Â Â Â Â {
> +Â Â Â Â Â Â emit_insn (gen_aarch64_atomic_exchange<mode>
> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â (operands[0], operands[1], operands[2], operands[3]));
> +Â Â Â Â Â }
> Â Â Â Â DONE;
> Â Â }
> Â )
> @@ -280,6 +291,39 @@
> Â Â Â Â Â Â Â Â Â Â }
> Â Â Â Â Â Â Â Â operands[1] = force_reg (<MODE>mode, operands[1]);
> Â Â Â Â Â Â }
> +Â Â Â else if (TARGET_ATOMIC_OOL)
> +Â Â Â Â Â {
> +Â Â Â Â Â Â Â const atomic_ool_names *names;
> +Â Â Â Â Â Â switch (<CODE>)
> +Â Â Â Â Â Â Â Â {
> +Â Â Â Â Â Â Â Â case MINUS:
> +Â Â Â Â Â Â Â Â Â Â operands[1] = expand_simple_unop (<MODE>mode, NEG,
> operands[1],
> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â NULL, 1);
> +Â Â Â Â Â Â Â Â Â Â /* fallthru */
> +Â Â Â Â Â Â Â Â case PLUS:
> +Â Â Â Â Â Â Â Â Â Â names = &aarch64_ool_ldadd_names;
> +Â Â Â Â Â Â Â Â Â Â break;
> +Â Â Â Â Â Â Â Â case IOR:
> +Â Â Â Â Â Â Â Â Â Â names = &aarch64_ool_ldset_names;
> +Â Â Â Â Â Â Â Â Â Â break;
> +Â Â Â Â Â Â Â Â case XOR:
> +Â Â Â Â Â Â Â Â Â Â names = &aarch64_ool_ldeor_names;
> +Â Â Â Â Â Â Â Â Â Â break;
> +Â Â Â Â Â Â Â Â case AND:
> +Â Â Â Â Â Â Â Â Â Â operands[1] = expand_simple_unop (<MODE>mode, NOT,
> operands[1],
> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â NULL, 1);
> +Â Â Â Â Â Â Â Â Â Â names = &aarch64_ool_ldclr_names;
> +Â Â Â Â Â Â Â Â Â Â break;
> +Â Â Â Â Â Â Â Â default:
> +Â Â Â Â Â Â Â Â Â Â gcc_unreachable ();
> +Â Â Â Â Â Â Â Â }
> +Â Â Â Â Â Â Â machine_mode mode = <MODE>mode;
> +Â Â Â Â Â Â rtx func = aarch64_atomic_ool_func (mode, operands[2], names);
> +Â Â Â Â Â Â emit_library_call_value (func, NULL_RTX, LCT_NORMAL, mode,
> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â operands[1], mode,
> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â XEXP (operands[0], 0), ptr_mode);
> +Â Â Â Â Â Â Â DONE;
> +Â Â Â Â Â }
> Â Â Â Â else
> Â Â Â Â Â Â gen = gen_aarch64_atomic_<atomic_optab><mode>;
>
> @@ -405,6 +449,40 @@
> Â Â Â Â Â Â Â Â }
> Â Â Â Â Â Â operands[2] = force_reg (<MODE>mode, operands[2]);
> Â Â Â Â }
> +Â else if (TARGET_ATOMIC_OOL)
> +Â Â Â {
> +Â Â Â Â Â const atomic_ool_names *names;
> +Â Â Â Â Â switch (<CODE>)
> +Â Â Â Â Â Â {
> +Â Â Â Â Â Â case MINUS:
> +Â Â Â Â Â Â Â Â operands[2] = expand_simple_unop (<MODE>mode, NEG, operands[2],
> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â NULL, 1);
> +Â Â Â Â Â Â Â Â /* fallthru */
> +Â Â Â Â Â Â case PLUS:
> +Â Â Â Â Â Â Â Â names = &aarch64_ool_ldadd_names;
> +Â Â Â Â Â Â Â Â break;
> +Â Â Â Â Â Â case IOR:
> +Â Â Â Â Â Â Â Â names = &aarch64_ool_ldset_names;
> +Â Â Â Â Â Â Â Â break;
> +Â Â Â Â Â Â case XOR:
> +Â Â Â Â Â Â Â Â names = &aarch64_ool_ldeor_names;
> +Â Â Â Â Â Â Â Â break;
> +Â Â Â Â Â Â case AND:
> +Â Â Â Â Â Â Â Â operands[2] = expand_simple_unop (<MODE>mode, NOT, operands[2],
> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â NULL, 1);
> +Â Â Â Â Â Â Â Â names = &aarch64_ool_ldclr_names;
> +Â Â Â Â Â Â Â Â break;
> +Â Â Â Â Â Â default:
> +Â Â Â Â Â Â Â Â gcc_unreachable ();
> +Â Â Â Â Â Â }
> +Â Â Â Â Â machine_mode mode = <MODE>mode;
> +Â Â Â Â Â rtx func = aarch64_atomic_ool_func (mode, operands[3], names);
> +Â Â Â Â Â rtx rval = emit_library_call_value (func, operands[0],
> LCT_NORMAL, mode,
> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â operands[2], mode,
> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â XEXP (operands[1], 0),
> ptr_mode);
> +Â Â Â Â Â emit_move_insn (operands[0], rval);
> +Â Â Â Â Â DONE;
> +Â Â Â }
> Â Â else
> Â Â Â Â gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>;
>
> @@ -494,7 +572,7 @@
> Â {
> Â Â /* Use an atomic load-operate instruction when possible. In this case
> Â Â Â Â Â we will re-compute the result from the original mem value. */
> -Â if (TARGET_LSE)
> +Â if (TARGET_LSE || TARGET_ATOMIC_OOL)
> Â Â Â Â {
> Â Â Â Â Â Â rtx tmp = gen_reg_rtx (<MODE>mode);
> Â Â Â Â Â Â operands[2] = force_reg (<MODE>mode, operands[2]);
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 284594df010..70bd0d0a0a1 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -623,7 +623,7 @@ Objective-C and Objective-C++ Dialects}.
> Â -mpc-relative-literal-loads @gol
> Â -msign-return-address=@var{scope} @gol
> Â -march=@var{name}Â -mcpu=@var{name}Â -mtune=@var{name} @gol
> --moverride=@var{string}Â -mverbose-cost-dump -mtrack-speculation}
> +-moverride=@var{string}Â -mverbose-cost-dump -mtrack-speculation
> -matomic-ool}
>
> Â @emph{Adapteva Epiphany Options}
>  @gccoptlist{-mhalf-reg-file -mprefer-short-insn-regs @gol
> @@ -15109,6 +15109,18 @@ be used by the compiler when expanding calls to
> Â @code{__builtin_speculation_safe_copy} to permit a more efficient code
> Â sequence to be generated.
>
> +@item -matomic-ool
> +@itemx -mno-atomic-ool
> +Enable or disable calls to out-of-line helpers to implement atomic
> operations.
> +These helpers will, at runtime, determine if ARMv8.1-Atomics instructions
> +should be used; if not, they will use the load/store-exclusive
> instructions
> +that are present in the base ARMv8.0 ISA.
> +
> +This option is only applicable when compiling for the base ARMv8.0
> +instruction set. If using a later revision, e.g.
> @option{-march=armv8.1-a}
> +or @option{-march=armv8-a+lse}, the ARMv8.1-Atomics instructions will be
> +used directly.
> +
> Â @item -march=@var{name}
> Â @opindex march
> Â Specify the name of the target architecture and, optionally, one or
> --
> 2.17.2
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH, AArch64, v3 5/6] aarch64: Implement -matomic-ool
2019-09-05 9:56 ` Kyrill Tkachov
@ 2019-09-05 12:17 ` Richard Henderson
0 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2019-09-05 12:17 UTC (permalink / raw)
To: Kyrill Tkachov, gcc-patches
Cc: Ramana Radhakrishnan, agraf, Marcus Shawcroft, James Greenhalgh
On 9/5/19 2:56 AM, Kyrill Tkachov wrote:
> On 11/1/18 9:46 PM, Richard Henderson wrote:
>> +  else if (TARGET_ATOMIC_OOL)
>> +    {
>> +      /* Oldval must satisfy compare afterward.  */
>> +      if (!aarch64_plus_operand (oldval, mode))
>> +	oldval = force_reg (mode, oldval);
>> +      rtx func = aarch64_atomic_ool_func (mode, mod_s, &aarch64_ool_cas_names);
>> +      rval = emit_library_call_value (func, NULL_RTX, LCT_NORMAL, r_mode,
>> +				      oldval, mode, newval, mode,
>> +				      XEXP (mem, 0), ptr_mode);
>
>
> As reported at https://gcc.gnu.org/ml/gcc-patches/2019-09/msg00118.html
>
> I've encountered ICEs here with -mabi=ilp32 due to the mode of the address of MEM.
>
> rtx addr = XEXP (mem, 0);
> if (Pmode != ptr_mode)
>   addr = convert_memory_address (ptr_mode, addr);
>
> above the emit_library_call_value to force the address into ptr_mode and use
> addr as the argument fixed that for me.
The address should be promoted to Pmode for the call, as the little assembly
routine doesn't (and shouldn't) care for the extension itself.
I'll try aarch64-elf before re-posting.
r~
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH, AArch64, v3 4/6] aarch64: Add out-of-line functions for LSE atomics
2018-11-01 21:47 [PATCH, AArch64, v3 0/6] LSE atomics out-of-line Richard Henderson
` (4 preceding siblings ...)
2018-11-01 21:47 ` [PATCH, AArch64, v3 5/6] aarch64: Implement -matomic-ool Richard Henderson
@ 2018-11-01 21:47 ` Richard Henderson
2019-09-05 10:00 ` Kyrill Tkachov
2018-11-11 12:30 ` [PATCH, AArch64, v3 0/6] LSE atomics out-of-line Richard Henderson
2019-09-05 9:51 ` Kyrill Tkachov
7 siblings, 1 reply; 14+ messages in thread
From: Richard Henderson @ 2018-11-01 21:47 UTC (permalink / raw)
To: gcc-patches
Cc: ramana.radhakrishnan, agraf, marcus.shawcroft, james.greenhalgh
This is the libgcc part of the interface -- providing the functions.
Rationale is provided at the top of libgcc/config/aarch64/lse.S.
* config/aarch64/lse-init.c: New file.
* config/aarch64/lse.S: New file.
* config/aarch64/t-lse: New file.
* config.host: Add t-lse to all aarch64 tuples.
---
libgcc/config/aarch64/lse-init.c | 45 ++++++
libgcc/config.host | 4 +
libgcc/config/aarch64/lse.S | 238 +++++++++++++++++++++++++++++++
libgcc/config/aarch64/t-lse | 44 ++++++
4 files changed, 331 insertions(+)
create mode 100644 libgcc/config/aarch64/lse-init.c
create mode 100644 libgcc/config/aarch64/lse.S
create mode 100644 libgcc/config/aarch64/t-lse
diff --git a/libgcc/config/aarch64/lse-init.c b/libgcc/config/aarch64/lse-init.c
new file mode 100644
index 00000000000..03b4e1e8ea8
--- /dev/null
+++ b/libgcc/config/aarch64/lse-init.c
@@ -0,0 +1,45 @@
+/* Out-of-line LSE atomics for AArch64 architecture, Init.
+ Copyright (C) 2018 Free Software Foundation, Inc.
+ Contributed by Linaro Ltd.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Define the symbol gating the LSE implementations. */
+extern _Bool __aa64_have_atomics
+ __attribute__((visibility("hidden"), nocommon));
+
+/* Disable initialization of __aa64_have_atomics during bootstrap. */
+#ifndef inhibit_libc
+# include <sys/auxv.h>
+
+/* Disable initialization if the system headers are too old. */
+# if defined(AT_HWCAP) && defined(HWCAP_ATOMICS)
+
+static void __attribute__((constructor))
+init_have_atomics (void)
+{
+ unsigned long hwcap = getauxval (AT_HWCAP);
+ __aa64_have_atomics = (hwcap & HWCAP_ATOMICS) != 0;
+}
+
+# endif /* HWCAP */
+#endif /* inhibit_libc */
diff --git a/libgcc/config.host b/libgcc/config.host
index 029f6569caf..7e9a8b6bc8f 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -340,23 +340,27 @@ aarch64*-*-elf | aarch64*-*-rtems*)
extra_parts="$extra_parts crtbegin.o crtend.o crti.o crtn.o"
extra_parts="$extra_parts crtfastmath.o"
tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
+ tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
md_unwind_header=aarch64/aarch64-unwind.h
;;
aarch64*-*-freebsd*)
extra_parts="$extra_parts crtfastmath.o"
tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
+ tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
md_unwind_header=aarch64/freebsd-unwind.h
;;
aarch64*-*-fuchsia*)
tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
+ tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp"
;;
aarch64*-*-linux*)
extra_parts="$extra_parts crtfastmath.o"
md_unwind_header=aarch64/linux-unwind.h
tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
+ tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
;;
alpha*-*-linux*)
diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S
new file mode 100644
index 00000000000..3e42a6569af
--- /dev/null
+++ b/libgcc/config/aarch64/lse.S
@@ -0,0 +1,238 @@
+/* Out-of-line LSE atomics for AArch64 architecture.
+ Copyright (C) 2018 Free Software Foundation, Inc.
+ Contributed by Linaro Ltd.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/*
+ * The problem that we are trying to solve is operating system deployment
+ * of ARMv8.1-Atomics, also known as Large System Extensions (LSE).
+ *
+ * There are a number of potential solutions for this problem which have
+ * been proposed and rejected for various reasons. To recap:
+ *
+ * (1) Multiple builds. The dynamic linker will examine /lib64/atomics/
+ * if HWCAP_ATOMICS is set, allowing entire libraries to be overwritten.
+ * However, not all Linux distributions are happy with multiple builds,
+ * and anyway it has no effect on main applications.
+ *
+ * (2) IFUNC. We could put these functions into libgcc_s.so, and have
+ * a single copy of each function for all DSOs. However, ARM is concerned
+ * that the branch-to-indirect-branch that is implied by using a PLT,
+ * as required by IFUNC, is too much overhead for smaller cpus.
+ *
+ * (3) Statically predicted direct branches. This is the approach that
+ * is taken here. These functions are linked into every DSO that uses them.
+ * All of the symbols are hidden, so that the functions are called via a
+ * direct branch. The choice of LSE vs non-LSE is done via one byte load
+ * followed by a well-predicted direct branch. The functions are compiled
+ * separately to minimize code size.
+ */
+
+/* Tell the assembler to accept LSE instructions. */
+ .arch armv8-a+lse
+
+/* Declare the symbol gating the LSE implementations. */
+ .hidden __aa64_have_atomics
+
+/* Turn size and memory model defines into mnemonic fragments. */
+#if SIZE == 1
+# define S b
+# define MASK , uxtb
+#elif SIZE == 2
+# define S h
+# define MASK , uxth
+#elif SIZE == 4 || SIZE == 8 || SIZE == 16
+# define S
+# define MASK
+#else
+# error
+#endif
+
+#if MODEL == 1
+# define SUFF _relax
+# define A
+# define L
+#elif MODEL == 2
+# define SUFF _acq
+# define A a
+# define L
+#elif MODEL == 3
+# define SUFF _rel
+# define A
+# define L l
+#elif MODEL == 4
+# define SUFF _acq_rel
+# define A a
+# define L l
+#else
+# error
+#endif
+
+/* Concatenate symbols. */
+#define glue2_(A, B) A ## B
+#define glue2(A, B) glue2_(A, B)
+#define glue3_(A, B, C) A ## B ## C
+#define glue3(A, B, C) glue3_(A, B, C)
+#define glue4_(A, B, C, D) A ## B ## C ## D
+#define glue4(A, B, C, D) glue4_(A, B, C, D)
+
+/* Select the size of a register, given a regno. */
+#define x(N) glue2(x, N)
+#define w(N) glue2(w, N)
+#if SIZE < 8
+# define s(N) w(N)
+#else
+# define s(N) x(N)
+#endif
+
+#define NAME(BASE) glue4(__aa64_, BASE, SIZE, SUFF)
+#define LDXR glue4(ld, A, xr, S)
+#define STXR glue4(st, L, xr, S)
+
+/* Temporary registers used. Other than these, only the return value
+ register (x0) and the flags are modified. */
+#define tmp0 16
+#define tmp1 17
+#define tmp2 15
+
+/* Start and end a function. */
+.macro STARTFN name
+ .text
+ .balign 16
+ .globl \name
+ .hidden \name
+ .type \name, %function
+\name:
+.endm
+
+.macro ENDFN name
+ .size \name, . - \name
+.endm
+
+/* Branch to LABEL if LSE is enabled.
+ The branch should be easily predicted, in that it will, after constructors,
+ always branch the same way. The expectation is that systems that implement
+ ARMv8.1-Atomics are "beefier" than those that omit the extension.
+ By arranging for the fall-through path to use load-store-exclusive insns,
+ we aid the branch predictor of the smallest cpus. */
+.macro JUMP_IF_LSE label
+ adrp x(tmp0), __aa64_have_atomics
+ ldrb w(tmp0), [x(tmp0), :lo12:__aa64_have_atomics]
+ cbnz w(tmp0), \label
+.endm
+
+#ifdef L_cas
+
+STARTFN NAME(cas)
+ JUMP_IF_LSE 8f
+
+#if SIZE < 16
+#define CAS glue4(cas, A, L, S)
+
+ mov s(tmp0), s(0)
+0: LDXR s(0), [x2]
+ cmp s(0), s(tmp0) MASK
+ bne 1f
+ STXR w(tmp1), s(1), [x2]
+ cbnz w(tmp1), 0b
+1: ret
+
+8: CAS w(0), w(1), [x2]
+ ret
+
+#else
+#define LDXP glue3(ld, A, xp)
+#define STXP glue3(st, L, xp)
+#define CASP glue3(casp, A, L)
+
+ mov x(tmp0), x0
+ mov x(tmp1), x1
+0: LDXP x0, x1, [x4]
+ cmp x0, x(tmp0)
+ ccmp x1, x(tmp1), #0, eq
+ bne 1f
+ STXP w(tmp2), x(tmp0), x(tmp1), [x4]
+ cbnz w(tmp2), 0b
+1: ret
+
+8: CASP x0, x1, x2, x3, [x4]
+ ret
+
+#endif
+
+ENDFN NAME(cas)
+#endif
+
+#ifdef L_swp
+#define SWP glue4(swp, A, L, S)
+
+STARTFN NAME(swp)
+ JUMP_IF_LSE 8f
+
+ mov s(tmp0), s(0)
+0: LDXR s(0), [x1]
+ STXR w(tmp1), s(tmp0), [x1]
+ cbnz w(tmp1), 0b
+ ret
+
+8: SWP w(0), w(0), [x1]
+ ret
+
+ENDFN NAME(swp)
+#endif
+
+#if defined(L_ldadd) || defined(L_ldclr) \
+ || defined(L_ldeor) || defined(L_ldset)
+
+#ifdef L_ldadd
+#define LDNM ldadd
+#define OP add
+#elif defined(L_ldclr)
+#define LDNM ldclr
+#define OP bic
+#elif defined(L_ldeor)
+#define LDNM ldeor
+#define OP eor
+#elif defined(L_ldset)
+#define LDNM ldset
+#define OP orr
+#else
+#error
+#endif
+#define LDOP glue4(LDNM, A, L, S)
+
+STARTFN NAME(LDNM)
+ JUMP_IF_LSE 8f
+
+ mov s(tmp0), s(0)
+0: LDXR s(0), [x1]
+ OP s(tmp1), s(0), s(tmp0)
+ STXR w(tmp1), s(tmp1), [x1]
+ cbnz w(tmp1), 0b
+ ret
+
+8: LDOP s(0), s(0), [x1]
+ ret
+
+ENDFN NAME(LDNM)
+#endif
diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse
new file mode 100644
index 00000000000..c7f4223cd45
--- /dev/null
+++ b/libgcc/config/aarch64/t-lse
@@ -0,0 +1,44 @@
+# Out-of-line LSE atomics for AArch64 architecture.
+# Copyright (C) 2018 Free Software Foundation, Inc.
+# Contributed by Linaro Ltd.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Compare-and-swap has 5 sizes and 4 memory models.
+S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas))
+O0 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S0)))
+
+# Swap, Load-and-operate have 4 sizes and 4 memory models
+S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset))
+O1 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S1)))
+
+LSE_OBJS := $(O0) $(O1)
+
+libgcc-objects += $(LSE_OBJS) lse-init$(objext)
+
+empty =
+space = $(empty) $(empty)
+PAT_SPLIT = $(subst _,$(space),$(*F))
+PAT_BASE = $(word 1,$(PAT_SPLIT))
+PAT_N = $(word 2,$(PAT_SPLIT))
+PAT_M = $(word 3,$(PAT_SPLIT))
+
+lse-init$(objext): $(srcdir)/config/aarch64/lse-init.c
+ $(gcc_compile) -c $<
+
+$(LSE_OBJS): $(srcdir)/config/aarch64/lse.S
+ $(gcc_compile) -DL_$(PAT_BASE) -DSIZE=$(PAT_N) -DMODEL=$(PAT_M) -c $<
--
2.17.2
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH, AArch64, v3 4/6] aarch64: Add out-of-line functions for LSE atomics
2018-11-01 21:47 ` [PATCH, AArch64, v3 4/6] aarch64: Add out-of-line functions for LSE atomics Richard Henderson
@ 2019-09-05 10:00 ` Kyrill Tkachov
2019-09-05 12:13 ` Richard Henderson
0 siblings, 1 reply; 14+ messages in thread
From: Kyrill Tkachov @ 2019-09-05 10:00 UTC (permalink / raw)
To: Richard Henderson, gcc-patches
Cc: Ramana Radhakrishnan, agraf, Marcus Shawcroft, James Greenhalgh
Hi Richard,
On 11/1/18 9:46 PM, Richard Henderson wrote:
> This is the libgcc part of the interface -- providing the functions.
> Rationale is provided at the top of libgcc/config/aarch64/lse.S.
>
> Â Â Â Â Â Â Â * config/aarch64/lse-init.c: New file.
> Â Â Â Â Â Â Â * config/aarch64/lse.S: New file.
> Â Â Â Â Â Â Â * config/aarch64/t-lse: New file.
> Â Â Â Â Â Â Â * config.host: Add t-lse to all aarch64 tuples.
> ---
> Â libgcc/config/aarch64/lse-init.c |Â 45 ++++++
>  libgcc/config.host              |  4 +
> Â libgcc/config/aarch64/lse.SÂ Â Â Â Â | 238 +++++++++++++++++++++++++++++++
>  libgcc/config/aarch64/t-lse     | 44 ++++++
> Â 4 files changed, 331 insertions(+)
> Â create mode 100644 libgcc/config/aarch64/lse-init.c
> Â create mode 100644 libgcc/config/aarch64/lse.S
> Â create mode 100644 libgcc/config/aarch64/t-lse
>
> diff --git a/libgcc/config/aarch64/lse-init.c
> b/libgcc/config/aarch64/lse-init.c
> new file mode 100644
> index 00000000000..03b4e1e8ea8
> --- /dev/null
> +++ b/libgcc/config/aarch64/lse-init.c
> @@ -0,0 +1,45 @@
> +/* Out-of-line LSE atomics for AArch64 architecture, Init.
> +Â Â Copyright (C) 2018 Free Software Foundation, Inc.
> +Â Â Contributed by Linaro Ltd.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it under
> +the terms of the GNU General Public License as published by the Free
> +Software Foundation; either version 3, or (at your option) any later
> +version.
> +
> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
> +WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
> +for more details.
> +
> +Under Section 7 of GPL version 3, you are granted additional
> +permissions described in the GCC Runtime Library Exception, version
> +3.1, as published by the Free Software Foundation.
> +
> +You should have received a copy of the GNU General Public License and
> +a copy of the GCC Runtime Library Exception along with this program;
> +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
> +<http://www.gnu.org/licenses/>. */
> +
> +/* Define the symbol gating the LSE implementations. */
> +extern _Bool __aa64_have_atomics
> +Â Â Â __attribute__((visibility("hidden"), nocommon));
> +
Bootstrapping this patch series on an Armv8-A system with OOL atomics
enabled by default gave me link errors
when building libgomp about __aa64_have_atomics being undefined.
I haven't followed the series from the start so maybe I'm missing some
things, but I don't see where this variable is supposed to "live"?
Removing the 'extern' from here allows the bootstrap to proceed but it
fails at a later stage with bizarre errors like:
In file included from build/gencondmd.c:51:
$SRC/gcc/config/aarch64/constraints.md: In function ‘bool
satisfies_constraint_S(rtx)’:
$SRC/gcc/config/aarch64/constraints.md:120:10: error: ‘C’ was not
declared in this scope; did you mean ‘PC’?
  120 | (define_constraint "Y"
      |          ^
      |          PC
which looks like a miscompilation of sorts.
Thanks,
Kyrill
> +/* Disable initialization of __aa64_have_atomics during bootstrap. */
> +#ifndef inhibit_libc
> +# include <sys/auxv.h>
> +
> +/* Disable initialization if the system headers are too old. */
> +# if defined(AT_HWCAP) && defined(HWCAP_ATOMICS)
> +
> +static void __attribute__((constructor))
> +init_have_atomics (void)
> +{
> +Â unsigned long hwcap = getauxval (AT_HWCAP);
> +Â __aa64_have_atomics = (hwcap & HWCAP_ATOMICS) != 0;
> +}
> +
> +# endif /* HWCAP */
> +#endif /* inhibit_libc */
> diff --git a/libgcc/config.host b/libgcc/config.host
> index 029f6569caf..7e9a8b6bc8f 100644
> --- a/libgcc/config.host
> +++ b/libgcc/config.host
> @@ -340,23 +340,27 @@ aarch64*-*-elf | aarch64*-*-rtems*)
> Â Â Â Â Â Â Â Â extra_parts="$extra_parts crtbegin.o crtend.o crti.o crtn.o"
> Â Â Â Â Â Â Â Â extra_parts="$extra_parts crtfastmath.o"
> Â Â Â Â Â Â Â Â tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
> +Â Â Â Â Â Â tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
> Â Â Â Â Â Â Â Â tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
> Â Â Â Â Â Â Â Â md_unwind_header=aarch64/aarch64-unwind.h
> Â Â Â Â Â Â Â Â ;;
> Â aarch64*-*-freebsd*)
> Â Â Â Â Â Â Â Â extra_parts="$extra_parts crtfastmath.o"
> Â Â Â Â Â Â Â Â tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
> +Â Â Â Â Â Â tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
> Â Â Â Â Â Â Â Â tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
> Â Â Â Â Â Â Â Â md_unwind_header=aarch64/freebsd-unwind.h
> Â Â Â Â Â Â Â Â ;;
> Â aarch64*-*-fuchsia*)
> Â Â Â Â Â Â Â Â tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
> +Â Â Â Â Â Â tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
> Â Â Â Â Â Â Â Â tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp"
> Â Â Â Â Â Â Â Â ;;
> Â aarch64*-*-linux*)
> Â Â Â Â Â Â Â Â extra_parts="$extra_parts crtfastmath.o"
> Â Â Â Â Â Â Â Â md_unwind_header=aarch64/linux-unwind.h
> Â Â Â Â Â Â Â Â tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
> +Â Â Â Â Â Â tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
> Â Â Â Â Â Â Â Â tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
> Â Â Â Â Â Â Â Â ;;
> Â alpha*-*-linux*)
> diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S
> new file mode 100644
> index 00000000000..3e42a6569af
> --- /dev/null
> +++ b/libgcc/config/aarch64/lse.S
> @@ -0,0 +1,238 @@
> +/* Out-of-line LSE atomics for AArch64 architecture.
> +Â Â Copyright (C) 2018 Free Software Foundation, Inc.
> +Â Â Contributed by Linaro Ltd.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it under
> +the terms of the GNU General Public License as published by the Free
> +Software Foundation; either version 3, or (at your option) any later
> +version.
> +
> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
> +WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
> +for more details.
> +
> +Under Section 7 of GPL version 3, you are granted additional
> +permissions described in the GCC Runtime Library Exception, version
> +3.1, as published by the Free Software Foundation.
> +
> +You should have received a copy of the GNU General Public License and
> +a copy of the GCC Runtime Library Exception along with this program;
> +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
> +<http://www.gnu.org/licenses/>. */
> +
> +/*
> + * The problem that we are trying to solve is operating system deployment
> + * of ARMv8.1-Atomics, also known as Large System Extensions (LSE).
> + *
> + * There are a number of potential solutions for this problem which have
> + * been proposed and rejected for various reasons. To recap:
> + *
> + * (1) Multiple builds. The dynamic linker will examine /lib64/atomics/
> + * if HWCAP_ATOMICS is set, allowing entire libraries to be overwritten.
> + * However, not all Linux distributions are happy with multiple builds,
> + * and anyway it has no effect on main applications.
> + *
> + * (2) IFUNC. We could put these functions into libgcc_s.so, and have
> + * a single copy of each function for all DSOs. However, ARM is
> concerned
> + * that the branch-to-indirect-branch that is implied by using a PLT,
> + * as required by IFUNC, is too much overhead for smaller cpus.
> + *
> + * (3) Statically predicted direct branches. This is the approach that
> + * is taken here. These functions are linked into every DSO that
> uses them.
> + * All of the symbols are hidden, so that the functions are called via a
> + * direct branch. The choice of LSE vs non-LSE is done via one byte load
> + * followed by a well-predicted direct branch. The functions are
> compiled
> + * separately to minimize code size.
> + */
> +
> +/* Tell the assembler to accept LSE instructions. */
> +Â Â Â Â Â Â .arch armv8-a+lse
> +
> +/* Declare the symbol gating the LSE implementations. */
> +Â Â Â Â Â Â .hidden __aa64_have_atomics
> +
> +/* Turn size and memory model defines into mnemonic fragments. */
> +#if SIZE == 1
> +# define SÂ Â Â Â b
> +# define MASKÂ , uxtb
> +#elif SIZE == 2
> +# define SÂ Â Â Â h
> +# define MASKÂ , uxth
> +#elif SIZE == 4 || SIZE == 8 || SIZE == 16
> +# define S
> +# define MASK
> +#else
> +# error
> +#endif
> +
> +#if MODEL == 1
> +# define SUFFÂ _relax
> +# define A
> +# define L
> +#elif MODEL == 2
> +# define SUFFÂ _acq
> +# define AÂ Â Â Â a
> +# define L
> +#elif MODEL == 3
> +# define SUFFÂ _rel
> +# define A
> +# define LÂ Â Â Â l
> +#elif MODEL == 4
> +# define SUFFÂ _acq_rel
> +# define AÂ Â Â Â a
> +# define LÂ Â Â Â l
> +#else
> +# error
> +#endif
> +
> +/* Concatenate symbols. */
> +#define glue2_(A, B)Â Â Â Â Â Â Â Â Â Â A ## B
> +#define glue2(A, B)Â Â Â Â Â Â Â Â Â Â Â glue2_(A, B)
> +#define glue3_(A, B, C)Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â A ## B ## C
> +#define glue3(A, B, C)Â Â Â Â Â Â Â Â glue3_(A, B, C)
> +#define glue4_(A, B, C, D)Â Â Â Â A ## B ## C ## D
> +#define glue4(A, B, C, D)Â Â Â Â Â glue4_(A, B, C, D)
> +
> +/* Select the size of a register, given a regno. */
> +#define x(N)Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â glue2(x, N)
> +#define w(N)Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â glue2(w, N)
> +#if SIZE < 8
> +# define s(N)Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â w(N)
> +#else
> +# define s(N)Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â x(N)
> +#endif
> +
> +#define NAME(BASE)Â Â Â Â Â Â Â Â Â Â Â Â glue4(__aa64_, BASE, SIZE, SUFF)
> +#define LDXRÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â glue4(ld, A, xr, S)
> +#define STXRÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â glue4(st, L, xr, S)
> +
> +/* Temporary registers used. Other than these, only the return value
> +  register (x0) and the flags are modified. */
> +#define tmp0Â Â 16
> +#define tmp1Â Â 17
> +#define tmp2Â Â 15
> +
> +/* Start and end a function. */
> +.macro STARTFN name
> +Â Â Â Â Â Â .text
> +Â Â Â Â Â Â .balign 16
> +      .globl \name
> +Â Â Â Â Â Â .hidden \name
> +      .type  \name, %function
> +\name:
> +.endm
> +
> +.macro ENDFN name
> +      .size  \name, . - \name
> +.endm
> +
> +/* Branch to LABEL if LSE is enabled.
> +Â Â The branch should be easily predicted, in that it will, after
> constructors,
> +  always branch the same way. The expectation is that systems that
> implement
> +Â Â ARMv8.1-Atomics are "beefier" than those that omit the extension.
> +Â Â By arranging for the fall-through path to use load-store-exclusive
> insns,
> +  we aid the branch predictor of the smallest cpus. */
> +.macro JUMP_IF_LSE label
> +      adrp   x(tmp0), __aa64_have_atomics
> +      ldrb   w(tmp0), [x(tmp0), :lo12:__aa64_have_atomics]
> +      cbnz   w(tmp0), \label
> +.endm
> +
> +#ifdef L_cas
> +
> +STARTFNÂ Â Â Â Â Â Â NAME(cas)
> +Â Â Â Â Â Â JUMP_IF_LSEÂ Â Â Â 8f
> +
> +#if SIZE < 16
> +#define CASÂ Â Â glue4(cas, A, L, S)
> +
> +      mov            s(tmp0), s(0)
> +0:Â Â Â Â LDXRÂ Â Â Â Â Â Â Â Â Â Â s(0), [x2]
> +      cmp            s(0), s(tmp0) MASK
> +      bne            1f
> +Â Â Â Â Â Â STXRÂ Â Â Â Â Â Â Â Â Â Â w(tmp1), s(1), [x2]
> +      cbnz           w(tmp1), 0b
> +1:Â Â Â Â ret
> +
> +8:Â Â Â Â CASÂ Â Â Â Â Â Â Â Â Â Â Â w(0), w(1), [x2]
> +Â Â Â Â Â Â ret
> +
> +#else
> +#define LDXPÂ Â glue3(ld, A, xp)
> +#define STXPÂ Â glue3(st, L, xp)
> +#define CASPÂ Â glue3(casp, A, L)
> +
> +      mov            x(tmp0), x0
> +      mov            x(tmp1), x1
> +0:Â Â Â Â LDXPÂ Â Â Â Â Â Â Â Â Â Â x0, x1, [x4]
> +      cmp            x0, x(tmp0)
> +      ccmp           x1, x(tmp1), #0, eq
> +      bne            1f
> +Â Â Â Â Â Â STXPÂ Â Â Â Â Â Â Â Â Â Â w(tmp2), x(tmp0), x(tmp1), [x4]
> +      cbnz           w(tmp2), 0b
> +1:Â Â Â Â ret
> +
> +8:Â Â Â Â CASPÂ Â Â Â Â Â Â Â Â Â Â x0, x1, x2, x3, [x4]
> +Â Â Â Â Â Â ret
> +
> +#endif
> +
> +ENDFNÂ NAME(cas)
> +#endif
> +
> +#ifdef L_swp
> +#define SWPÂ Â Â glue4(swp, A, L, S)
> +
> +STARTFNÂ Â Â Â Â Â Â NAME(swp)
> +Â Â Â Â Â Â JUMP_IF_LSEÂ Â Â Â 8f
> +
> +      mov            s(tmp0), s(0)
> +0:Â Â Â Â LDXRÂ Â Â Â Â Â Â Â Â Â Â s(0), [x1]
> +Â Â Â Â Â Â STXRÂ Â Â Â Â Â Â Â Â Â Â w(tmp1), s(tmp0), [x1]
> +      cbnz           w(tmp1), 0b
> +Â Â Â Â Â Â ret
> +
> +8:Â Â Â Â SWPÂ Â Â Â Â Â Â Â Â Â Â Â w(0), w(0), [x1]
> +Â Â Â Â Â Â ret
> +
> +ENDFNÂ NAME(swp)
> +#endif
> +
> +#if defined(L_ldadd) || defined(L_ldclr) \
> +Â Â Â || defined(L_ldeor) || defined(L_ldset)
> +
> +#ifdef L_ldadd
> +#define LDNMÂ Â ldadd
> +#define OPÂ Â Â Â add
> +#elif defined(L_ldclr)
> +#define LDNMÂ Â ldclr
> +#define OPÂ Â Â Â bic
> +#elif defined(L_ldeor)
> +#define LDNMÂ Â ldeor
> +#define OPÂ Â Â Â eor
> +#elif defined(L_ldset)
> +#define LDNMÂ Â ldset
> +#define OPÂ Â Â Â orr
> +#else
> +#error
> +#endif
> +#define LDOPÂ Â glue4(LDNM, A, L, S)
> +
> +STARTFNÂ Â Â Â Â Â Â NAME(LDNM)
> +Â Â Â Â Â Â JUMP_IF_LSEÂ Â Â Â 8f
> +
> +      mov            s(tmp0), s(0)
> +0:Â Â Â Â LDXRÂ Â Â Â Â Â Â Â Â Â Â s(0), [x1]
> +Â Â Â Â Â Â OPÂ Â Â Â Â Â Â Â Â Â Â Â Â s(tmp1), s(0), s(tmp0)
> +Â Â Â Â Â Â STXRÂ Â Â Â Â Â Â Â Â Â Â w(tmp1), s(tmp1), [x1]
> +      cbnz           w(tmp1), 0b
> +Â Â Â Â Â Â ret
> +
> +8:Â Â Â Â LDOPÂ Â Â Â Â Â Â Â Â Â Â s(0), s(0), [x1]
> +Â Â Â Â Â Â ret
> +
> +ENDFNÂ NAME(LDNM)
> +#endif
> diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse
> new file mode 100644
> index 00000000000..c7f4223cd45
> --- /dev/null
> +++ b/libgcc/config/aarch64/t-lse
> @@ -0,0 +1,44 @@
> +# Out-of-line LSE atomics for AArch64 architecture.
> +# Copyright (C) 2018 Free Software Foundation, Inc.
> +# Contributed by Linaro Ltd.
> +#
> +# This file is part of GCC.
> +#
> +# GCC is free software; you can redistribute it and/or modify it
> +# under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 3, or (at your option)
> +# any later version.
> +#
> +# GCC is distributed in the hope that it will be useful, but
> +# WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> +# General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with GCC; see the file COPYING3. If not see
> +# <http://www.gnu.org/licenses/>.
> +
> +# Compare-and-swap has 5 sizes and 4 memory models.
> +S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas))
> +O0 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S0)))
> +
> +# Swap, Load-and-operate have 4 sizes and 4 memory models
> +S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor
> ldset))
> +O1 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S1)))
> +
> +LSE_OBJS := $(O0) $(O1)
> +
> +libgcc-objects += $(LSE_OBJS) lse-init$(objext)
> +
> +empty     =
> +space     = $(empty) $(empty)
> +PAT_SPLITÂ = $(subst _,$(space),$(*F))
> +PAT_BASEÂ Â = $(word 1,$(PAT_SPLIT))
> +PAT_NÂ Â Â Â Â = $(word 2,$(PAT_SPLIT))
> +PAT_MÂ Â Â Â Â = $(word 3,$(PAT_SPLIT))
> +
> +lse-init$(objext): $(srcdir)/config/aarch64/lse-init.c
> +Â Â Â Â Â Â $(gcc_compile) -c $<
> +
> +$(LSE_OBJS): $(srcdir)/config/aarch64/lse.S
> +Â Â Â Â Â Â $(gcc_compile) -DL_$(PAT_BASE) -DSIZE=$(PAT_N)
> -DMODEL=$(PAT_M) -c $<
> --
> 2.17.2
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH, AArch64, v3 4/6] aarch64: Add out-of-line functions for LSE atomics
2019-09-05 10:00 ` Kyrill Tkachov
@ 2019-09-05 12:13 ` Richard Henderson
2019-09-05 12:53 ` Kyrill Tkachov
0 siblings, 1 reply; 14+ messages in thread
From: Richard Henderson @ 2019-09-05 12:13 UTC (permalink / raw)
To: Kyrill Tkachov, gcc-patches
Cc: Ramana Radhakrishnan, agraf, Marcus Shawcroft, James Greenhalgh
On 9/5/19 3:00 AM, Kyrill Tkachov wrote:
>> +/* Define the symbol gating the LSE implementations. */
>> +extern _Bool __aa64_have_atomics
>> +Â Â Â __attribute__((visibility("hidden"), nocommon));
>> +
>
> Bootstrapping this patch series on an Armv8-A system with OOL atomics enabled
> by default gave me link errors
>
> when building libgomp about __aa64_have_atomics being undefined.
>
> I haven't followed the series from the start so maybe I'm missing some things,
> but I don't see where this variable is supposed to "live"?
Removing the extern here is the correct fix.
Obviously the v3 patch set conversion from C to assembly wasn't properly
tested, or I made some last-minute changes before posting. Time has erased
that memory.
> Removing the 'extern' from here allows the bootstrap to proceed but it fails at
> a later stage with bizarre errors like:
>
> In file included from build/gencondmd.c:51:
> $SRC/gcc/config/aarch64/constraints.md: In function ‘bool
> satisfies_constraint_S(rtx)’:
> $SRC/gcc/config/aarch64/constraints.md:120:10: error: ‘C’ was not declared in
> this scope; did you mean ‘PC’?
>   120 | (define_constraint "Y"
>       |          ^
>       |          PC
>
> which looks like a miscompilation of sorts.
I noticed a couple of typos in the assembly that used the wrong register form
(wN vs xN) on the LSE instructions. By chance were you testing on a system
with LSE instructions enabled?
r~
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH, AArch64, v3 4/6] aarch64: Add out-of-line functions for LSE atomics
2019-09-05 12:13 ` Richard Henderson
@ 2019-09-05 12:53 ` Kyrill Tkachov
0 siblings, 0 replies; 14+ messages in thread
From: Kyrill Tkachov @ 2019-09-05 12:53 UTC (permalink / raw)
To: Richard Henderson, gcc-patches
Cc: Ramana Radhakrishnan, agraf, Marcus Shawcroft, James Greenhalgh
On 9/5/19 1:12 PM, Richard Henderson wrote:
> On 9/5/19 3:00 AM, Kyrill Tkachov wrote:
>>> +/* Define the symbol gating the LSE implementations. */
>>> +extern _Bool __aa64_have_atomics
>>> +Â Â Â __attribute__((visibility("hidden"), nocommon));
>>> +
>> Bootstrapping this patch series on an Armv8-A system with OOL atomics enabled
>> by default gave me link errors
>>
>> when building libgomp about __aa64_have_atomics being undefined.
>>
>> I haven't followed the series from the start so maybe I'm missing some things,
>> but I don't see where this variable is supposed to "live"?
> Removing the extern here is the correct fix.
>
> Obviously the v3 patch set conversion from C to assembly wasn't properly
> tested, or I made some last-minute changes before posting. Time has erased
> that memory.
>
>> Removing the 'extern' from here allows the bootstrap to proceed but it fails at
>> a later stage with bizzare errors like:
>>
>> In file included from build/gencondmd.c:51:
>> $SRC/gcc/config/aarch64/constraints.md: In function ‘bool
>> satisfies_constraint_S(rtx)’:
>> $SRC/gcc/config/aarch64/constraints.md:120:10: error: ‘C’ was not declared in
>> this scope; did you mean ‘PC’?
>>   120 | (define_constraint "Y"
>>       |          ^
>>       |          PC
>>
>> which looks like a miscompilation of sorts.
> I noticed a couple of typos in the assembly that used the wrong register form
> (wN vs xN) on the LSE instructions. By chance were you testing on a system
> with LSE instructions enabled?
No, it was an Armv8.0-A system without LSE.
If you need help with testing I'd be happy to help!
Kyrill
>
>
> r~
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH, AArch64, v3 0/6] LSE atomics out-of-line
2018-11-01 21:47 [PATCH, AArch64, v3 0/6] LSE atomics out-of-line Richard Henderson
` (5 preceding siblings ...)
2018-11-01 21:47 ` [PATCH, AArch64, v3 4/6] aarch64: Add out-of-line functions for LSE atomics Richard Henderson
@ 2018-11-11 12:30 ` Richard Henderson
2019-09-05 9:51 ` Kyrill Tkachov
7 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2018-11-11 12:30 UTC (permalink / raw)
To: Richard Henderson, gcc-patches
Cc: ramana.radhakrishnan, agraf, marcus.shawcroft, james.greenhalgh
Ping.
On 11/1/18 10:46 PM, Richard Henderson wrote:
> From: Richard Henderson <rth@twiddle.net>
>
> Changes since v2:
> * Committed half of the patch set.
> * Split inline TImode support from out-of-line patches.
> * Removed the ST<OP> out-of-line functions, to match inline.
> * Moved the out-of-line functions to assembly.
>
> What I have not done, but is now a possibility, is to use a custom
> calling convention for the out-of-line routines. I now only clobber
> 2 (or 3, for TImode) temp regs and set a return value.
>
>
> r~
>
>
> Richard Henderson (6):
> aarch64: Extend %R for integer registers
> aarch64: Implement TImode compare-and-swap
> aarch64: Tidy aarch64_split_compare_and_swap
> aarch64: Add out-of-line functions for LSE atomics
> aarch64: Implement -matomic-ool
> Enable -matomic-ool by default
>
> gcc/config/aarch64/aarch64-protos.h | 13 +
> gcc/common/config/aarch64/aarch64-common.c | 6 +-
> gcc/config/aarch64/aarch64.c | 211 ++++++++++++----
> .../atomic-comp-swap-release-acquire.c | 2 +-
> .../gcc.target/aarch64/atomic-op-acq_rel.c | 2 +-
> .../gcc.target/aarch64/atomic-op-acquire.c | 2 +-
> .../gcc.target/aarch64/atomic-op-char.c | 2 +-
> .../gcc.target/aarch64/atomic-op-consume.c | 2 +-
> .../gcc.target/aarch64/atomic-op-imm.c | 2 +-
> .../gcc.target/aarch64/atomic-op-int.c | 2 +-
> .../gcc.target/aarch64/atomic-op-long.c | 2 +-
> .../gcc.target/aarch64/atomic-op-relaxed.c | 2 +-
> .../gcc.target/aarch64/atomic-op-release.c | 2 +-
> .../gcc.target/aarch64/atomic-op-seq_cst.c | 2 +-
> .../gcc.target/aarch64/atomic-op-short.c | 2 +-
> .../aarch64/atomic_cmp_exchange_zero_reg_1.c | 2 +-
> .../atomic_cmp_exchange_zero_strong_1.c | 2 +-
> .../gcc.target/aarch64/sync-comp-swap.c | 2 +-
> .../gcc.target/aarch64/sync-op-acquire.c | 2 +-
> .../gcc.target/aarch64/sync-op-full.c | 2 +-
> libgcc/config/aarch64/lse-init.c | 45 ++++
> gcc/config/aarch64/aarch64.opt | 4 +
> gcc/config/aarch64/atomics.md | 185 +++++++++++++-
> gcc/config/aarch64/iterators.md | 3 +
> gcc/doc/invoke.texi | 14 +-
> libgcc/config.host | 4 +
> libgcc/config/aarch64/lse.S | 238 ++++++++++++++++++
> libgcc/config/aarch64/t-lse | 44 ++++
> 28 files changed, 717 insertions(+), 84 deletions(-)
> create mode 100644 libgcc/config/aarch64/lse-init.c
> create mode 100644 libgcc/config/aarch64/lse.S
> create mode 100644 libgcc/config/aarch64/t-lse
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH, AArch64, v3 0/6] LSE atomics out-of-line
2018-11-01 21:47 [PATCH, AArch64, v3 0/6] LSE atomics out-of-line Richard Henderson
` (6 preceding siblings ...)
2018-11-11 12:30 ` [PATCH, AArch64, v3 0/6] LSE atomics out-of-line Richard Henderson
@ 2019-09-05 9:51 ` Kyrill Tkachov
7 siblings, 0 replies; 14+ messages in thread
From: Kyrill Tkachov @ 2019-09-05 9:51 UTC (permalink / raw)
To: Richard Henderson, gcc-patches
Cc: Ramana Radhakrishnan, agraf, Marcus Shawcroft, James Greenhalgh,
Richard Henderson
Hi Richard,
On 11/1/18 9:46 PM, Richard Henderson wrote:
> From: Richard Henderson <rth@twiddle.net>
>
> Changes since v2:
> * Committed half of the patch set.
> * Split inline TImode support from out-of-line patches.
> * Removed the ST<OP> out-of-line functions, to match inline.
> * Moved the out-of-line functions to assembly.
>
> What I have not done, but is now a possibility, is to use a custom
> calling convention for the out-of-line routines. I now only clobber
> 2 (or 3, for TImode) temp regs and set a return value.
>
I think this patch series would be great to have for GCC 10!
I've rebased them on current trunk and fixed up a couple of minor
conflicts in my local tree.
After that, I've encountered a couple of issues with building a compiler
with these patches.
I'll respond to the individual patches that I think cause the trouble.
Thanks,
Kyrill
>
> r~
>
>
> Richard Henderson (6):
>  aarch64: Extend %R for integer registers
>  aarch64: Implement TImode compare-and-swap
>  aarch64: Tidy aarch64_split_compare_and_swap
>  aarch64: Add out-of-line functions for LSE atomics
>  aarch64: Implement -matomic-ool
>  Enable -matomic-ool by default
>
>  gcc/config/aarch64/aarch64-protos.h          | 13 +
>  gcc/common/config/aarch64/aarch64-common.c   |  6 +-
>  gcc/config/aarch64/aarch64.c                 | 211 ++++++++++++----
>  .../atomic-comp-swap-release-acquire.c       |  2 +-
>  .../gcc.target/aarch64/atomic-op-acq_rel.c   |  2 +-
>  .../gcc.target/aarch64/atomic-op-acquire.c   |  2 +-
>  .../gcc.target/aarch64/atomic-op-char.c      |  2 +-
>  .../gcc.target/aarch64/atomic-op-consume.c   |  2 +-
>  .../gcc.target/aarch64/atomic-op-imm.c       |  2 +-
>  .../gcc.target/aarch64/atomic-op-int.c       |  2 +-
>  .../gcc.target/aarch64/atomic-op-long.c      |  2 +-
>  .../gcc.target/aarch64/atomic-op-relaxed.c   |  2 +-
>  .../gcc.target/aarch64/atomic-op-release.c   |  2 +-
>  .../gcc.target/aarch64/atomic-op-seq_cst.c   |  2 +-
>  .../gcc.target/aarch64/atomic-op-short.c     |  2 +-
>  .../aarch64/atomic_cmp_exchange_zero_reg_1.c |  2 +-
>  .../atomic_cmp_exchange_zero_strong_1.c      |  2 +-
>  .../gcc.target/aarch64/sync-comp-swap.c      |  2 +-
>  .../gcc.target/aarch64/sync-op-acquire.c     |  2 +-
>  .../gcc.target/aarch64/sync-op-full.c        |  2 +-
>  libgcc/config/aarch64/lse-init.c             | 45 ++++
>  gcc/config/aarch64/aarch64.opt               |  4 +
>  gcc/config/aarch64/atomics.md                | 185 +++++++++++++-
>  gcc/config/aarch64/iterators.md              |  3 +
>  gcc/doc/invoke.texi                          | 14 +-
>  libgcc/config.host                           |  4 +
>  libgcc/config/aarch64/lse.S                  | 238 ++++++++++++++++++
>  libgcc/config/aarch64/t-lse                  | 44 ++++
>  28 files changed, 717 insertions(+), 84 deletions(-)
>  create mode 100644 libgcc/config/aarch64/lse-init.c
>  create mode 100644 libgcc/config/aarch64/lse.S
>  create mode 100644 libgcc/config/aarch64/t-lse
>
> --
> 2.17.2
>
^ permalink raw reply [flat|nested] 14+ messages in thread