Skip to content

Commit

Permalink
aarch64: Use LDIAPP/STILP if FEAT_LRCPC3 and FEAT_LSE2 are available
Browse files Browse the repository at this point in the history
  • Loading branch information
taiki-e committed Aug 10, 2023
1 parent c06f3ff commit 21a0e82
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 13 deletions.
2 changes: 2 additions & 0 deletions .github/.cspell/project-dictionary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ kuser
ldar
ldaxp
ldclrp
ldiapp
ldsetp
ldxp
lghi
Expand Down Expand Up @@ -120,6 +121,7 @@ sreg
sstatus
stdarch
stdsimd
stilp
stlxp
stpq
stqcx
Expand Down
6 changes: 4 additions & 2 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,12 +217,14 @@ fn main() {
let is_macos = target_os == "macos";
let mut has_lse = is_macos;
// FEAT_LSE2 doesn't imply FEAT_LSE. FEAT_LSE128 implies FEAT_LSE but not FEAT_LSE2.
// As of rustc 1.70, target_feature "lse2"/"lse128" is not available on rustc side:
// As of rustc 1.70, the "lse2"/"lse128"/"rcpc3" target features are not available on the rustc side:
// https://github.com/rust-lang/rust/blob/1.70.0/compiler/rustc_codegen_ssa/src/target_features.rs#L58
target_feature_if("lse2", is_macos, &version, None, false);
// LLVM supports FEAT_LSE128 on LLVM 16+:
// LLVM supports FEAT_LRCPC3 and FEAT_LSE128 on LLVM 16+:
// https://github.com/llvm/llvm-project/commit/a6aaa969f7caec58a994142f8d855861cf3a1463
// https://github.com/llvm/llvm-project/commit/7fea6f2e0e606e5339c3359568f680eaf64aa306
has_lse |= target_feature_if("lse128", false, &version, None, false);
target_feature_if("rcpc3", false, &version, None, false);
// aarch64_target_feature stabilized in Rust 1.61.
target_feature_if("lse", has_lse, &version, Some(61), true);

Expand Down
2 changes: 1 addition & 1 deletion src/imp/atomic128/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Here is the table of targets that support 128-bit atomics and the instructions u
| target_arch | load | store | CAS | RMW | note |
| ----------- | ---- | ----- | --- | --- | ---- |
| x86_64 | cmpxchg16b or vmovdqa | cmpxchg16b or vmovdqa | cmpxchg16b | cmpxchg16b | cmpxchg16b target feature required. vmovdqa requires Intel or AMD CPU with AVX. <br> Both compile-time and run-time detection are supported for cmpxchg16b. vmovdqa is currently run-time detection only. <br> Requires rustc 1.59+ when cmpxchg16b target feature is enabled at compile-time, otherwise requires rustc 1.69+ |
| aarch64 | ldxp/stxp or casp or ldp | ldxp/stxp or casp or stp/swpp | ldxp/stxp or casp | ldxp/stxp or casp/swpp/ldclrp/ldsetp | casp requires lse target feature, ldp/stp requires lse2 target feature, swpp/ldclrp/ldsetp requires lse128 target feature. <br> Both compile-time and run-time detection are supported for lse. lse2 and lse128 are currently compile-time detection only. <br> Requires rustc 1.59+ |
| aarch64 | ldxp/stxp or casp or ldp/ldiapp | ldxp/stxp or casp or stp/stilp/swpp | ldxp/stxp or casp | ldxp/stxp or casp/swpp/ldclrp/ldsetp | casp requires lse target feature, ldp/stp requires lse2 target feature, ldiapp/stilp requires lse2 and rcpc3 target features, swpp/ldclrp/ldsetp requires lse128 target feature. <br> Both compile-time and run-time detection are supported for lse. Others are currently compile-time detection only. <br> Requires rustc 1.59+ |
| powerpc64 | lq | stq | lqarx/stqcx. | lqarx/stqcx. | Requires target-cpu pwr8+ (powerpc64le is pwr8 by default). Both compile-time and run-time detection are supported (run-time detection is currently disabled by default). <br> Requires nightly |
| s390x | lpq | stpq | cdsg | cdsg | Requires nightly |

Expand Down
57 changes: 48 additions & 9 deletions src/imp/atomic128/aarch64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
// - LDXP/STXP loop (DW LL/SC)
// - CASP (DWCAS) added as FEAT_LSE (mandatory from armv8.1-a)
// - LDP/STP (DW load/store) if FEAT_LSE2 (optional from armv8.2-a, mandatory from armv8.4-a) is available
// - LDIAPP/STILP (DW acquire-load/release-store) added as FEAT_LRCPC3 (optional from armv8.9-a/armv9.4-a) (if FEAT_LSE2 is also available)
// - LDCLRP/LDSETP/SWPP (DW RMW) added as FEAT_LSE128 (optional from armv9.4-a)
//
// If outline-atomics is not enabled and FEAT_LSE is not available at
Expand All @@ -16,6 +17,7 @@
// However, when portable_atomic_ll_sc_rmw cfg is set, use LDXP/STXP loop instead of CASP
// loop for RMW (by default, it is set on Apple hardware; see build script for details).
// If FEAT_LSE2 is available at compile-time, we use LDP/STP for load/store.
// If FEAT_LSE2 and FEAT_LRCPC3 are available at compile-time, we use LDIAPP/STILP for acquire-load/release-store.
// If FEAT_LSE128 is available at compile-time, we use LDCLRP/LDSETP/SWPP for fetch_and/fetch_or/swap/{release,seqcst}-store.
//
// Note: FEAT_LSE2 doesn't imply FEAT_LSE. FEAT_LSE128 implies FEAT_LSE but not FEAT_LSE2.
Expand Down Expand Up @@ -50,12 +52,14 @@
// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
//
// Generated asm:
// - aarch64 https://godbolt.org/z/nds1nWbnq
// - aarch64 msvc https://godbolt.org/z/PTKdhbKqW
// - aarch64 (+lse) https://godbolt.org/z/5GzssfTKc
// - aarch64 msvc (+lse) https://godbolt.org/z/oYE87caM7
// - aarch64 (+lse,+lse2) https://godbolt.org/z/36dPjMbaG
// - aarch64 (+lse2,+lse128) https://godbolt.org/z/9MKa4ofbo
// - aarch64 https://godbolt.org/z/zT5av9nMP
// - aarch64 msvc https://godbolt.org/z/b5r9ordYW
// - aarch64 (+lse) https://godbolt.org/z/6EeE94ebd
// - aarch64 msvc (+lse) https://godbolt.org/z/d3Tev7nbv
// - aarch64 (+lse,+lse2) https://godbolt.org/z/K1xhW5jP8
// - aarch64 (+lse,+lse2,+rcpc3) https://godbolt.org/z/3jzsxedq8
// - aarch64 (+lse2,+lse128) https://godbolt.org/z/jqdYaP6a3
// - aarch64 (+lse2,+lse128,+rcpc3) https://godbolt.org/z/h156b4TMv

include!("macros.rs");

Expand Down Expand Up @@ -221,7 +225,7 @@ unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 {
}
}
}
// If CPU supports FEAT_LSE2, LDP is single-copy atomic reads,
// If the CPU supports FEAT_LSE2, LDP/LDIAPP performs a single-copy atomic read;
// otherwise it performs two single-copy atomic reads.
// Refs: B2.2.1 of the Arm Architecture Reference Manual Armv8, for Armv8-A architecture profile
#[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))]
Expand Down Expand Up @@ -250,6 +254,19 @@ unsafe fn atomic_load_ldp(src: *mut u128, order: Ordering) -> u128 {
}
match order {
Ordering::Relaxed => atomic_load_relaxed!("", readonly),
#[cfg(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3"))]
Ordering::Acquire => {
// SAFETY: cfg guarantees that the CPU supports FEAT_LRCPC3.
// Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDIAPP--Load-Acquire-RCpc-ordered-Pair-of-registers-
asm!(
"ldiapp {prev_lo}, {prev_hi}, [{src}]",
src = in(reg) ptr_reg!(src),
prev_hi = lateout(reg) prev_hi,
prev_lo = lateout(reg) prev_lo,
options(nostack, preserves_flags),
);
}
#[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))]
Ordering::Acquire => atomic_load_relaxed!("dmb ishld"),
Ordering::SeqCst => {
asm!(
Expand Down Expand Up @@ -355,7 +372,16 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
// https://reviews.llvm.org/D143506
match order {
Ordering::Relaxed => atomic_store_stp(dst, val, order),
Ordering::Release | Ordering::SeqCst => {
#[cfg(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3"))]
Ordering::Release => atomic_store_stp(dst, val, order),
#[cfg(not(any(
target_feature = "rcpc3",
portable_atomic_target_feature = "rcpc3",
)))]
Ordering::Release => {
_atomic_swap_swpp(dst, val, order);
}
Ordering::SeqCst => {
_atomic_swap_swpp(dst, val, order);
}
_ => unreachable!("{:?}", order),
Expand All @@ -374,7 +400,7 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
atomic_swap(dst, val, order);
}
}
// If CPU supports FEAT_LSE2, STP is single-copy atomic writes,
// If the CPU supports FEAT_LSE2, STP/STILP performs a single-copy atomic write;
// otherwise it performs two single-copy atomic writes.
// Refs: B2.2.1 of the Arm Architecture Reference Manual Armv8, for Armv8-A architecture profile
#[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))]
Expand Down Expand Up @@ -404,6 +430,19 @@ unsafe fn atomic_store_stp(dst: *mut u128, val: u128, order: Ordering) {
}
match order {
Ordering::Relaxed => atomic_store!("", ""),
#[cfg(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3"))]
Ordering::Release => {
// SAFETY: cfg guarantees that the CPU supports FEAT_LRCPC3.
// Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/STILP--Store-Release-ordered-Pair-of-registers-
asm!(
"stilp {val_lo}, {val_hi}, [{dst}]",
dst = in(reg) ptr_reg!(dst),
val_lo = in(reg) val.pair.lo,
val_hi = in(reg) val.pair.hi,
options(nostack, preserves_flags),
);
}
#[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))]
Ordering::Release => atomic_store!("", "dmb ish"),
Ordering::SeqCst => atomic_store!("dmb ish", "dmb ish"),
_ => unreachable!("{:?}", order),
Expand Down
8 changes: 7 additions & 1 deletion tools/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,7 @@ build() {
return 0
else
if [[ -n "${CI:-}" ]]; then
if [[ ${count} -lt 10 ]]; then
if [[ ${count} -lt 6 ]]; then
: $((count++))
else
count=0
Expand Down Expand Up @@ -590,10 +590,16 @@ build() {
esac
# Support for FEAT_LRCPC3 and FEAT_LSE128 requires LLVM 16+ (Rust 1.70+).
if [[ "${rustc_minor_version}" -ge 70 ]]; then
CARGO_TARGET_DIR="${target_dir}/rcpc3" \
RUSTFLAGS="${target_rustflags} -C target-feature=+lse,+lse2,+rcpc3" \
x_cargo "${args[@]}" "$@"
# FEAT_LSE128 implies FEAT_LSE but not FEAT_LSE2.
CARGO_TARGET_DIR="${target_dir}/lse128" \
RUSTFLAGS="${target_rustflags} -C target-feature=+lse2,+lse128" \
x_cargo "${args[@]}" "$@"
CARGO_TARGET_DIR="${target_dir}/lse128-rcpc3" \
RUSTFLAGS="${target_rustflags} -C target-feature=+lse2,+lse128,+rcpc3" \
x_cargo "${args[@]}" "$@"
fi
;;
powerpc64-*)
Expand Down

0 comments on commit 21a0e82

Please sign in to comment.