Skip to content

Commit

Permalink
aarch64: Support FEAT_LSE128
Browse files Browse the repository at this point in the history
  • Loading branch information
taiki-e committed Jan 30, 2023
1 parent 9f8ec91 commit 1a1baba
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 6 deletions.
3 changes: 3 additions & 0 deletions .github/.cspell/project-dictionary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ isel
ishld
isync
ldaxp
ldclrp
ldsetp
ldxp
libcalls
libtcl
Expand Down Expand Up @@ -104,6 +106,7 @@ subarch
subc
subcmd
subfe
swpp
syscall
systemsim
tagme
Expand Down
1 change: 1 addition & 0 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ fn main() {
// As of rustc 1.67, target_feature "lse2" is not available on rustc side:
// https://github.com/rust-lang/rust/blob/1.67.0/compiler/rustc_codegen_ssa/src/target_features.rs#L47
target_feature_if("lse2", is_macos, &version, None, false);
target_feature_if("lse128", false, &version, None, false);
}
"arm" => {
// #[cfg(target_feature = "v7")] and others don't work on stable.
Expand Down
12 changes: 6 additions & 6 deletions src/imp/atomic128/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

The table of targets that support 128-bit atomics and the instructions used:

| target_arch | load | store | CAS | note |
| ----------- | ----- | ----- | ---- | ---- |
| x86_64 | cmpxchg16b or vmovdqa | cmpxchg16b or vmovdqa | cmpxchg16b | cmpxchg16b target feature required. vmovdqa requires Intel or AMD CPU with AVX. <br> Both compile-time and run-time detection are supported for cmpxchg16b. vmovdqa is currently run-time detection only. <br> Requires rustc 1.59+ when cmpxchg16b target feature is enabled at compile-time, otherwise requires nightly |
| aarch64 | ldxp/stxp or ldp | ldxp/stxp or stp | ldxp/stxp or casp | casp requires lse target feature, ldp/stp requires lse2 target feature. <br> Both compile-time and run-time detection are supported for lse. lse2 is currently compile-time detection only. <br> Requires rustc 1.59+ |
| powerpc64 | lq | stq | lqarx/stqcx. | Little endian or target CPU pwr8+. <br> Requires nightly |
| s390x | lpq | stpq | cdsg | Requires nightly |
| target_arch | load | store | CAS | RMW | note |
| ----------- | ----- | ----- | ---- | ---- | ---- |
| x86_64 | cmpxchg16b or vmovdqa | cmpxchg16b or vmovdqa | cmpxchg16b | cmpxchg16b | cmpxchg16b target feature required. vmovdqa requires Intel or AMD CPU with AVX. <br> Both compile-time and run-time detection are supported for cmpxchg16b. vmovdqa is currently run-time detection only. <br> Requires rustc 1.59+ when cmpxchg16b target feature is enabled at compile-time, otherwise requires nightly |
| aarch64 | ldxp/stxp or casp or ldp | ldxp/stxp or stp | ldxp/stxp or casp | ldxp/stxp or swpp/ldclrp/ldsetp | casp requires lse target feature, ldp/stp requires lse2 target feature, swpp/ldclrp/ldsetp requires lse128 target feature. <br> Both compile-time and run-time detection are supported for lse. lse2 and lse128 are currently compile-time detection only. <br> Requires rustc 1.59+ |
| powerpc64 | lq | stq | lqarx/stqcx. | lqarx/stqcx. | Little endian or target CPU pwr8+. <br> Requires nightly |
| s390x | lpq | stpq | cdsg | cdsg | Requires nightly |

Run-time detections are enabled by default and can be disabled with `--cfg portable_atomic_no_outline_atomics`.
135 changes: 135 additions & 0 deletions src/imp/atomic128/aarch64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,23 @@
// - LDXP/STXP loop (DW LL/SC)
// - CASP (DWCAS) added as FEAT_LSE (armv8.1-a)
// - LDP/STP (DW load/store) if FEAT_LSE2 (armv8.4-a) is available
// - LDCLRP/LDSETP/SWPP (DW RMW) added as FEAT_LSE128 (armv9.4-a)
//
// If outline-atomics is not enabled, we use CASP if FEAT_LSE is enabled at
// compile-time, otherwise, use LDXP/STXP loop.
// If outline-atomics is enabled, we use CASP for compare_exchange(_weak) if
// FEAT_LSE is available at run-time.
// If FEAT_LSE2 is available at compile-time, we use LDP/STP for load/store.
// If FEAT_LSE128 is available at compile-time, we use LDCLRP/LDSETP/SWPP for fetch_and/fetch_or/swap.
//
// Note: As of rustc 1.68.0-nightly, -C target-feature=+lse2 does not implicitly
// enable target_feature "lse": https://godbolt.org/z/GYTcTeda6
// Also, as of rustc 1.67, target_feature "lse2" is not available on rustc side:
// https://github.com/rust-lang/rust/blob/1.67.0/compiler/rustc_codegen_ssa/src/target_features.rs#L47
//
// Note: LLVM supports FEAT_LSE128 only in LLVM 16+:
// https://github.com/llvm/llvm-project/commit/7fea6f2e0e606e5339c3359568f680eaf64aa306
//
// Note that we do not separate LL and SC into separate functions, but handle
// them within a single asm block. This is because it is theoretically possible
// for the compiler to insert operations that might clear the reservation between
Expand Down Expand Up @@ -45,6 +50,7 @@
// - aarch64 https://godbolt.org/z/z5cd5W8fh
// - aarch64 (+lse) https://godbolt.org/z/offxb8rrj
// - aarch64 (+lse,+lse2) https://godbolt.org/z/8nn4WE4cj
// - aarch64 (+lse,+lse2,+lse128) https://godbolt.org/z/8nn4WE4cj

include!("macros.rs");

Expand Down Expand Up @@ -455,6 +461,49 @@ use self::atomic_compare_exchange as atomic_compare_exchange_weak;

// Atomically replaces the 128-bit value at `dst` with `val`, using the memory
// ordering `order`, and returns the result of the selected implementation.
//
// The implementation is chosen at compile time:
// - `_atomic_swap_swpp` when the "lse128" target feature is enabled
// - `_atomic_swap_ldxp_stxp` otherwise
#[inline]
unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 {
#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
// SAFETY: the caller must uphold the safety contract for `atomic_swap`.
// cfg guarantees that the CPU supports FEAT_LSE128.
unsafe {
_atomic_swap_swpp(dst, val, order)
}
#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
// SAFETY: the caller must uphold the safety contract for `atomic_swap`.
unsafe {
_atomic_swap_ldxp_stxp(dst, val, order)
}
}
// 128-bit atomic swap implemented with the FEAT_LSE128 `swpp` instruction.
//
// Writes `val` to `dst` and returns the value that the instruction hands back
// in the same register pair (the previous contents of `dst`).
// Only compiled when the "lse128" target feature is enabled.
#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
#[inline]
unsafe fn _atomic_swap_swpp(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// Debug-build check that `dst` is 16-byte aligned.
debug_assert!(dst as usize % 16 == 0);

// SAFETY: the caller must guarantee that `dst` is valid for both writes and
// reads, 16-byte aligned, that there are no concurrent non-atomic operations,
// and the CPU supports FEAT_LSE128.
//
// Refs:
// - https://developer.arm.com/documentation/ddi0602/2022-12/Base-Instructions/SWPP--SWPPA--SWPPAL--SWPPL--Swap-quadword-in-memory-?lang=en
unsafe {
// View the new value as a lo/hi pair so each half can be passed in its own register.
let val = U128 { whole: val };
let (prev_lo, prev_hi);
// Emits a single `swpp` whose mnemonic is suffixed with `$acquire` and `$release`.
// Both value registers are `inout`: they carry the new halves in and receive
// the previous halves (`prev_lo`/`prev_hi`) on the way out.
macro_rules! swap {
($acquire:tt, $release:tt) => {
asm!(
concat!("swpp", $acquire, $release, " {val_lo}, {val_hi}, [{dst", ptr_modifier!(), "}]"),
dst = in(reg) dst,
val_lo = inout(reg) val.pair.lo => prev_lo,
val_hi = inout(reg) val.pair.hi => prev_hi,
options(nostack, preserves_flags),
)
};
}
// NOTE(review): `atomic_rmw!` is defined outside this view; presumably it
// invokes `swap!` with the suffixes matching `order` — confirm.
atomic_rmw!(swap, order);
// Reassemble the two halves into the returned u128.
U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
}
}
#[inline]
unsafe fn _atomic_swap_ldxp_stxp(dst: *mut u128, val: u128, order: Ordering) -> u128 {
debug_assert!(dst as usize % 16 == 0);

// SAFETY: the caller must uphold the safety contract for `atomic_swap`.
Expand Down Expand Up @@ -558,6 +607,49 @@ unsafe fn atomic_sub(dst: *mut u128, val: u128, order: Ordering) -> u128 {

// Atomic 128-bit bitwise AND on the value at `dst` with `val`, using the
// memory ordering `order`; returns the result of the selected implementation.
//
// The implementation is chosen at compile time:
// - `_atomic_and_ldclrp` when the "lse128" target feature is enabled
// - `_atomic_and_ldxp_stxp` otherwise
#[inline]
unsafe fn atomic_and(dst: *mut u128, val: u128, order: Ordering) -> u128 {
#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
// SAFETY: the caller must uphold the safety contract for `atomic_and`.
// cfg guarantees that the CPU supports FEAT_LSE128.
unsafe {
_atomic_and_ldclrp(dst, val, order)
}
#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
// SAFETY: the caller must uphold the safety contract for `atomic_and`.
unsafe {
_atomic_and_ldxp_stxp(dst, val, order)
}
}
// 128-bit atomic AND implemented with the FEAT_LSE128 `ldclrp` instruction.
//
// Returns the value that the instruction hands back in the operand register
// pair (the previous contents of `dst`).
// Only compiled when the "lse128" target feature is enabled.
#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
#[inline]
unsafe fn _atomic_and_ldclrp(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// Debug-build check that `dst` is 16-byte aligned.
debug_assert!(dst as usize % 16 == 0);

// SAFETY: the caller must guarantee that `dst` is valid for both writes and
// reads, 16-byte aligned, that there are no concurrent non-atomic operations,
// and the CPU supports FEAT_LSE128.
//
// Refs:
// - https://developer.arm.com/documentation/ddi0602/2022-12/Base-Instructions/LDCLRP--LDCLRPA--LDCLRPAL--LDCLRPL--Atomic-bit-clear-on-quadword-in-memory-?lang=en
unsafe {
// `ldclrp` is a bit-clear operation (see Refs above): it clears the bits
// that are set in its operand. Passing the complement of `val` therefore
// clears exactly the bits that are zero in `val`, i.e. performs `*dst & val`.
let val = U128 { whole: !val };
let (prev_lo, prev_hi);
// Emits a single `ldclrp` whose mnemonic is suffixed with `$acquire` and `$release`.
// Both value registers are `inout`: they carry the operand halves in and
// receive the previous halves (`prev_lo`/`prev_hi`) on the way out.
macro_rules! and {
($acquire:tt, $release:tt) => {
asm!(
concat!("ldclrp", $acquire, $release, " {val_lo}, {val_hi}, [{dst", ptr_modifier!(), "}]"),
dst = in(reg) dst,
val_lo = inout(reg) val.pair.lo => prev_lo,
val_hi = inout(reg) val.pair.hi => prev_hi,
options(nostack, preserves_flags),
)
};
}
// NOTE(review): `atomic_rmw!` is defined outside this view; presumably it
// invokes `and!` with the suffixes matching `order` — confirm.
atomic_rmw!(and, order);
// Reassemble the two halves into the returned u128.
U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
}
}
#[inline]
unsafe fn _atomic_and_ldxp_stxp(dst: *mut u128, val: u128, order: Ordering) -> u128 {
debug_assert!(dst as usize % 16 == 0);

// SAFETY: the caller must uphold the safety contract for `atomic_and`.
Expand Down Expand Up @@ -630,6 +722,49 @@ unsafe fn atomic_nand(dst: *mut u128, val: u128, order: Ordering) -> u128 {

// Atomic 128-bit bitwise OR on the value at `dst` with `val`, using the
// memory ordering `order`; returns the result of the selected implementation.
//
// The implementation is chosen at compile time:
// - `_atomic_or_ldsetp` when the "lse128" target feature is enabled
// - `_atomic_or_ldxp_stxp` otherwise
#[inline]
unsafe fn atomic_or(dst: *mut u128, val: u128, order: Ordering) -> u128 {
#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
// SAFETY: the caller must uphold the safety contract for `atomic_or`.
// cfg guarantees that the CPU supports FEAT_LSE128.
unsafe {
_atomic_or_ldsetp(dst, val, order)
}
#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
// SAFETY: the caller must uphold the safety contract for `atomic_or`.
unsafe {
_atomic_or_ldxp_stxp(dst, val, order)
}
}
// 128-bit atomic OR implemented with the FEAT_LSE128 `ldsetp` instruction.
//
// Returns the value that the instruction hands back in the operand register
// pair (the previous contents of `dst`).
// Only compiled when the "lse128" target feature is enabled.
#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
#[inline]
unsafe fn _atomic_or_ldsetp(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// Debug-build check that `dst` is 16-byte aligned.
debug_assert!(dst as usize % 16 == 0);

// SAFETY: the caller must guarantee that `dst` is valid for both writes and
// reads, 16-byte aligned, that there are no concurrent non-atomic operations,
// and the CPU supports FEAT_LSE128.
//
// Refs:
// - https://developer.arm.com/documentation/ddi0602/2022-12/Base-Instructions/LDSETP--LDSETPA--LDSETPAL--LDSETPL--Atomic-bit-set-on-quadword-in-memory-?lang=en
unsafe {
// `ldsetp` is a bit-set operation (see Refs above), so `val` is passed
// unmodified: the bits set in `val` are the bits to set in `*dst`.
let val = U128 { whole: val };
let (prev_lo, prev_hi);
// Emits a single `ldsetp` whose mnemonic is suffixed with `$acquire` and `$release`.
// Both value registers are `inout`: they carry the operand halves in and
// receive the previous halves (`prev_lo`/`prev_hi`) on the way out.
macro_rules! or {
($acquire:tt, $release:tt) => {
asm!(
concat!("ldsetp", $acquire, $release, " {val_lo}, {val_hi}, [{dst", ptr_modifier!(), "}]"),
dst = in(reg) dst,
val_lo = inout(reg) val.pair.lo => prev_lo,
val_hi = inout(reg) val.pair.hi => prev_hi,
options(nostack, preserves_flags),
)
};
}
// NOTE(review): `atomic_rmw!` is defined outside this view; presumably it
// invokes `or!` with the suffixes matching `order` — confirm.
atomic_rmw!(or, order);
// Reassemble the two halves into the returned u128.
U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
}
}
#[inline]
unsafe fn _atomic_or_ldxp_stxp(dst: *mut u128, val: u128, order: Ordering) -> u128 {
debug_assert!(dst as usize % 16 == 0);

// SAFETY: the caller must uphold the safety contract for `atomic_or`.
Expand Down
3 changes: 3 additions & 0 deletions tools/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,9 @@ build() {
CARGO_TARGET_DIR="${target_dir}/lse2" \
RUSTFLAGS="${target_rustflags} -C target-feature=+lse,+lse2" \
x_cargo "${args[@]}" "$@"
CARGO_TARGET_DIR="${target_dir}/lse128" \
RUSTFLAGS="${target_rustflags} -C target-feature=+lse,+lse2,+lse128" \
x_cargo "${args[@]}" "$@"
;;
esac
;;
Expand Down

0 comments on commit 1a1baba

Please sign in to comment.