Skip to content

Commit

Permalink
x86_64: Add portable_atomic_vmovdqa_atomic cfg
Browse files Browse the repository at this point in the history
  • Loading branch information
taiki-e committed Jun 4, 2023
1 parent 8fcbe7c commit a9e5014
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 55 deletions.
1 change: 1 addition & 0 deletions .github/.cspell/project-dictionary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ rcpc
reentrancy
relibc
rsil
sandybridge
sbcs
sched
selgr
Expand Down
11 changes: 11 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,12 @@ jobs:
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+cmpxchg16b --cfg portable_atomic_no_outline_atomics
RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+cmpxchg16b --cfg portable_atomic_no_outline_atomics
if: (matrix.target == '' || startsWith(matrix.target, 'x86_64'))
# Sandy Bridge (the first Intel chip that introduced AVX) with portable_atomic_vmovdqa_atomic cfg
- run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD $REPORT_TIME
env:
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic
RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic
if: (matrix.target == '' || startsWith(matrix.target, 'x86_64'))
# +lse
# As of QEMU 8.0, QEMU has not yet implemented FEAT_LSE2: https://linaro.atlassian.net/browse/QEMU-300
# FEAT_LSE2 is tested on Cirrus CI's aarch64 macOS VM.
Expand Down Expand Up @@ -487,6 +493,11 @@ jobs:
# The vmovdqa load/store path has been tested above; disable outline-atomics and test the cmpxchg16b load/store path.
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+cmpxchg16b --cfg portable_atomic_no_outline_atomics
RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+cmpxchg16b --cfg portable_atomic_no_outline_atomics
# Sandy Bridge (the first Intel chip that introduced AVX) with portable_atomic_vmovdqa_atomic cfg
- run: tools/test.sh -vv 2>&1 | ts -i '%.s '
env:
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic
RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic

codegen:
needs: tidy
Expand Down
7 changes: 6 additions & 1 deletion src/imp/atomic128/detect/x86_64.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
// Adapted from https://github.com/rust-lang/stdarch.

#![cfg_attr(
any(not(target_feature = "sse"), miri, portable_atomic_sanitize_thread),
any(
not(target_feature = "sse"),
portable_atomic_vmovdqa_atomic,
miri,
portable_atomic_sanitize_thread,
),
allow(dead_code)
)]

Expand Down
168 changes: 114 additions & 54 deletions src/imp/atomic128/x86_64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
//
// Generated asm:
// - x86_64 (+cmpxchg16b) https://godbolt.org/z/WPvfn16sY
// - x86_64 (+cmpxchg16b) https://godbolt.org/z/f9rT3eEs8
// - x86_64 (+cmpxchg16b,+avx,vmovdqa_atomic) https://godbolt.org/z/feWx41Moa

include!("macros.rs");

Expand Down Expand Up @@ -37,12 +38,18 @@ macro_rules! debug_assert_cmpxchg16b {
}
};
}
#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))]
#[cfg(any(
not(any(portable_atomic_no_outline_atomics, target_env = "sgx")),
all(portable_atomic_vmovdqa_atomic, target_feature = "avx"),
))]
#[cfg(target_feature = "sse")]
macro_rules! debug_assert_vmovdqa_atomic {
() => {{
debug_assert_cmpxchg16b!();
debug_assert!(detect::detect().has_vmovdqa_atomic());
#[cfg(not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")))]
{
debug_assert!(detect::detect().has_vmovdqa_atomic());
}
}};
}

Expand Down Expand Up @@ -140,7 +147,10 @@ unsafe fn cmpxchg16b(dst: *mut u128, old: u128, new: u128) -> (u128, bool) {
//
// Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled.
// https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html
#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))]
#[cfg(any(
not(any(portable_atomic_no_outline_atomics, target_env = "sgx")),
all(portable_atomic_vmovdqa_atomic, target_feature = "avx"),
))]
#[cfg(target_feature = "sse")]
#[target_feature(enable = "avx")]
#[inline]
Expand All @@ -162,7 +172,10 @@ unsafe fn atomic_load_vmovdqa(src: *mut u128) -> u128 {
core::mem::transmute(out)
}
}
#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))]
#[cfg(any(
not(any(portable_atomic_no_outline_atomics, target_env = "sgx")),
all(portable_atomic_vmovdqa_atomic, target_feature = "avx"),
))]
#[cfg(target_feature = "sse")]
#[target_feature(enable = "avx")]
#[inline]
Expand Down Expand Up @@ -199,8 +212,15 @@ unsafe fn atomic_store_vmovdqa(dst: *mut u128, val: u128, order: Ordering) {

#[cfg(not(all(
any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
any(
all(
any(portable_atomic_no_outline_atomics, target_env = "sgx"),
not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")),
),
not(target_feature = "sse"),
),
)))]
#[cfg(not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")))]
macro_rules! load_store_detect {
(
vmovdqa = $vmovdqa:ident
Expand Down Expand Up @@ -250,36 +270,56 @@ unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 {
// SGX doesn't support CPUID.
#[cfg(all(
any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
any(
all(
any(portable_atomic_no_outline_atomics, target_env = "sgx"),
not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")),
),
not(target_feature = "sse"),
),
))]
// SAFETY: the caller must uphold the safety contract.
// cfg guarantees that CMPXCHG16B is available at compile-time.
unsafe {
// cmpxchg16b is always SeqCst.
atomic_load_cmpxchg16b(src)
_atomic_load_cmpxchg16b(src)
}
#[cfg(not(all(
any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
any(
all(
any(portable_atomic_no_outline_atomics, target_env = "sgx"),
not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")),
),
not(target_feature = "sse"),
),
)))]
// SAFETY: the caller must uphold the safety contract.
unsafe {
ifunc!(unsafe fn(src: *mut u128) -> u128 {
load_store_detect! {
vmovdqa = atomic_load_vmovdqa
cmpxchg16b = atomic_load_cmpxchg16b
// Use SeqCst because cmpxchg16b and atomic load by vmovdqa are always SeqCst.
fallback = atomic_load_seqcst
}
})
{
#[cfg(all(portable_atomic_vmovdqa_atomic, target_feature = "avx"))]
// SAFETY: the caller must uphold the safety contract.
unsafe {
atomic_load_vmovdqa(src)
}
#[cfg(not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")))]
// SAFETY: the caller must uphold the safety contract.
unsafe {
ifunc!(unsafe fn(src: *mut u128) -> u128 {
load_store_detect! {
vmovdqa = atomic_load_vmovdqa
cmpxchg16b = _atomic_load_cmpxchg16b
// Use SeqCst because cmpxchg16b and atomic load by vmovdqa are always SeqCst.
fallback = atomic_load_seqcst
}
})
}
}
}
#[cfg_attr(
not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
target_feature(enable = "cmpxchg16b")
)]
#[inline]
unsafe fn atomic_load_cmpxchg16b(src: *mut u128) -> u128 {
unsafe fn _atomic_load_cmpxchg16b(src: *mut u128) -> u128 {
debug_assert!(src as usize % 16 == 0);
debug_assert_cmpxchg16b!();

Expand Down Expand Up @@ -328,60 +368,80 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
// SGX doesn't support CPUID.
#[cfg(all(
any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
any(
all(
any(portable_atomic_no_outline_atomics, target_env = "sgx"),
not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")),
),
not(target_feature = "sse"),
),
))]
// SAFETY: the caller must uphold the safety contract.
// cfg guarantees that CMPXCHG16B is available at compile-time.
unsafe {
// cmpxchg16b is always SeqCst.
let _ = order;
atomic_store_cmpxchg16b(dst, val);
_atomic_store_cmpxchg16b(dst, val);
}
#[cfg(not(all(
any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
any(
all(
any(portable_atomic_no_outline_atomics, target_env = "sgx"),
not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")),
),
not(target_feature = "sse"),
),
)))]
// SAFETY: the caller must uphold the safety contract.
unsafe {
#[cfg(target_feature = "sse")]
fn_alias! {
#[target_feature(enable = "avx")]
unsafe fn(dst: *mut u128, val: u128);
// atomic store by vmovdqa has at least release semantics.
atomic_store_vmovdqa_non_seqcst = atomic_store_vmovdqa(Ordering::Release);
atomic_store_vmovdqa_seqcst = atomic_store_vmovdqa(Ordering::SeqCst);
{
#[cfg(all(portable_atomic_vmovdqa_atomic, target_feature = "avx"))]
// SAFETY: the caller must uphold the safety contract.
unsafe {
atomic_store_vmovdqa(dst, val, order);
}
match order {
// Relaxed and Release stores are equivalent in all implementations
// that may be called here (vmovdqa, asm-based cmpxchg16b, and fallback).
// core::arch's cmpxchg16b will never be called here.
Ordering::Relaxed | Ordering::Release => {
ifunc!(unsafe fn(dst: *mut u128, val: u128) {
load_store_detect! {
vmovdqa = atomic_store_vmovdqa_non_seqcst
cmpxchg16b = atomic_store_cmpxchg16b
fallback = atomic_store_non_seqcst
}
});
#[cfg(not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")))]
// SAFETY: the caller must uphold the safety contract.
unsafe {
#[cfg(target_feature = "sse")]
fn_alias! {
#[target_feature(enable = "avx")]
unsafe fn(dst: *mut u128, val: u128);
// atomic store by vmovdqa has at least release semantics.
atomic_store_vmovdqa_non_seqcst = atomic_store_vmovdqa(Ordering::Release);
atomic_store_vmovdqa_seqcst = atomic_store_vmovdqa(Ordering::SeqCst);
}
Ordering::SeqCst => {
ifunc!(unsafe fn(dst: *mut u128, val: u128) {
load_store_detect! {
vmovdqa = atomic_store_vmovdqa_seqcst
cmpxchg16b = atomic_store_cmpxchg16b
fallback = atomic_store_seqcst
}
});
match order {
// Relaxed and Release stores are equivalent in all implementations
// that may be called here (vmovdqa, asm-based cmpxchg16b, and fallback).
// core::arch's cmpxchg16b will never be called here.
Ordering::Relaxed | Ordering::Release => {
ifunc!(unsafe fn(dst: *mut u128, val: u128) {
load_store_detect! {
vmovdqa = atomic_store_vmovdqa_non_seqcst
cmpxchg16b = _atomic_store_cmpxchg16b
fallback = atomic_store_non_seqcst
}
});
}
Ordering::SeqCst => {
ifunc!(unsafe fn(dst: *mut u128, val: u128) {
load_store_detect! {
vmovdqa = atomic_store_vmovdqa_seqcst
cmpxchg16b = _atomic_store_cmpxchg16b
fallback = atomic_store_seqcst
}
});
}
_ => unreachable!("{:?}", order),
}
_ => unreachable!("{:?}", order),
}
}
}
#[cfg_attr(
not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
target_feature(enable = "cmpxchg16b")
)]
unsafe fn atomic_store_cmpxchg16b(dst: *mut u128, val: u128) {
unsafe fn _atomic_store_cmpxchg16b(dst: *mut u128, val: u128) {
// SAFETY: the caller must uphold the safety contract.
unsafe {
// cmpxchg16b is always SeqCst.
Expand Down
4 changes: 4 additions & 0 deletions tools/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ known_cfgs=(
portable_atomic_disable_fiq
portable_atomic_no_outline_atomics
portable_atomic_outline_atomics
portable_atomic_vmovdqa_atomic

# Not public APIs
portable_atomic_test_outline_atomics_detect_false
Expand Down Expand Up @@ -560,6 +561,9 @@ build() {
x_cargo "${args[@]}" "$@"
;;
esac
# Sandy Bridge (the first Intel chip that introduced AVX) with portable_atomic_vmovdqa_atomic cfg
RUSTFLAGS="${target_rustflags} -C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic" \
x_cargo "${args[@]}" --target-dir target/vmovdqa_atomic "$@"
;;
aarch64* | arm64*)
# macOS is skipped because it is +lse,+lse2 by default
Expand Down

0 comments on commit a9e5014

Please sign in to comment.