diff --git a/.github/.cspell/project-dictionary.txt b/.github/.cspell/project-dictionary.txt index cf44990f..3e6757b3 100644 --- a/.github/.cspell/project-dictionary.txt +++ b/.github/.cspell/project-dictionary.txt @@ -102,6 +102,7 @@ reentrancy rsbegin rsend rsil +sandybridge sbcs sched selgr diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 15dd77f9..08d9b46c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -275,6 +275,12 @@ jobs: RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+cmpxchg16b --cfg portable_atomic_no_outline_atomics RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+cmpxchg16b --cfg portable_atomic_no_outline_atomics if: matrix.target == '' && !contains(matrix.rust, 'i686') || startsWith(matrix.target, 'x86_64') + # Sandy Bridge (the first Intel chip that introduced AVX) with portable_atomic_vmovdqa_atomic cfg + - run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD $REPORT_TIME + env: + RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic + RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic + if: matrix.target == '' && !contains(matrix.rust, 'i686') || startsWith(matrix.target, 'x86_64') # aarch64 +lse # As of QEMU 8.0, QEMU has not yet implemented FEAT_LSE2: https://linaro.atlassian.net/browse/QEMU-300 # FEAT_LSE2 is tested on Cirrus CI's aarch64 macOS VM. @@ -510,6 +516,11 @@ jobs: # vmovdqa load/store path has been tested above, disable outline-atomics and test cmpxchg16b load/store path. RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+cmpxchg16b --cfg portable_atomic_no_outline_atomics RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+cmpxchg16b --cfg portable_atomic_no_outline_atomics + # Sandy Bridge (the first Intel chip that introduced AVX) with portable_atomic_vmovdqa_atomic cfg + - run: tools/test.sh -vv 2>&1 | ts -i '%.s ' + env: + RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic + RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic codegen: needs: tidy diff --git a/src/imp/atomic128/detect/x86_64.rs b/src/imp/atomic128/detect/x86_64.rs index d162d659..0ff847c5 100644 --- a/src/imp/atomic128/detect/x86_64.rs +++ b/src/imp/atomic128/detect/x86_64.rs @@ -1,6 +1,13 @@ // Adapted from https://github.com/rust-lang/stdarch. -#![cfg_attr(any(not(target_feature = "sse"), portable_atomic_sanitize_thread), allow(dead_code))] +#![cfg_attr( + any( + not(target_feature = "sse"), + portable_atomic_vmovdqa_atomic, + portable_atomic_sanitize_thread, + ), + allow(dead_code) +)] // Miri doesn't support inline assembly used in __cpuid: https://github.com/rust-lang/miri/issues/932 // SGX doesn't support CPUID: https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/core_arch/src/x86/cpuid.rs#L102-L105 diff --git a/src/imp/atomic128/x86_64.rs b/src/imp/atomic128/x86_64.rs index e75540cd..deb3bfe8 100644 --- a/src/imp/atomic128/x86_64.rs +++ b/src/imp/atomic128/x86_64.rs @@ -8,7 +8,8 @@ // - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit // // Generated asm: -// - x86_64 (+cmpxchg16b) https://godbolt.org/z/WPvfn16sY +// - x86_64 (+cmpxchg16b) https://godbolt.org/z/f9rT3eEs8 +// - x86_64 (+cmpxchg16b,+avx,vmovdqa_atomic) https://godbolt.org/z/feWx41Moa include!("macros.rs"); @@ -37,12 +38,18 @@ macro_rules! debug_assert_cmpxchg16b { } }; } -#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))] +#[cfg(any( + not(any(portable_atomic_no_outline_atomics, target_env = "sgx")), + all(portable_atomic_vmovdqa_atomic, target_feature = "avx"), +))] #[cfg(target_feature = "sse")] macro_rules! debug_assert_vmovdqa_atomic { () => {{ debug_assert_cmpxchg16b!(); - debug_assert!(detect::detect().has_vmovdqa_atomic()); + #[cfg(not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")))] + { + debug_assert!(detect::detect().has_vmovdqa_atomic()); + } }}; } @@ -140,7 +147,10 @@ unsafe fn cmpxchg16b(dst: *mut u128, old: u128, new: u128) -> (u128, bool) { // // Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled. // https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html -#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))] +#[cfg(any( + not(any(portable_atomic_no_outline_atomics, target_env = "sgx")), + all(portable_atomic_vmovdqa_atomic, target_feature = "avx"), +))] #[cfg(target_feature = "sse")] #[target_feature(enable = "avx")] #[inline] @@ -162,7 +172,10 @@ unsafe fn atomic_load_vmovdqa(src: *mut u128) -> u128 { core::mem::transmute(out) } } -#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))] +#[cfg(any( + not(any(portable_atomic_no_outline_atomics, target_env = "sgx")), + all(portable_atomic_vmovdqa_atomic, target_feature = "avx"), +))] #[cfg(target_feature = "sse")] #[target_feature(enable = "avx")] #[inline] @@ -199,8 +212,15 @@ unsafe fn atomic_store_vmovdqa(dst: *mut u128, val: u128, order: Ordering) { #[cfg(not(all( any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), - any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), + any( + all( + any(portable_atomic_no_outline_atomics, target_env = "sgx"), + not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")), + ), + not(target_feature = "sse"), + ), )))] +#[cfg(not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")))] macro_rules! load_store_detect { ( vmovdqa = $vmovdqa:ident @@ -250,28 +270,48 @@ unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 { // SGX doesn't support CPUID. #[cfg(all( any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), - any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), + any( + all( + any(portable_atomic_no_outline_atomics, target_env = "sgx"), + not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")), + ), + not(target_feature = "sse"), + ), ))] // SAFETY: the caller must uphold the safety contract. // cfg guarantees that CMPXCHG16B is available at compile-time. unsafe { // cmpxchg16b is always SeqCst. - atomic_load_cmpxchg16b(src) + _atomic_load_cmpxchg16b(src) } #[cfg(not(all( any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), - any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), + any( + all( + any(portable_atomic_no_outline_atomics, target_env = "sgx"), + not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")), + ), + not(target_feature = "sse"), + ), )))] - // SAFETY: the caller must uphold the safety contract. - unsafe { - ifunc!(unsafe fn(src: *mut u128) -> u128 { - load_store_detect! { - vmovdqa = atomic_load_vmovdqa - cmpxchg16b = atomic_load_cmpxchg16b - // Use SeqCst because cmpxchg16b and atomic load by vmovdqa is always SeqCst. - fallback = atomic_load_seqcst - } - }) + { + #[cfg(all(portable_atomic_vmovdqa_atomic, target_feature = "avx"))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + atomic_load_vmovdqa(src) + } + #[cfg(not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + ifunc!(unsafe fn(src: *mut u128) -> u128 { + load_store_detect! { + vmovdqa = atomic_load_vmovdqa + cmpxchg16b = _atomic_load_cmpxchg16b + // Use SeqCst because cmpxchg16b and atomic load by vmovdqa is always SeqCst. + fallback = atomic_load_seqcst + } + }) + } } } #[cfg_attr( @@ -279,7 +319,7 @@ unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 { target_feature(enable = "cmpxchg16b") )] #[inline] -unsafe fn atomic_load_cmpxchg16b(src: *mut u128) -> u128 { +unsafe fn _atomic_load_cmpxchg16b(src: *mut u128) -> u128 { debug_assert!(src as usize % 16 == 0); debug_assert_cmpxchg16b!(); @@ -328,52 +368,72 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { // SGX doesn't support CPUID. #[cfg(all( any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), - any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), + any( + all( + any(portable_atomic_no_outline_atomics, target_env = "sgx"), + not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")), + ), + not(target_feature = "sse"), + ), ))] // SAFETY: the caller must uphold the safety contract. // cfg guarantees that CMPXCHG16B is available at compile-time. unsafe { // cmpxchg16b is always SeqCst. let _ = order; - atomic_store_cmpxchg16b(dst, val); + _atomic_store_cmpxchg16b(dst, val); } #[cfg(not(all( any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), - any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), + any( + all( + any(portable_atomic_no_outline_atomics, target_env = "sgx"), + not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")), + ), + not(target_feature = "sse"), + ), )))] - // SAFETY: the caller must uphold the safety contract. - unsafe { - #[cfg(target_feature = "sse")] - fn_alias! { - #[target_feature(enable = "avx")] - unsafe fn(dst: *mut u128, val: u128); - // atomic store by vmovdqa has at least release semantics. - atomic_store_vmovdqa_non_seqcst = atomic_store_vmovdqa(Ordering::Release); - atomic_store_vmovdqa_seqcst = atomic_store_vmovdqa(Ordering::SeqCst); + { + #[cfg(all(portable_atomic_vmovdqa_atomic, target_feature = "avx"))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + atomic_store_vmovdqa(dst, val, order); } - match order { - // Relaxed and Release stores are equivalent in all implementations - // that may be called here (vmovdqa, asm-based cmpxchg16b, and fallback). - // core::arch's cmpxchg16b will never called here. - Ordering::Relaxed | Ordering::Release => { - ifunc!(unsafe fn(dst: *mut u128, val: u128) { - load_store_detect! { - vmovdqa = atomic_store_vmovdqa_non_seqcst - cmpxchg16b = atomic_store_cmpxchg16b - fallback = atomic_store_non_seqcst - } - }); + #[cfg(not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + #[cfg(target_feature = "sse")] + fn_alias! { + #[target_feature(enable = "avx")] + unsafe fn(dst: *mut u128, val: u128); + // atomic store by vmovdqa has at least release semantics. + atomic_store_vmovdqa_non_seqcst = atomic_store_vmovdqa(Ordering::Release); + atomic_store_vmovdqa_seqcst = atomic_store_vmovdqa(Ordering::SeqCst); } - Ordering::SeqCst => { - ifunc!(unsafe fn(dst: *mut u128, val: u128) { - load_store_detect! { - vmovdqa = atomic_store_vmovdqa_seqcst - cmpxchg16b = atomic_store_cmpxchg16b - fallback = atomic_store_seqcst - } - }); + match order { + // Relaxed and Release stores are equivalent in all implementations + // that may be called here (vmovdqa, asm-based cmpxchg16b, and fallback). + // core::arch's cmpxchg16b will never called here. + Ordering::Relaxed | Ordering::Release => { + ifunc!(unsafe fn(dst: *mut u128, val: u128) { + load_store_detect! { + vmovdqa = atomic_store_vmovdqa_non_seqcst + cmpxchg16b = _atomic_store_cmpxchg16b + fallback = atomic_store_non_seqcst + } + }); + } + Ordering::SeqCst => { + ifunc!(unsafe fn(dst: *mut u128, val: u128) { + load_store_detect! { + vmovdqa = atomic_store_vmovdqa_seqcst + cmpxchg16b = _atomic_store_cmpxchg16b + fallback = atomic_store_seqcst + } + }); + } + _ => unreachable!("{:?}", order), } - _ => unreachable!("{:?}", order), } } } @@ -381,7 +441,7 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), target_feature(enable = "cmpxchg16b") )] -unsafe fn atomic_store_cmpxchg16b(dst: *mut u128, val: u128) { +unsafe fn _atomic_store_cmpxchg16b(dst: *mut u128, val: u128) { // SAFETY: the caller must uphold the safety contract. unsafe { // cmpxchg16b is always SeqCst. diff --git a/tools/build.sh b/tools/build.sh index 81538eae..0d4b223a 100755 --- a/tools/build.sh +++ b/tools/build.sh @@ -111,6 +111,7 @@ known_cfgs=( portable_atomic_disable_fiq portable_atomic_no_outline_atomics portable_atomic_outline_atomics + portable_atomic_vmovdqa_atomic # Not public APIs portable_atomic_test_outline_atomics_detect_false @@ -567,6 +568,9 @@ build() { x_cargo "${args[@]}" "$@" ;; esac + # Sandy Bridge (the first Intel chip that introduced AVX) with portable_atomic_vmovdqa_atomic cfg + RUSTFLAGS="${target_rustflags} -C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic" \ + x_cargo "${args[@]}" --target-dir target/vmovdqa_atomic "$@" ;; aarch64* | arm64*) # macOS is skipped because it is +lse,+lse2 by default