diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a22b38d..31df748c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ Note: In this file, do not use the hard wrap in the middle of a sentence for com ## [Unreleased] +- Optimize x86_64 128-bit atomic load/store on AMD CPUs with AVX. ([#49](https://github.com/taiki-e/portable-atomic/pull/49)) + ## [0.3.16] - 2022-12-09 - Add `Atomic{I,U}*::{add,sub,and,or,xor}` and `AtomicBool::{and,or,xor}` methods. ([#47](https://github.com/taiki-e/portable-atomic/pull/47)) diff --git a/src/imp/atomic128/cpuid.rs b/src/imp/atomic128/cpuid.rs index a3e12af1..6052f3cb 100644 --- a/src/imp/atomic128/cpuid.rs +++ b/src/imp/atomic128/cpuid.rs @@ -23,7 +23,7 @@ pub(crate) struct CpuInfo(u32); impl CpuInfo { const INIT: u32 = 0; const HAS_CMPXCHG16B: u32 = 1; - const IS_INTEL_AND_HAS_AVX: u32 = 2; + const HAS_VMOVDQA_ATOMIC: u32 = 2; #[inline] fn set(&mut self, bit: u32) { @@ -51,8 +51,8 @@ impl CpuInfo { } } #[inline] - pub(crate) fn is_intel_and_has_avx(self) -> bool { - self.test(CpuInfo::IS_INTEL_AND_HAS_AVX) + pub(crate) fn has_vmovdqa_atomic(self) -> bool { + self.test(CpuInfo::HAS_VMOVDQA_ATOMIC) } } @@ -95,6 +95,10 @@ unsafe fn __cpuid(leaf: u32) -> CpuidResult { CpuidResult { eax, ebx, ecx, edx } } +// https://en.wikipedia.org/wiki/CPUID +const VENDOR_ID_INTEL: [u8; 12] = *b"GenuineIntel"; +const VENDOR_ID_AMD: [u8; 12] = *b"AuthenticAMD"; + #[inline] unsafe fn _vendor_id() -> [u8; 12] { // SAFETY: the caller must guarantee that CPU supports `cpuid`. @@ -131,7 +135,9 @@ fn _cpuid(info: &mut CpuInfo) { info.set(CpuInfo::HAS_CMPXCHG16B); } - if vendor_id == *b"GenuineIntel" { + // VMOVDQA is atomic on Intel and AMD CPUs with AVX. + // See https://gcc.gnu.org/bugzilla//show_bug.cgi?id=104688 for details.
+ if vendor_id == VENDOR_ID_INTEL || vendor_id == VENDOR_ID_AMD { // https://github.com/rust-lang/stdarch/blob/28335054b1f417175ab5005cf1d9cf7937737930/crates/std_detect/src/detect/os/x86.rs#L131-L224 let cpu_xsave = test(proc_info_ecx, 26); if cpu_xsave { @@ -141,7 +147,7 @@ fn _cpuid(info: &mut CpuInfo) { let xcr0 = unsafe { _xgetbv(0) }; let os_avx_support = xcr0 & 6 == 6; if os_avx_support && test(proc_info_ecx, 28) { - info.set(CpuInfo::IS_INTEL_AND_HAS_AVX); + info.set(CpuInfo::HAS_VMOVDQA_ATOMIC); } } } @@ -186,19 +192,19 @@ mod tests { let mut x = CpuInfo(0); assert!(!x.test(CpuInfo::INIT)); assert!(!x.test(CpuInfo::HAS_CMPXCHG16B)); - assert!(!x.test(CpuInfo::IS_INTEL_AND_HAS_AVX)); + assert!(!x.test(CpuInfo::HAS_VMOVDQA_ATOMIC)); x.set(CpuInfo::INIT); assert!(x.test(CpuInfo::INIT)); assert!(!x.test(CpuInfo::HAS_CMPXCHG16B)); - assert!(!x.test(CpuInfo::IS_INTEL_AND_HAS_AVX)); + assert!(!x.test(CpuInfo::HAS_VMOVDQA_ATOMIC)); x.set(CpuInfo::HAS_CMPXCHG16B); assert!(x.test(CpuInfo::INIT)); assert!(x.test(CpuInfo::HAS_CMPXCHG16B)); - assert!(!x.test(CpuInfo::IS_INTEL_AND_HAS_AVX)); - x.set(CpuInfo::IS_INTEL_AND_HAS_AVX); + assert!(!x.test(CpuInfo::HAS_VMOVDQA_ATOMIC)); + x.set(CpuInfo::HAS_VMOVDQA_ATOMIC); assert!(x.test(CpuInfo::INIT)); assert!(x.test(CpuInfo::HAS_CMPXCHG16B)); - assert!(x.test(CpuInfo::IS_INTEL_AND_HAS_AVX)); + assert!(x.test(CpuInfo::HAS_VMOVDQA_ATOMIC)); } #[test] @@ -207,8 +213,11 @@ mod tests { #[cfg_attr(any(target_env = "sgx", miri), ignore)] fn test_cpuid() { assert_eq!(std::is_x86_feature_detected!("cmpxchg16b"), has_cmpxchg16b()); - if unsafe { _vendor_id() } == *b"GenuineIntel" { - assert_eq!(std::is_x86_feature_detected!("avx"), cpuid().is_intel_and_has_avx()); + let vendor_id = unsafe { _vendor_id() }; + if vendor_id == VENDOR_ID_INTEL || vendor_id == VENDOR_ID_AMD { + assert_eq!(std::is_x86_feature_detected!("avx"), cpuid().has_vmovdqa_atomic()); + } else { + assert!(!cpuid().has_vmovdqa_atomic()); } } } diff --git 
a/src/imp/atomic128/x86_64.rs b/src/imp/atomic128/x86_64.rs index 436611bb..6ead51e2 100644 --- a/src/imp/atomic128/x86_64.rs +++ b/src/imp/atomic128/x86_64.rs @@ -216,7 +216,7 @@ unsafe fn byte_wise_atomic_load(src: *mut u128) -> u128 { } } -// VMOVDQA is atomic on on Intel CPU with AVX. +// VMOVDQA is atomic on Intel and AMD CPUs with AVX. // See https://gcc.gnu.org/bugzilla//show_bug.cgi?id=104688 for details. // // Refs: https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64 @@ -308,7 +308,7 @@ unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { { // Check CMPXCHG16B anyway to prevent mixing atomic and non-atomic access. let cpuid = detect::cpuid(); - if cpuid.has_cmpxchg16b() && cpuid.is_intel_and_has_avx() { + if cpuid.has_cmpxchg16b() && cpuid.has_vmovdqa_atomic() { _atomic_load_vmovdqa } else { _atomic_load_cmpxchg16b @@ -353,7 +353,7 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { { // Check CMPXCHG16B anyway to prevent mixing atomic and non-atomic access. let cpuid = detect::cpuid(); - if cpuid.has_cmpxchg16b() && cpuid.is_intel_and_has_avx() { + if cpuid.has_cmpxchg16b() && cpuid.has_vmovdqa_atomic() { _atomic_store_vmovdqa } else { _atomic_store_cmpxchg16b