Skip to content

Commit

Permalink
Merge #49
Browse files Browse the repository at this point in the history
49: Use SSE for 128-bit atomic load/store on AMD CPU with AVX r=taiki-e a=taiki-e

As mentioned in #10 (comment), AMD is also going to guarantee this.

Refs: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688#c10

Co-authored-by: Taiki Endo <te316e89@gmail.com>
  • Loading branch information
bors[bot] and taiki-e committed Dec 14, 2022
2 parents 01a4fd4 + d244152 commit b733d7c
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 15 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ Note: In this file, do not use the hard wrap in the middle of a sentence for com

## [Unreleased]

- Optimize x86_64 128-bit atomic load/store on AMD CPU with AVX. ([#49](https://github.com/taiki-e/portable-atomic/pull/49))

## [0.3.16] - 2022-12-09

- Add `Atomic{I,U}*::{add,sub,and,or,xor}` and `AtomicBool::{and,or,xor}` methods. ([#47](https://github.com/taiki-e/portable-atomic/pull/47))
Expand Down
33 changes: 21 additions & 12 deletions src/imp/atomic128/cpuid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ pub(crate) struct CpuInfo(u32);
impl CpuInfo {
const INIT: u32 = 0;
const HAS_CMPXCHG16B: u32 = 1;
const IS_INTEL_AND_HAS_AVX: u32 = 2;
const HAS_VMOVDQA_ATOMIC: u32 = 2;

#[inline]
fn set(&mut self, bit: u32) {
Expand Down Expand Up @@ -51,8 +51,8 @@ impl CpuInfo {
}
}
#[inline]
pub(crate) fn is_intel_and_has_avx(self) -> bool {
self.test(CpuInfo::IS_INTEL_AND_HAS_AVX)
pub(crate) fn has_vmovdqa_atomic(self) -> bool {
self.test(CpuInfo::HAS_VMOVDQA_ATOMIC)
}
}

Expand Down Expand Up @@ -95,6 +95,10 @@ unsafe fn __cpuid(leaf: u32) -> CpuidResult {
CpuidResult { eax, ebx, ecx, edx }
}

// https://en.wikipedia.org/wiki/CPUID
const VENDOR_ID_INTEL: [u8; 12] = *b"GenuineIntel";
const VENDOR_ID_AMD: [u8; 12] = *b"AuthenticAMD";

#[inline]
unsafe fn _vendor_id() -> [u8; 12] {
// SAFETY: the caller must guarantee that CPU supports `cpuid`.
Expand Down Expand Up @@ -131,7 +135,9 @@ fn _cpuid(info: &mut CpuInfo) {
info.set(CpuInfo::HAS_CMPXCHG16B);
}

if vendor_id == *b"GenuineIntel" {
// VMOVDQA is atomic on Intel and AMD CPUs with AVX.
// See https://gcc.gnu.org/bugzilla//show_bug.cgi?id=104688 for details.
if vendor_id == VENDOR_ID_INTEL || vendor_id == VENDOR_ID_AMD {
// https://github.com/rust-lang/stdarch/blob/28335054b1f417175ab5005cf1d9cf7937737930/crates/std_detect/src/detect/os/x86.rs#L131-L224
let cpu_xsave = test(proc_info_ecx, 26);
if cpu_xsave {
Expand All @@ -141,7 +147,7 @@ fn _cpuid(info: &mut CpuInfo) {
let xcr0 = unsafe { _xgetbv(0) };
let os_avx_support = xcr0 & 6 == 6;
if os_avx_support && test(proc_info_ecx, 28) {
info.set(CpuInfo::IS_INTEL_AND_HAS_AVX);
info.set(CpuInfo::HAS_VMOVDQA_ATOMIC);
}
}
}
Expand Down Expand Up @@ -186,19 +192,19 @@ mod tests {
let mut x = CpuInfo(0);
assert!(!x.test(CpuInfo::INIT));
assert!(!x.test(CpuInfo::HAS_CMPXCHG16B));
assert!(!x.test(CpuInfo::IS_INTEL_AND_HAS_AVX));
assert!(!x.test(CpuInfo::HAS_VMOVDQA_ATOMIC));
x.set(CpuInfo::INIT);
assert!(x.test(CpuInfo::INIT));
assert!(!x.test(CpuInfo::HAS_CMPXCHG16B));
assert!(!x.test(CpuInfo::IS_INTEL_AND_HAS_AVX));
assert!(!x.test(CpuInfo::HAS_VMOVDQA_ATOMIC));
x.set(CpuInfo::HAS_CMPXCHG16B);
assert!(x.test(CpuInfo::INIT));
assert!(x.test(CpuInfo::HAS_CMPXCHG16B));
assert!(!x.test(CpuInfo::IS_INTEL_AND_HAS_AVX));
x.set(CpuInfo::IS_INTEL_AND_HAS_AVX);
assert!(!x.test(CpuInfo::HAS_VMOVDQA_ATOMIC));
x.set(CpuInfo::HAS_VMOVDQA_ATOMIC);
assert!(x.test(CpuInfo::INIT));
assert!(x.test(CpuInfo::HAS_CMPXCHG16B));
assert!(x.test(CpuInfo::IS_INTEL_AND_HAS_AVX));
assert!(x.test(CpuInfo::HAS_VMOVDQA_ATOMIC));
}

#[test]
Expand All @@ -207,8 +213,11 @@ mod tests {
#[cfg_attr(any(target_env = "sgx", miri), ignore)]
fn test_cpuid() {
assert_eq!(std::is_x86_feature_detected!("cmpxchg16b"), has_cmpxchg16b());
if unsafe { _vendor_id() } == *b"GenuineIntel" {
assert_eq!(std::is_x86_feature_detected!("avx"), cpuid().is_intel_and_has_avx());
let vendor_id = unsafe { _vendor_id() };
if vendor_id == VENDOR_ID_INTEL || vendor_id == VENDOR_ID_AMD {
assert_eq!(std::is_x86_feature_detected!("avx"), cpuid().has_vmovdqa_atomic());
} else {
assert!(!cpuid().has_vmovdqa_atomic());
}
}
}
6 changes: 3 additions & 3 deletions src/imp/atomic128/x86_64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ unsafe fn byte_wise_atomic_load(src: *mut u128) -> u128 {
}
}

// VMOVDQA is atomic on on Intel CPU with AVX.
// VMOVDQA is atomic on Intel and AMD CPUs with AVX.
// See https://gcc.gnu.org/bugzilla//show_bug.cgi?id=104688 for details.
//
// Refs: https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
Expand Down Expand Up @@ -308,7 +308,7 @@ unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 {
{
// Check CMPXCHG16B anyway to prevent mixing atomic and non-atomic access.
let cpuid = detect::cpuid();
if cpuid.has_cmpxchg16b() && cpuid.is_intel_and_has_avx() {
if cpuid.has_cmpxchg16b() && cpuid.has_vmovdqa_atomic() {
_atomic_load_vmovdqa
} else {
_atomic_load_cmpxchg16b
Expand Down Expand Up @@ -353,7 +353,7 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
{
// Check CMPXCHG16B anyway to prevent mixing atomic and non-atomic access.
let cpuid = detect::cpuid();
if cpuid.has_cmpxchg16b() && cpuid.is_intel_and_has_avx() {
if cpuid.has_cmpxchg16b() && cpuid.has_vmovdqa_atomic() {
_atomic_store_vmovdqa
} else {
_atomic_store_cmpxchg16b
Expand Down

0 comments on commit b733d7c

Please sign in to comment.