diff --git a/src/inline_array/mod.rs b/src/inline_array/mod.rs
index 9ce3773..f60e50f 100644
--- a/src/inline_array/mod.rs
+++ b/src/inline_array/mod.rs
@@ -11,7 +11,6 @@ use core::cmp::Ordering;
 use core::fmt::{Debug, Error, Formatter};
 use core::hash::{Hash, Hasher};
 use core::iter::FromIterator;
-use core::marker::PhantomData;
 use core::mem::{self, MaybeUninit};
 use core::ops::{Deref, DerefMut};
 use core::ptr;
@@ -25,7 +24,7 @@ pub use self::iter::{Drain, Iter};
 /// This works like a vector, but allocated on the stack (and thus marginally
 /// faster than `Vec`), with the allocated space exactly matching the size of
 /// the given type `T`. The vector consists of a `usize` tracking its current
-/// length, followed by zero or more elements of type `A`. The capacity is thus
+/// length and zero or more elements of type `A`. The capacity is thus
 /// `( size_of::<T>() - size_of::<usize>() ) / size_of::<A>()`. This could lead
 /// to situations where the capacity is zero, if `size_of::<A>()` is greater
 /// than `size_of::<T>() - size_of::<usize>()`, which is not an error and
@@ -72,9 +71,37 @@ pub use self::iter::{Drain, Iter};
 ///
 /// Both of these will have the same size, and we can swap the `Inline` case out
 /// with the `Full` case once the `InlineArray` runs out of capacity.
+#[repr(C)]
 pub struct InlineArray<A, T> {
+    // Alignment tricks
+    //
+    // We need both the usize header and the data to be properly aligned in memory. We do a few
+    // tricks to handle that.
+    //
+    // * An alignment is always a power of 2. Therefore, with a pair of alignments, one is always
+    //   a multiple of the other (one way or the other).
+    // * A struct is aligned to at least the max alignment of each of its fields.
+    // * A repr(C) struct lays its fields out in order and pushes each as close to the previous
+    //   one as alignment allows.
+    //
+    // By placing two "fake" zero-sized fields with the relevant alignments first, we make sure
+    // that all 3 start at the beginning of the struct and that all of them are aligned to their
+    // maximum alignment.
+    //
+    // Furthermore, because we don't know whether usize or A has the bigger alignment, we decide
+    // on a case by case basis whether the header or the elements go first. By placing the one
+    // with the higher alignment requirement first, that one is aligned directly and the other
+    // one is aligned "automatically" when placed just after it.
+    //
+    // To the best of our knowledge, this is all guaranteed by the compiler. But just to make
+    // sure, we have a bunch of asserts in the constructor to check; as these are invariants
+    // enforced by the compiler, it should be trivial for it to remove the checks, so they are
+    // free (if we are correct) or will save us (if we are not).
+    //
+    // Additionally, the [A; 0] serves as a form of PhantomData.
+    _header_align: [usize; 0],
+    _data_align: [A; 0],
     data: MaybeUninit<T>,
-    phantom: PhantomData<A>,
 }
 
 const fn capacity(host_size: usize, header_size: usize, element_size: usize) -> usize {
@@ -89,6 +116,13 @@ impl<A, T> InlineArray<A, T> {
     const HOST_SIZE: usize = mem::size_of::<T>();
     const ELEMENT_SIZE: usize = mem::size_of::<A>();
     const HEADER_SIZE: usize = mem::size_of::<usize>();
+    // Do we place the header before the elements or the other way around?
+    const HEADER_FIRST: bool = mem::align_of::<usize>() >= mem::align_of::<A>();
+    // Note: one of the following is always 0.
+    // How many usizes to skip before the first element?
+    const ELEMENT_SKIP: usize = Self::HEADER_FIRST as usize;
+    // How many elements to skip before the header?
+    const HEADER_SKIP: usize = Self::CAPACITY * (1 - Self::ELEMENT_SKIP);
 
     /// The maximum number of elements the `InlineArray` can hold.
     pub const CAPACITY: usize = capacity(Self::HOST_SIZE, Self::HEADER_SIZE, Self::ELEMENT_SIZE);
@@ -96,25 +130,53 @@
     #[inline]
     #[must_use]
     unsafe fn len_const(&self) -> *const usize {
-        (&self.data) as *const _ as *const usize
+        let ptr = self
+            .data
+            .as_ptr()
+            .cast::<A>()
+            .add(Self::HEADER_SKIP)
+            .cast::<usize>();
+        debug_assert!(ptr as usize % mem::align_of::<usize>() == 0);
+        ptr
     }
 
     #[inline]
     #[must_use]
     pub(crate) unsafe fn len_mut(&mut self) -> *mut usize {
-        (&mut self.data) as *mut _ as *mut usize
+        let ptr = self
+            .data
+            .as_mut_ptr()
+            .cast::<A>()
+            .add(Self::HEADER_SKIP)
+            .cast::<usize>();
+        debug_assert!(ptr as usize % mem::align_of::<usize>() == 0);
+        ptr
     }
 
     #[inline]
     #[must_use]
     pub(crate) unsafe fn data(&self) -> *const A {
-        self.len_const().add(1) as *const _ as *const A
+        let ptr = self
+            .data
+            .as_ptr()
+            .cast::<usize>()
+            .add(Self::ELEMENT_SKIP)
+            .cast::<A>();
+        debug_assert!(ptr as usize % mem::align_of::<A>() == 0);
+        ptr
     }
 
     #[inline]
     #[must_use]
     unsafe fn data_mut(&mut self) -> *mut A {
-        self.len_mut().add(1) as *mut _ as *mut A
+        let ptr = self
+            .data
+            .as_mut_ptr()
+            .cast::<usize>()
+            .add(Self::ELEMENT_SKIP)
+            .cast::<A>();
+        debug_assert!(ptr as usize % mem::align_of::<A>() == 0);
+        ptr
     }
 
     #[inline]
@@ -164,12 +226,27 @@
     #[inline]
     #[must_use]
     pub fn new() -> Self {
-        debug_assert!(Self::HOST_SIZE > Self::HEADER_SIZE);
+        assert!(Self::HOST_SIZE > Self::HEADER_SIZE);
         let mut self_ = Self {
+            _header_align: [],
+            _data_align: [],
             data: MaybeUninit::uninit(),
-            phantom: PhantomData,
         };
-        unsafe { *self_.len_mut() = 0 }
+        // Sanity check our assumptions about what is guaranteed by the compiler. If we are right,
+        // these should completely optimize out of the resulting binary.
+        assert_eq!(
+            &self_ as *const _ as usize, self_.data.as_ptr() as usize,
+            "Padding at the start of struct",
+        );
+        assert_eq!(mem::size_of::<Self>(), mem::size_of::<T>());
+        assert_eq!(self_.data.as_ptr() as usize % mem::align_of::<usize>(), 0, "Unaligned header");
+        assert_eq!(self_.data.as_ptr() as usize % mem::align_of::<A>(), 0, "Unaligned elements");
+        assert!(
+            mem::align_of::<usize>() % mem::align_of::<A>() == 0 ||
+            mem::align_of::<A>() % mem::align_of::<usize>() == 0
+        );
+        assert!(Self::ELEMENT_SKIP == 0 || Self::HEADER_SKIP == 0);
+        unsafe { ptr::write(self_.len_mut(), 0usize) };
         self_
     }
 
@@ -270,7 +347,7 @@
 
     #[inline]
     unsafe fn drop_contents(&mut self) {
-        ptr::drop_in_place::<[A]>(&mut **self)
+        ptr::drop_in_place::<[A]>(&mut **self) // uses DerefMut
     }
 
     /// Discard the contents of the array.
@@ -516,4 +593,21 @@
         assert_eq!(65536, chunk.len());
         assert_eq!(Some(()), chunk.pop());
     }
+
+    #[test]
+    fn low_align_base() {
+        let mut chunk: InlineArray<String, [u8; 512]> = InlineArray::new();
+        chunk.push("Hello".to_owned());
+        assert_eq!(chunk[0], "Hello");
+    }
+
+    #[test]
+    fn big_align_elem() {
+        #[repr(align(256))]
+        struct BigAlign(usize);
+
+        let mut chunk: InlineArray<BigAlign, [BigAlign; 2]> = InlineArray::new();
+        chunk.push(BigAlign(42));
+        assert_eq!(chunk.get(0).unwrap() as *const _ as usize % mem::align_of::<BigAlign>(), 0);
+    }
 }
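
Not part of the patch, but for reviewers who want to poke at the layout trick in isolation: the sketch below re-derives the same behaviour with a standalone type. The names (`RawInline`, `Small`, `Big`) and the concrete type parameters are made up for illustration; only the `#[repr(C)]` struct with zero-sized `[usize; 0]` / `[A; 0]` fields and the `HEADER_FIRST` / `ELEMENT_SKIP` / `HEADER_SKIP` arithmetic mirror what the diff adds.

```rust
use core::mem::{self, MaybeUninit};

// Stand-in for the real `InlineArray`: storage of type `T` that will hold a
// `usize` length header plus elements of `A`, with two zero-sized fields that
// only contribute alignment (and `[A; 0]` doubling as `PhantomData`).
#[repr(C)]
#[allow(dead_code)]
struct RawInline<A, T> {
    _header_align: [usize; 0],
    _data_align: [A; 0],
    data: MaybeUninit<T>,
}

impl<A, T> RawInline<A, T> {
    const CAPACITY: usize =
        (mem::size_of::<T>() - mem::size_of::<usize>()) / mem::size_of::<A>();
    // Whichever of `usize` / `A` has the stricter alignment goes first.
    const HEADER_FIRST: bool = mem::align_of::<usize>() >= mem::align_of::<A>();
    // Number of usizes to skip before the first element (0 or 1).
    const ELEMENT_SKIP: usize = Self::HEADER_FIRST as usize;
    // Number of elements to skip before the header (0 or CAPACITY).
    const HEADER_SKIP: usize = Self::CAPACITY * (1 - Self::ELEMENT_SKIP);
}

#[repr(align(256))]
#[allow(dead_code)]
struct BigAlign(u8);

fn main() {
    // Header-first case: String's alignment is not larger than usize's, so
    // the length header sits at offset 0 and the elements start one usize in.
    type Small = RawInline<String, [u8; 256]>;
    assert!(Small::HEADER_FIRST);
    assert_eq!(Small::ELEMENT_SKIP, 1);
    assert_eq!(Small::HEADER_SKIP, 0);
    assert_eq!(mem::align_of::<Small>(), mem::align_of::<usize>());

    // Elements-first case: BigAlign's 256-byte alignment wins, so the whole
    // struct becomes 256-aligned and the header is pushed after the elements.
    type Big = RawInline<BigAlign, [BigAlign; 2]>;
    assert!(!Big::HEADER_FIRST);
    assert_eq!(Big::ELEMENT_SKIP, 0);
    assert_eq!(Big::HEADER_SKIP, Big::CAPACITY);
    assert_eq!(mem::align_of::<Big>(), 256);

    println!("layout assumptions hold");
}
```

The `[A; 0]` field pulls double duty: it forces the struct to adopt `A`'s alignment and it stands in for the removed `PhantomData<A>`, which is why the import can be dropped in the first hunk.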