diff --git a/src/libstd/collections/hash/map.rs b/src/libstd/collections/hash/map.rs index 55a1a75d049e9..cb8034cf6b576 100644 --- a/src/libstd/collections/hash/map.rs +++ b/src/libstd/collections/hash/map.rs @@ -9,222 +9,20 @@ // except according to those terms. use self::Entry::*; -use self::VacantEntryState::*; -use intrinsics::unlikely; use collections::CollectionAllocErr; use cell::Cell; use borrow::Borrow; -use cmp::max; use fmt::{self, Debug}; #[allow(deprecated)] use hash::{Hash, Hasher, BuildHasher, SipHasher13}; use iter::{FromIterator, FusedIterator}; -use mem::{self, replace}; -use ops::{Deref, DerefMut, Index}; +use marker::PhantomData; +use mem; +use ops::Index; use sys; -use super::table::{self, Bucket, EmptyBucket, Fallibility, FullBucket, FullBucketMut, RawTable, - SafeHash}; -use super::table::BucketState::{Empty, Full}; -use super::table::Fallibility::{Fallible, Infallible}; - -const MIN_NONZERO_RAW_CAPACITY: usize = 32; // must be a power of two - -/// The default behavior of HashMap implements a maximum load factor of 90.9%. -#[derive(Clone)] -struct DefaultResizePolicy; - -impl DefaultResizePolicy { - #[inline] - fn new() -> DefaultResizePolicy { - DefaultResizePolicy - } - - /// A hash map's "capacity" is the number of elements it can hold without - /// being resized. Its "raw capacity" is the number of slots required to - /// provide that capacity, accounting for maximum loading. The raw capacity - /// is always zero or a power of two. - #[inline] - fn try_raw_capacity(&self, len: usize) -> Result { - if len == 0 { - Ok(0) - } else { - // 1. Account for loading: `raw_capacity >= len * 1.1`. - // 2. Ensure it is a power of two. - // 3. Ensure it is at least the minimum size. - let mut raw_cap = len.checked_mul(11) - .map(|l| l / 10) - .and_then(|l| l.checked_next_power_of_two()) - .ok_or(CollectionAllocErr::CapacityOverflow)?; - - raw_cap = max(MIN_NONZERO_RAW_CAPACITY, raw_cap); - Ok(raw_cap) - } - } - - #[inline] - fn raw_capacity(&self, len: usize) -> usize { - self.try_raw_capacity(len).expect("raw_capacity overflow") - } - - /// The capacity of the given raw capacity. - #[inline] - fn capacity(&self, raw_cap: usize) -> usize { - // This doesn't have to be checked for overflow since allocation size - // in bytes will overflow earlier than multiplication by 10. - // - // As per https://github.com/rust-lang/rust/pull/30991 this is updated - // to be: (raw_cap * den + den - 1) / num - (raw_cap * 10 + 10 - 1) / 11 - } -} - -// The main performance trick in this hashmap is called Robin Hood Hashing. -// It gains its excellent performance from one essential operation: -// -// If an insertion collides with an existing element, and that element's -// "probe distance" (how far away the element is from its ideal location) -// is higher than how far we've already probed, swap the elements. -// -// This massively lowers variance in probe distance, and allows us to get very -// high load factors with good performance. The 90% load factor I use is rather -// conservative. -// -// > Why a load factor of approximately 90%? -// -// In general, all the distances to initial buckets will converge on the mean. -// At a load factor of α, the odds of finding the target bucket after k -// probes is approximately 1-α^k. If we set this equal to 50% (since we converge -// on the mean) and set k=8 (64-byte cache line / 8-byte hash), α=0.92. I round -// this down to make the math easier on the CPU and avoid its FPU. 
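To make the removed derivation concrete, here is a small standalone calculation (an editor's illustration, not part of the patch): at load factor α, a lookup is still colliding after k probes with probability roughly α^k, and the old policy's α = 10/11 puts roughly half of all lookups within the first 8 probes, i.e. within one cache line of 8-byte hashes.

```rust
// Standalone illustration of the removed load-factor math; not part of the patch.
fn main() {
    // Old default load factor: 10/11 ≈ 0.909.
    let alpha: f64 = 10.0 / 11.0;
    for &k in [1, 2, 4, 8, 16].iter() {
        // Chance of colliding k times in a row is ~alpha^k, so a lookup
        // resolves within k probes with probability ~1 - alpha^k.
        let miss = alpha.powi(k);
        println!("k = {:2}: still colliding ≈ {:.3}, found ≈ {:.3}", k, miss, 1.0 - miss);
    }
    // Solving alpha^8 = 0.5 gives alpha ≈ 0.917, which the comment rounds to 0.92
    // and the resize policy rounds down to 10/11.
    println!("alpha for 50% within 8 probes ≈ {:.3}", 0.5f64.powf(1.0 / 8.0));
}
```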
-// Since on average we start the probing in the middle of a cache line, this -// strategy pulls in two cache lines of hashes on every lookup. I think that's -// pretty good, but if you want to trade off some space, it could go down to one -// cache line on average with an α of 0.84. -// -// > Wait, what? Where did you get 1-α^k from? -// -// On the first probe, your odds of a collision with an existing element is α. -// The odds of doing this twice in a row is approximately α^2. For three times, -// α^3, etc. Therefore, the odds of colliding k times is α^k. The odds of NOT -// colliding after k tries is 1-α^k. -// -// The paper from 1986 cited below mentions an implementation which keeps track -// of the distance-to-initial-bucket histogram. This approach is not suitable -// for modern architectures because it requires maintaining an internal data -// structure. This allows very good first guesses, but we are most concerned -// with guessing entire cache lines, not individual indexes. Furthermore, array -// accesses are no longer linear and in one direction, as we have now. There -// is also memory and cache pressure that this would entail that would be very -// difficult to properly see in a microbenchmark. -// -// ## Future Improvements (FIXME!) -// -// Allow the load factor to be changed dynamically and/or at initialization. -// -// Also, would it be possible for us to reuse storage when growing the -// underlying table? This is exactly the use case for 'realloc', and may -// be worth exploring. -// -// ## Future Optimizations (FIXME!) -// -// Another possible design choice that I made without any real reason is -// parameterizing the raw table over keys and values. Technically, all we need -// is the size and alignment of keys and values, and the code should be just as -// efficient (well, we might need one for power-of-two size and one for not...). -// This has the potential to reduce code bloat in rust executables, without -// really losing anything except 4 words (key size, key alignment, val size, -// val alignment) which can be passed in to every call of a `RawTable` function. -// This would definitely be an avenue worth exploring if people start complaining -// about the size of rust executables. -// -// Annotate exceedingly likely branches in `table::make_hash` -// and `search_hashed` to reduce instruction cache pressure -// and mispredictions once it becomes possible (blocked on issue #11092). -// -// Shrinking the table could simply reallocate in place after moving buckets -// to the first half. -// -// The growth algorithm (fragment of the Proof of Correctness) -// -------------------- -// -// The growth algorithm is basically a fast path of the naive reinsertion- -// during-resize algorithm. Other paths should never be taken. -// -// Consider growing a robin hood hashtable of capacity n. Normally, we do this -// by allocating a new table of capacity `2n`, and then individually reinsert -// each element in the old table into the new one. This guarantees that the -// new table is a valid robin hood hashtable with all the desired statistical -// properties. Remark that the order we reinsert the elements in should not -// matter. For simplicity and efficiency, we will consider only linear -// reinsertions, which consist of reinserting all elements in the old table -// into the new one by increasing order of index. However we will not be -// starting our reinsertions from index 0 in general. 
If we start from index -// i, for the purpose of reinsertion we will consider all elements with real -// index j < i to have virtual index n + j. -// -// Our hash generation scheme consists of generating a 64-bit hash and -// truncating the most significant bits. When moving to the new table, we -// simply introduce a new bit to the front of the hash. Therefore, if an -// element has ideal index i in the old table, it can have one of two ideal -// locations in the new table. If the new bit is 0, then the new ideal index -// is i. If the new bit is 1, then the new ideal index is n + i. Intuitively, -// we are producing two independent tables of size n, and for each element we -// independently choose which table to insert it into with equal probability. -// However, rather than wrapping around themselves on overflowing their -// indexes, the first table overflows into the second, and the second into the -// first. Visually, our new table will look something like: -// -// [yy_xxx_xxxx_xxx|xx_yyy_yyyy_yyy] -// -// Where x's are elements inserted into the first table, y's are elements -// inserted into the second, and _'s are empty sections. We now define a few -// key concepts that we will use later. Note that this is a very abstract -// perspective of the table. A real resized table would be at least half -// empty. -// -// Theorem: A linear robin hood reinsertion from the first ideal element -// produces identical results to a linear naive reinsertion from the same -// element. -// -// FIXME(Gankro, pczarn): review the proof and put it all in a separate README.md -// -// Adaptive early resizing -// ---------------------- -// To protect against degenerate performance scenarios (including DOS attacks), -// the implementation includes an adaptive behavior that can resize the map -// early (before its capacity is exceeded) when suspiciously long probe sequences -// are encountered. -// -// With this algorithm in place it would be possible to turn a CPU attack into -// a memory attack due to the aggressive resizing. To prevent that the -// adaptive behavior only triggers when the map is at least half full. -// This reduces the effectiveness of the algorithm but also makes it completely safe. -// -// The previous safety measure also prevents degenerate interactions with -// really bad quality hash algorithms that can make normal inputs look like a -// DOS attack. -// -const DISPLACEMENT_THRESHOLD: usize = 128; -// -// The threshold of 128 is chosen to minimize the chance of exceeding it. -// In particular, we want that chance to be less than 10^-8 with a load of 90%. -// For displacement, the smallest constant that fits our needs is 90, -// so we round that up to 128. -// -// At a load factor of α, the odds of finding the target bucket after exactly n -// unsuccessful probes[1] are -// -// Pr_α{displacement = n} = -// (1 - α) / α * ∑_{k≥1} e^(-kα) * (kα)^(k+n) / (k + n)! * (1 - kα / (k + n + 1)) -// -// We use this formula to find the probability of triggering the adaptive behavior -// -// Pr_0.909{displacement > 128} = 1.601 * 10^-11 -// -// 1. Alfredo Viola (2005). Distributional analysis of Robin Hood linear probing -// hashing with buckets. +use super::raw::{Bucket, RawDrain, RawIntoIter, RawIter, RawTable}; /// A hash map implemented with linear probing and Robin Hood bucket stealing. /// @@ -417,274 +215,14 @@ pub struct HashMap { // All hashes are keyed on these values, to prevent hash collision attacks. 
hash_builder: S, - table: RawTable, - - resize_policy: DefaultResizePolicy, -} - -/// Search for a pre-hashed key. -/// If you don't already know the hash, use search or search_mut instead -#[inline] -fn search_hashed(table: M, hash: SafeHash, is_match: F) -> InternalEntry - where M: Deref>, - F: FnMut(&K) -> bool -{ - // This is the only function where capacity can be zero. To avoid - // undefined behavior when Bucket::new gets the raw bucket in this - // case, immediately return the appropriate search result. - if table.capacity() == 0 { - return InternalEntry::TableIsEmpty; - } - - search_hashed_nonempty(table, hash, is_match, true) -} - -/// Search for a pre-hashed key when the hash map is known to be non-empty. -#[inline] -fn search_hashed_nonempty(table: M, hash: SafeHash, mut is_match: F, - compare_hashes: bool) - -> InternalEntry - where M: Deref>, - F: FnMut(&K) -> bool -{ - // Do not check the capacity as an extra branch could slow the lookup. - - let size = table.size(); - let mut probe = Bucket::new(table, hash); - let mut displacement = 0; - - loop { - let full = match probe.peek() { - Empty(bucket) => { - // Found a hole! - return InternalEntry::Vacant { - hash, - elem: NoElem(bucket, displacement), - }; - } - Full(bucket) => bucket, - }; - - let probe_displacement = full.displacement(); - - if probe_displacement < displacement { - // Found a luckier bucket than me. - // We can finish the search early if we hit any bucket - // with a lower distance to initial bucket than we've probed. - return InternalEntry::Vacant { - hash, - elem: NeqElem(full, probe_displacement), - }; - } - - // If the hash doesn't match, it can't be this one.. - if !compare_hashes || hash == full.hash() { - // If the key doesn't match, it can't be this one.. - if is_match(full.read().0) { - return InternalEntry::Occupied { elem: full }; - } - } - displacement += 1; - probe = full.next(); - debug_assert!(displacement <= size); - } + table: RawTable<(K, V)>, } -/// Same as `search_hashed_nonempty` but for mutable access. #[inline] -fn search_hashed_nonempty_mut(table: M, hash: SafeHash, mut is_match: F, - compare_hashes: bool) - -> InternalEntry - where M: DerefMut>, - F: FnMut(&K) -> bool -{ - // Do not check the capacity as an extra branch could slow the lookup. - - let size = table.size(); - let mut probe = Bucket::new(table, hash); - let mut displacement = 0; - - loop { - let mut full = match probe.peek() { - Empty(bucket) => { - // Found a hole! - return InternalEntry::Vacant { - hash, - elem: NoElem(bucket, displacement), - }; - } - Full(bucket) => bucket, - }; - - let probe_displacement = full.displacement(); - - if probe_displacement < displacement { - // Found a luckier bucket than me. - // We can finish the search early if we hit any bucket - // with a lower distance to initial bucket than we've probed. - return InternalEntry::Vacant { - hash, - elem: NeqElem(full, probe_displacement), - }; - } - - // If the hash doesn't match, it can't be this one.. - if hash == full.hash() || !compare_hashes { - // If the key doesn't match, it can't be this one.. 
- if is_match(full.read_mut().0) { - return InternalEntry::Occupied { elem: full }; - } - } - displacement += 1; - probe = full.next(); - debug_assert!(displacement <= size); - } -} - -fn pop_internal(starting_bucket: FullBucketMut) - -> (K, V, &mut RawTable) -{ - let (empty, retkey, retval) = starting_bucket.take(); - let mut gap = match empty.gap_peek() { - Ok(b) => b, - Err(b) => return (retkey, retval, b.into_table()), - }; - - while gap.full().displacement() != 0 { - gap = match gap.shift() { - Ok(b) => b, - Err(b) => { - return (retkey, retval, b.into_table()); - }, - }; - } - - // Now we've done all our shifting. Return the value we grabbed earlier. - (retkey, retval, gap.into_table()) -} - -/// Perform robin hood bucket stealing at the given `bucket`. You must -/// also pass that bucket's displacement so we don't have to recalculate it. -/// -/// `hash`, `key`, and `val` are the elements to "robin hood" into the hashtable. -fn robin_hood<'a, K: 'a, V: 'a>(bucket: FullBucketMut<'a, K, V>, - mut displacement: usize, - mut hash: SafeHash, - mut key: K, - mut val: V) - -> FullBucketMut<'a, K, V> { - let size = bucket.table().size(); - let raw_capacity = bucket.table().capacity(); - // There can be at most `size - dib` buckets to displace, because - // in the worst case, there are `size` elements and we already are - // `displacement` buckets away from the initial one. - let idx_end = (bucket.index() + size - bucket.displacement()) % raw_capacity; - // Save the *starting point*. - let mut bucket = bucket.stash(); - - loop { - let (old_hash, old_key, old_val) = bucket.replace(hash, key, val); - hash = old_hash; - key = old_key; - val = old_val; - - loop { - displacement += 1; - let probe = bucket.next(); - debug_assert!(probe.index() != idx_end); - - let full_bucket = match probe.peek() { - Empty(bucket) => { - // Found a hole! - let bucket = bucket.put(hash, key, val); - // Now that it's stolen, just read the value's pointer - // right out of the table! Go back to the *starting point*. - // - // This use of `into_table` is misleading. It turns the - // bucket, which is a FullBucket on top of a - // FullBucketMut, into just one FullBucketMut. The "table" - // refers to the inner FullBucketMut in this context. - return bucket.into_table(); - } - Full(bucket) => bucket, - }; - - let probe_displacement = full_bucket.displacement(); - - bucket = full_bucket; - - // Robin hood! Steal the spot. - if probe_displacement < displacement { - displacement = probe_displacement; - break; - } - } - } -} - -impl HashMap - where K: Eq + Hash, - S: BuildHasher -{ - fn make_hash(&self, x: &X) -> SafeHash - where X: Hash - { - table::make_hash(&self.hash_builder, x) - } - - /// Search for a key, yielding the index if it's found in the hashtable. - /// If you already have the hash for the key lying around, or if you need an - /// InternalEntry, use search_hashed or search_hashed_nonempty. 
- #[inline] - fn search<'a, Q: ?Sized>(&'a self, q: &Q) - -> Option>> - where K: Borrow, - Q: Eq + Hash - { - if self.is_empty() { - return None; - } - - let hash = self.make_hash(q); - search_hashed_nonempty(&self.table, hash, |k| q.eq(k.borrow()), true) - .into_occupied_bucket() - } - - #[inline] - fn search_mut<'a, Q: ?Sized>(&'a mut self, q: &Q) - -> Option>> - where K: Borrow, - Q: Eq + Hash - { - if self.is_empty() { - return None; - } - - let hash = self.make_hash(q); - search_hashed_nonempty(&mut self.table, hash, |k| q.eq(k.borrow()), true) - .into_occupied_bucket() - } - - // The caller should ensure that invariants by Robin Hood Hashing hold - // and that there's space in the underlying table. - fn insert_hashed_ordered(&mut self, hash: SafeHash, k: K, v: V) { - let mut buckets = Bucket::new(&mut self.table, hash); - let start_index = buckets.index(); - - loop { - // We don't need to compare hashes for value swap. - // Not even DIBs for Robin Hood. - buckets = match buckets.peek() { - Empty(empty) => { - empty.put(hash, k, v); - return; - } - Full(b) => b.into_bucket(), - }; - buckets.next(); - debug_assert!(buckets.index() != start_index); - } - } +fn make_hash(hash_builder: &impl BuildHasher, val: &K) -> u64 { + let mut state = hash_builder.build_hasher(); + val.hash(&mut state); + state.finish() } impl HashMap { @@ -752,8 +290,7 @@ impl HashMap pub fn with_hasher(hash_builder: S) -> HashMap { HashMap { hash_builder, - resize_policy: DefaultResizePolicy::new(), - table: RawTable::new(0), + table: RawTable::new(), } } @@ -781,12 +318,9 @@ impl HashMap #[inline] #[stable(feature = "hashmap_build_hasher", since = "1.7.0")] pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> HashMap { - let resize_policy = DefaultResizePolicy::new(); - let raw_cap = resize_policy.raw_capacity(capacity); HashMap { hash_builder, - resize_policy, - table: RawTable::new(raw_cap), + table: RawTable::with_capacity(capacity), } } @@ -804,6 +338,7 @@ impl HashMap /// let map: HashMap = HashMap::with_hasher(hasher); /// let hasher: &RandomState = map.hasher(); /// ``` + #[inline] #[stable(feature = "hashmap_public_hasher", since = "1.9.0")] pub fn hasher(&self) -> &S { &self.hash_builder @@ -824,13 +359,13 @@ impl HashMap #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn capacity(&self) -> usize { - self.resize_policy.capacity(self.raw_capacity()) + self.table.capacity() } - /// Returns the hash map's raw capacity. 
+ #[cfg(test)] #[inline] fn raw_capacity(&self) -> usize { - self.table.capacity() + self.table.buckets() } /// Reserves capacity for at least `additional` more elements to be inserted @@ -853,11 +388,9 @@ impl HashMap #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn reserve(&mut self, additional: usize) { - match self.reserve_internal(additional, Infallible) { - Err(CollectionAllocErr::CapacityOverflow) => panic!("capacity overflow"), - Err(CollectionAllocErr::AllocErr) => unreachable!(), - Ok(()) => { /* yay */ } - } + let hash_builder = &self.hash_builder; + self.table + .reserve(additional, |x| make_hash(hash_builder, &x.0)); } /// Tries to reserve capacity for at least `additional` more elements to be inserted @@ -877,92 +410,12 @@ impl HashMap /// let mut map: HashMap<&str, isize> = HashMap::new(); /// map.try_reserve(10).expect("why is the test harness OOMing on 10 bytes?"); /// ``` + #[inline] #[unstable(feature = "try_reserve", reason = "new API", issue="48043")] pub fn try_reserve(&mut self, additional: usize) -> Result<(), CollectionAllocErr> { - self.reserve_internal(additional, Fallible) - } - - #[inline] - fn reserve_internal(&mut self, additional: usize, fallibility: Fallibility) - -> Result<(), CollectionAllocErr> { - - let remaining = self.capacity() - self.len(); // this can't overflow - if remaining < additional { - let min_cap = self.len() - .checked_add(additional) - .ok_or(CollectionAllocErr::CapacityOverflow)?; - let raw_cap = self.resize_policy.try_raw_capacity(min_cap)?; - self.try_resize(raw_cap, fallibility)?; - } else if self.table.tag() && remaining <= self.len() { - // Probe sequence is too long and table is half full, - // resize early to reduce probing length. - let new_capacity = self.table.capacity() * 2; - self.try_resize(new_capacity, fallibility)?; - } - Ok(()) - } - - /// Resizes the internal vectors to a new capacity. It's your - /// responsibility to: - /// 1) Ensure `new_raw_cap` is enough for all the elements, accounting - /// for the load factor. - /// 2) Ensure `new_raw_cap` is a power of two or zero. 
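The user-visible contract of `reserve`/`try_reserve` is unchanged by this rewrite; the sketch below (an editor's illustration, not code from the patch) exercises that contract and assumes the nightly `try_reserve` feature named in the attribute above.

```rust
// Usage sketch of the reserve/try_reserve contract; `try_reserve` is gated on
// the unstable `try_reserve` feature on the nightly toolchain this patch targets.
#![feature(try_reserve)]

use std::collections::HashMap;

fn main() {
    let mut map: HashMap<&str, i32> = HashMap::new();

    // reserve: after this call at least 10 more inserts fit without reallocation.
    map.reserve(10);
    assert!(map.capacity() >= 10);

    // try_reserve: same guarantee, but reports failure instead of aborting.
    map.try_reserve(10).expect("small reservations should not fail");

    // A capacity that cannot be represented yields an error rather than a panic.
    assert!(map.try_reserve(usize::max_value()).is_err());
}
```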
- #[inline(never)] - #[cold] - fn try_resize( - &mut self, - new_raw_cap: usize, - fallibility: Fallibility, - ) -> Result<(), CollectionAllocErr> { - assert!(self.table.size() <= new_raw_cap); - assert!(new_raw_cap.is_power_of_two() || new_raw_cap == 0); - - let mut old_table = replace( - &mut self.table, - match fallibility { - Infallible => RawTable::new(new_raw_cap), - Fallible => RawTable::try_new(new_raw_cap)?, - } - ); - let old_size = old_table.size(); - - if old_table.size() == 0 { - return Ok(()); - } - - let mut bucket = Bucket::head_bucket(&mut old_table); - - // This is how the buckets might be laid out in memory: - // ($ marks an initialized bucket) - // ________________ - // |$$$_$$$$$$_$$$$$| - // - // But we've skipped the entire initial cluster of buckets - // and will continue iteration in this order: - // ________________ - // |$$$$$$_$$$$$ - // ^ wrap around once end is reached - // ________________ - // $$$_____________| - // ^ exit once table.size == 0 - loop { - bucket = match bucket.peek() { - Full(bucket) => { - let h = bucket.hash(); - let (b, k, v) = bucket.take(); - self.insert_hashed_ordered(h, k, v); - if b.table().size() == 0 { - break; - } - b.into_bucket() - } - Empty(b) => b.into_bucket(), - }; - bucket.next(); - } - - assert_eq!(self.table.size(), old_size); - Ok(()) + let hash_builder = &self.hash_builder; + self.table + .try_reserve(additional, |x| make_hash(hash_builder, &x.0)) } /// Shrinks the capacity of the map as much as possible. It will drop @@ -981,20 +434,11 @@ impl HashMap /// map.shrink_to_fit(); /// assert!(map.capacity() >= 2); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn shrink_to_fit(&mut self) { - let new_raw_cap = self.resize_policy.raw_capacity(self.len()); - if self.raw_capacity() != new_raw_cap { - let old_table = replace(&mut self.table, RawTable::new(new_raw_cap)); - let old_size = old_table.size(); - - // Shrink the table. Naive algorithm for resizing: - for (h, k, v) in old_table.into_iter() { - self.insert_hashed_nocheck(h, k, v); - } - - debug_assert_eq!(self.table.size(), old_size); - } + let hash_builder = &self.hash_builder; + self.table.shrink_to(0, |x| make_hash(hash_builder, &x.0)); } /// Shrinks the capacity of the map with a lower limit. It will drop @@ -1019,40 +463,14 @@ impl HashMap /// map.shrink_to(0); /// assert!(map.capacity() >= 2); /// ``` + #[inline] #[unstable(feature = "shrink_to", reason = "new API", issue="56431")] pub fn shrink_to(&mut self, min_capacity: usize) { assert!(self.capacity() >= min_capacity, "Tried to shrink to a larger capacity"); - let new_raw_cap = self.resize_policy.raw_capacity(max(self.len(), min_capacity)); - if self.raw_capacity() != new_raw_cap { - let old_table = replace(&mut self.table, RawTable::new(new_raw_cap)); - let old_size = old_table.size(); - - // Shrink the table. Naive algorithm for resizing: - for (h, k, v) in old_table.into_iter() { - self.insert_hashed_nocheck(h, k, v); - } - - debug_assert_eq!(self.table.size(), old_size); - } - } - - /// Insert a pre-hashed key-value pair, without first checking - /// that there's enough room in the buckets. Returns a reference to the - /// newly insert value. - /// - /// If the key already exists, the hashtable will be returned untouched - /// and a reference to the existing element will be returned. 
- fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> Option { - let entry = search_hashed(&mut self.table, hash, |key| *key == k).into_entry(k); - match entry { - Some(Occupied(mut elem)) => Some(elem.insert(v)), - Some(Vacant(elem)) => { - elem.insert(v); - None - } - None => unreachable!(), - } + let hash_builder = &self.hash_builder; + self.table + .shrink_to(min_capacity, |x| make_hash(hash_builder, &x.0)); } /// An iterator visiting all keys in arbitrary order. @@ -1072,6 +490,7 @@ impl HashMap /// println!("{}", key); /// } /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn keys(&self) -> Keys { Keys { inner: self.iter() } @@ -1094,6 +513,7 @@ impl HashMap /// println!("{}", val); /// } /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn values(&self) -> Values { Values { inner: self.iter() } @@ -1121,6 +541,7 @@ impl HashMap /// println!("{}", val); /// } /// ``` + #[inline] #[stable(feature = "map_values_mut", since = "1.10.0")] pub fn values_mut(&mut self) -> ValuesMut { ValuesMut { inner: self.iter_mut() } @@ -1143,9 +564,16 @@ impl HashMap /// println!("key: {} val: {}", key, val); /// } /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn iter(&self) -> Iter { - Iter { inner: self.table.iter() } + // Here we tie the lifetime of self to the iter. + unsafe { + Iter { + inner: self.table.iter(), + _marker: PhantomData, + } + } } /// An iterator visiting all key-value pairs in arbitrary order, @@ -1171,9 +599,16 @@ impl HashMap /// println!("key: {} val: {}", key, val); /// } /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn iter_mut(&mut self) -> IterMut { - IterMut { inner: self.table.iter_mut() } + // Here we tie the lifetime of self to the iter. + unsafe { + IterMut { + inner: self.table.iter(), + _marker: PhantomData, + } + } } /// Gets the given key's corresponding entry in the map for in-place manipulation. @@ -1195,13 +630,28 @@ impl HashMap /// assert_eq!(letters[&'u'], 1); /// assert_eq!(letters.get(&'y'), None); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn entry(&mut self, key: K) -> Entry { - // Gotta resize now. - self.reserve(1); - let hash = self.make_hash(&key); - search_hashed(&mut self.table, hash, |q| q.eq(&key)) - .into_entry(key).expect("unreachable") + let hash = make_hash(&self.hash_builder, &key); + if let Some(elem) = self.table.find(hash, |q| q.0.eq(&key)) { + Entry::Occupied(OccupiedEntry { + key: Some(key), + elem, + table: &mut self.table, + }) + } else { + // Ideally we would put this in VacantEntry::insert, but Entry is not + // generic over the BuildHasher and adding a generic parameter would be + // a breaking change. + self.reserve(1); + + Entry::Vacant(VacantEntry { + hash, + key, + table: &mut self.table, + }) + } } /// Returns the number of elements in the map. @@ -1216,9 +666,10 @@ impl HashMap /// a.insert(1, "a"); /// assert_eq!(a.len(), 1); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn len(&self) -> usize { - self.table.size() + self.table.len() } /// Returns true if the map contains no elements. @@ -1261,7 +712,12 @@ impl HashMap #[inline] #[stable(feature = "drain", since = "1.6.0")] pub fn drain(&mut self) -> Drain { - Drain { inner: self.table.drain() } + // Here we tie the lifetime of self to the iter. + unsafe { + Drain { + inner: self.table.drain(), + } + } } /// Clears the map, removing all key-value pairs. 
Keeps the allocated memory @@ -1280,7 +736,7 @@ impl HashMap #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn clear(&mut self) { - self.drain(); + self.table.clear(); } /// Returns a reference to the value corresponding to the key. @@ -1308,7 +764,7 @@ impl HashMap where K: Borrow, Q: Hash + Eq { - self.search(k).map(|bucket| bucket.into_refs().1) + self.get_key_value(k).map(|(_, v)| v) } /// Returns the key-value pair corresponding to the supplied key. @@ -1331,12 +787,19 @@ impl HashMap /// assert_eq!(map.get_key_value(&1), Some((&1, &"a"))); /// assert_eq!(map.get_key_value(&2), None); /// ``` + #[inline] #[unstable(feature = "map_get_key_value", issue = "49347")] pub fn get_key_value(&self, k: &Q) -> Option<(&K, &V)> where K: Borrow, Q: Hash + Eq { - self.search(k).map(|bucket| bucket.into_refs()) + let hash = make_hash(&self.hash_builder, k); + self.table + .find(hash, |x| k.eq(x.0.borrow())) + .map(|item| unsafe { + let &(ref key, ref value) = item.as_ref(); + (key, value) + }) } /// Returns true if the map contains a value for the specified key. @@ -1358,12 +821,13 @@ impl HashMap /// assert_eq!(map.contains_key(&1), true); /// assert_eq!(map.contains_key(&2), false); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn contains_key(&self, k: &Q) -> bool where K: Borrow, Q: Hash + Eq { - self.search(k).is_some() + self.get(k).is_some() } /// Returns a mutable reference to the value corresponding to the key. @@ -1387,12 +851,16 @@ impl HashMap /// } /// assert_eq!(map[&1], "b"); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn get_mut(&mut self, k: &Q) -> Option<&mut V> where K: Borrow, Q: Hash + Eq { - self.search_mut(k).map(|bucket| bucket.into_mut_refs().1) + let hash = make_hash(&self.hash_builder, k); + self.table + .find(hash, |x| k.eq(x.0.borrow())) + .map(|item| unsafe { &mut item.as_mut().1 }) } /// Inserts a key-value pair into the map. 
@@ -1420,11 +888,20 @@ impl HashMap /// assert_eq!(map.insert(37, "c"), Some("b")); /// assert_eq!(map[&37], "c"); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn insert(&mut self, k: K, v: V) -> Option { - let hash = self.make_hash(&k); - self.reserve(1); - self.insert_hashed_nocheck(hash, k, v) + unsafe { + let hash = make_hash(&self.hash_builder, &k); + if let Some(item) = self.table.find(hash, |x| k.eq(&x.0)) { + Some(mem::replace(&mut item.as_mut().1, v)) + } else { + let hash_builder = &self.hash_builder; + self.table + .insert(hash, (k, v), |x| make_hash(hash_builder, &x.0)); + None + } + } } /// Removes a key from the map, returning the value at the key if the key @@ -1447,12 +924,13 @@ impl HashMap /// assert_eq!(map.remove(&1), Some("a")); /// assert_eq!(map.remove(&1), None); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn remove(&mut self, k: &Q) -> Option where K: Borrow, Q: Hash + Eq { - self.search_mut(k).map(|bucket| pop_internal(bucket).1) + self.remove_entry(k).map(|(_, v)| v) } /// Removes a key from the map, returning the stored key and value if the @@ -1477,16 +955,21 @@ impl HashMap /// assert_eq!(map.remove(&1), None); /// # } /// ``` + #[inline] #[stable(feature = "hash_map_remove_entry", since = "1.27.0")] pub fn remove_entry(&mut self, k: &Q) -> Option<(K, V)> where K: Borrow, Q: Hash + Eq { - self.search_mut(k) - .map(|bucket| { - let (k, v, _) = pop_internal(bucket); - (k, v) - }) + unsafe { + let hash = make_hash(&self.hash_builder, &k); + if let Some(item) = self.table.find(hash, |x| k.eq(x.0.borrow())) { + self.table.erase_no_drop(&item); + Some(item.read()) + } else { + None + } + } } /// Retains only the elements specified by the predicate. @@ -1506,35 +989,16 @@ impl HashMap pub fn retain(&mut self, mut f: F) where F: FnMut(&K, &mut V) -> bool { - if self.table.size() == 0 { - return; - } - let mut elems_left = self.table.size(); - let mut bucket = Bucket::head_bucket(&mut self.table); - bucket.prev(); - let start_index = bucket.index(); - while elems_left != 0 { - bucket = match bucket.peek() { - Full(mut full) => { - elems_left -= 1; - let should_remove = { - let (k, v) = full.read_mut(); - !f(k, v) - }; - if should_remove { - let prev_raw = full.raw(); - let (_, _, t) = pop_internal(full); - Bucket::new_from(prev_raw, t) - } else { - full.into_bucket() - } - }, - Empty(b) => { - b.into_bucket() + // Here we only use `iter` as a temporary, preventing use-after-free + unsafe { + for item in self.table.iter() { + let &mut (ref key, ref mut value) = item.as_mut(); + if !f(key, value) { + // Erase the element from the table first since drop might panic. + self.table.erase_no_drop(&item); + item.drop(); } - }; - bucket.prev(); // reverse iteration - debug_assert!(elems_left == 0 || bucket.index() != start_index); + } } } } @@ -1574,7 +1038,7 @@ impl HashMap /// so that the map now contains keys which compare equal, search may start /// acting erratically, with two keys randomly masking each other. Implementations /// are free to assume this doesn't happen (within the limits of memory-safety). - #[inline(always)] + #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn raw_entry_mut(&mut self) -> RawEntryBuilderMut { self.reserve(1); @@ -1596,6 +1060,7 @@ impl HashMap /// `get` should be preferred. /// /// Immutable raw entries have very limited use; you might instead want `raw_entry_mut`. 
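The raw entry API described above is easiest to see with a hash-memoization sketch (an editor's illustration, not from the patch; it assumes the unstable `hash_raw_entry` feature): the hash is computed once with the map's own `BuildHasher`, then reused for both an immutable and a mutable lookup.

```rust
// Sketch of the raw entry API; requires the unstable `hash_raw_entry` feature.
#![feature(hash_raw_entry)]

use std::collections::HashMap;
use std::hash::{BuildHasher, Hash, Hasher};

fn main() {
    let mut map: HashMap<&str, u32> = HashMap::new();
    map.insert("poneyland", 3);

    // Hash the key once with the map's own hasher (hash memoization).
    let mut state = map.hasher().build_hasher();
    "poneyland".hash(&mut state);
    let hash = state.finish();

    // Immutable raw lookup by precomputed hash; the key is not re-hashed.
    assert_eq!(map.raw_entry().from_key_hashed_nocheck(hash, "poneyland"),
               Some((&"poneyland", &3)));

    // Mutable raw entry: insert-or-update, mirroring the doc example above.
    *map.raw_entry_mut().from_key("poneyland").or_insert("poneyland", 0).1 += 1;
    assert_eq!(map["poneyland"], 4);
}
```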
+ #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn raw_entry(&self) -> RawEntryBuilder { RawEntryBuilder { map: self } @@ -1642,6 +1107,7 @@ impl Default for HashMap S: BuildHasher + Default { /// Creates an empty `HashMap`, with the `Default` value for the hasher. + #[inline] fn default() -> HashMap { HashMap::with_hasher(Default::default()) } @@ -1675,14 +1141,19 @@ impl<'a, K, Q: ?Sized, V, S> Index<&'a Q> for HashMap /// [`HashMap`]: struct.HashMap.html #[stable(feature = "rust1", since = "1.0.0")] pub struct Iter<'a, K: 'a, V: 'a> { - inner: table::Iter<'a, K, V>, + inner: RawIter<(K, V)>, + _marker: PhantomData<&'a HashMap>, } // FIXME(#26925) Remove in favor of `#[derive(Clone)]` #[stable(feature = "rust1", since = "1.0.0")] impl<'a, K, V> Clone for Iter<'a, K, V> { + #[inline] fn clone(&self) -> Iter<'a, K, V> { - Iter { inner: self.inner.clone() } + Iter { + inner: self.inner.clone(), + _marker: PhantomData, + } } } @@ -1704,7 +1175,20 @@ impl<'a, K: Debug, V: Debug> fmt::Debug for Iter<'a, K, V> { /// [`HashMap`]: struct.HashMap.html #[stable(feature = "rust1", since = "1.0.0")] pub struct IterMut<'a, K: 'a, V: 'a> { - inner: table::IterMut<'a, K, V>, + inner: RawIter<(K, V)>, + // To ensure invariance with respect to V + _marker: PhantomData<&'a mut V>, +} + +impl<'a, K, V> IterMut<'a, K, V> { + /// Returns a iterator of references over the remaining items. + #[inline] + pub(super) fn iter(&self) -> Iter { + Iter { + inner: self.inner.clone(), + _marker: PhantomData, + } + } } /// An owning iterator over the entries of a `HashMap`. @@ -1716,7 +1200,18 @@ pub struct IterMut<'a, K: 'a, V: 'a> { /// [`HashMap`]: struct.HashMap.html #[stable(feature = "rust1", since = "1.0.0")] pub struct IntoIter { - pub(super) inner: table::IntoIter, + inner: RawIntoIter<(K, V)>, +} + +impl IntoIter { + /// Returns a iterator of references over the remaining items. + #[inline] + pub(super) fn iter(&self) -> Iter { + Iter { + inner: self.inner.iter(), + _marker: PhantomData, + } + } } /// An iterator over the keys of a `HashMap`. @@ -1734,6 +1229,7 @@ pub struct Keys<'a, K: 'a, V: 'a> { // FIXME(#26925) Remove in favor of `#[derive(Clone)]` #[stable(feature = "rust1", since = "1.0.0")] impl<'a, K, V> Clone for Keys<'a, K, V> { + #[inline] fn clone(&self) -> Keys<'a, K, V> { Keys { inner: self.inner.clone() } } @@ -1763,6 +1259,7 @@ pub struct Values<'a, K: 'a, V: 'a> { // FIXME(#26925) Remove in favor of `#[derive(Clone)]` #[stable(feature = "rust1", since = "1.0.0")] impl<'a, K, V> Clone for Values<'a, K, V> { + #[inline] fn clone(&self) -> Values<'a, K, V> { Values { inner: self.inner.clone() } } @@ -1786,7 +1283,18 @@ impl<'a, K, V: Debug> fmt::Debug for Values<'a, K, V> { /// [`HashMap`]: struct.HashMap.html #[stable(feature = "drain", since = "1.6.0")] pub struct Drain<'a, K: 'a, V: 'a> { - pub(super) inner: table::Drain<'a, K, V>, + pub(super) inner: RawDrain<'a, (K, V)>, +} + +impl<'a, K, V> Drain<'a, K, V> { + /// Returns a iterator of references over the remaining items. + #[inline] + pub(super) fn iter(&self) -> Iter { + Iter { + inner: self.inner.iter(), + _marker: PhantomData, + } + } } /// A mutable iterator over the values of a `HashMap`. 
@@ -1801,47 +1309,6 @@ pub struct ValuesMut<'a, K: 'a, V: 'a> { inner: IterMut<'a, K, V>, } -enum InternalEntry { - Occupied { elem: FullBucket }, - Vacant { - hash: SafeHash, - elem: VacantEntryState, - }, - TableIsEmpty, -} - -impl InternalEntry { - #[inline] - fn into_occupied_bucket(self) -> Option> { - match self { - InternalEntry::Occupied { elem } => Some(elem), - _ => None, - } - } -} - -impl<'a, K, V> InternalEntry> { - #[inline] - fn into_entry(self, key: K) -> Option> { - match self { - InternalEntry::Occupied { elem } => { - Some(Occupied(OccupiedEntry { - key: Some(key), - elem, - })) - } - InternalEntry::Vacant { hash, elem } => { - Some(Vacant(VacantEntry { - hash, - key, - elem, - })) - } - InternalEntry::TableIsEmpty => None, - } - } -} - /// A builder for computing where in a HashMap a key-value pair would be stored. /// /// See the [`HashMap::raw_entry_mut`] docs for usage examples. @@ -1876,7 +1343,8 @@ pub enum RawEntryMut<'a, K: 'a, V: 'a, S: 'a> { /// [`RawEntryMut`]: enum.RawEntryMut.html #[unstable(feature = "hash_raw_entry", issue = "56167")] pub struct RawOccupiedEntryMut<'a, K: 'a, V: 'a> { - elem: FullBucket>, + elem: Bucket<(K, V)>, + table: &'a mut RawTable<(K, V)>, } /// A view into a vacant entry in a `HashMap`. @@ -1885,7 +1353,7 @@ pub struct RawOccupiedEntryMut<'a, K: 'a, V: 'a> { /// [`RawEntryMut`]: enum.RawEntryMut.html #[unstable(feature = "hash_raw_entry", issue = "56167")] pub struct RawVacantEntryMut<'a, K: 'a, V: 'a, S: 'a> { - elem: VacantEntryState>, + table: &'a mut RawTable<(K, V)>, hash_builder: &'a S, } @@ -1904,6 +1372,7 @@ impl<'a, K, V, S> RawEntryBuilderMut<'a, K, V, S> K: Eq + Hash, { /// Create a `RawEntryMut` from the given key. + #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn from_key(self, k: &Q) -> RawEntryMut<'a, K, V, S> where K: Borrow, @@ -1925,25 +1394,18 @@ impl<'a, K, V, S> RawEntryBuilderMut<'a, K, V, S> } #[inline] - fn search(self, hash: u64, is_match: F, compare_hashes: bool) -> RawEntryMut<'a, K, V, S> + fn search(self, hash: u64, mut is_match: F) -> RawEntryMut<'a, K, V, S> where for<'b> F: FnMut(&'b K) -> bool, { - match search_hashed_nonempty_mut(&mut self.map.table, - SafeHash::new(hash), - is_match, - compare_hashes) { - InternalEntry::Occupied { elem } => { - RawEntryMut::Occupied(RawOccupiedEntryMut { elem }) - } - InternalEntry::Vacant { elem, .. } => { - RawEntryMut::Vacant(RawVacantEntryMut { - elem, - hash_builder: &self.map.hash_builder, - }) - } - InternalEntry::TableIsEmpty => { - unreachable!() - } + match self.map.table.find(hash, |(k, _)| is_match(k)) { + Some(elem) => RawEntryMut::Occupied(RawOccupiedEntryMut { + elem, + table: &mut self.map.table, + }), + None => RawEntryMut::Vacant(RawVacantEntryMut { + table: &mut self.map.table, + hash_builder: &self.map.hash_builder, + }), } } /// Create a `RawEntryMut` from the given hash. @@ -1952,17 +1414,7 @@ impl<'a, K, V, S> RawEntryBuilderMut<'a, K, V, S> pub fn from_hash(self, hash: u64, is_match: F) -> RawEntryMut<'a, K, V, S> where for<'b> F: FnMut(&'b K) -> bool, { - self.search(hash, is_match, true) - } - - /// Search possible locations for an element with hash `hash` until `is_match` returns true for - /// one of them. There is no guarantee that all keys passed to `is_match` will have the provided - /// hash. 
- #[unstable(feature = "hash_raw_entry", issue = "56167")] - pub fn search_bucket(self, hash: u64, is_match: F) -> RawEntryMut<'a, K, V, S> - where for<'b> F: FnMut(&'b K) -> bool, - { - self.search(hash, is_match, false) + self.search(hash, is_match) } } @@ -1970,6 +1422,7 @@ impl<'a, K, V, S> RawEntryBuilder<'a, K, V, S> where S: BuildHasher, { /// Access an entry by key. + #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn from_key(self, k: &Q) -> Option<(&'a K, &'a V)> where K: Borrow, @@ -1981,6 +1434,7 @@ impl<'a, K, V, S> RawEntryBuilder<'a, K, V, S> } /// Access an entry by a key and its hash. + #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn from_key_hashed_nocheck(self, hash: u64, k: &Q) -> Option<(&'a K, &'a V)> where K: Borrow, @@ -1990,38 +1444,26 @@ impl<'a, K, V, S> RawEntryBuilder<'a, K, V, S> self.from_hash(hash, |q| q.borrow().eq(k)) } - fn search(self, hash: u64, is_match: F, compare_hashes: bool) -> Option<(&'a K, &'a V)> + #[inline] + fn search(self, hash: u64, mut is_match: F) -> Option<(&'a K, &'a V)> where F: FnMut(&K) -> bool { - if unsafe { unlikely(self.map.table.size() == 0) } { - return None; - } - match search_hashed_nonempty(&self.map.table, - SafeHash::new(hash), - is_match, - compare_hashes) { - InternalEntry::Occupied { elem } => Some(elem.into_refs()), - InternalEntry::Vacant { .. } => None, - InternalEntry::TableIsEmpty => unreachable!(), - } + self.map + .table + .find(hash, |(k, _)| is_match(k)) + .map(|item| unsafe { + let &(ref key, ref value) = item.as_ref(); + (key, value) + }) } /// Access an entry by hash. + #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn from_hash(self, hash: u64, is_match: F) -> Option<(&'a K, &'a V)> where F: FnMut(&K) -> bool { - self.search(hash, is_match, true) - } - - /// Search possible locations for an element with hash `hash` until `is_match` returns true for - /// one of them. There is no guarantee that all keys passed to `is_match` will have the provided - /// hash. - #[unstable(feature = "hash_raw_entry", issue = "56167")] - pub fn search_bucket(self, hash: u64, is_match: F) -> Option<(&'a K, &'a V)> - where F: FnMut(&K) -> bool - { - self.search(hash, is_match, false) + self.search(hash, is_match) } } @@ -2043,6 +1485,7 @@ impl<'a, K, V, S> RawEntryMut<'a, K, V, S> { /// *map.raw_entry_mut().from_key("poneyland").or_insert("poneyland", 10).1 *= 2; /// assert_eq!(map["poneyland"], 6); /// ``` + #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn or_insert(self, default_key: K, default_val: V) -> (&'a mut K, &'a mut V) where K: Hash, @@ -2071,6 +1514,7 @@ impl<'a, K, V, S> RawEntryMut<'a, K, V, S> { /// /// assert_eq!(map["poneyland"], "hoho".to_string()); /// ``` + #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn or_insert_with(self, default: F) -> (&'a mut K, &'a mut V) where F: FnOnce() -> (K, V), @@ -2109,6 +1553,7 @@ impl<'a, K, V, S> RawEntryMut<'a, K, V, S> { /// .or_insert("poneyland", 0); /// assert_eq!(map["poneyland"], 43); /// ``` + #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn and_modify(self, f: F) -> Self where F: FnOnce(&mut K, &mut V) @@ -2128,91 +1573,116 @@ impl<'a, K, V, S> RawEntryMut<'a, K, V, S> { impl<'a, K, V> RawOccupiedEntryMut<'a, K, V> { /// Gets a reference to the key in the entry. 
+ #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn key(&self) -> &K { - self.elem.read().0 + unsafe { &self.elem.as_ref().0 } } /// Gets a mutable reference to the key in the entry. + #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn key_mut(&mut self) -> &mut K { - self.elem.read_mut().0 + unsafe { &mut self.elem.as_mut().0 } } /// Converts the entry into a mutable reference to the key in the entry /// with a lifetime bound to the map itself. + #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn into_key(self) -> &'a mut K { - self.elem.into_mut_refs().0 + unsafe { &mut self.elem.as_mut().0 } } /// Gets a reference to the value in the entry. + #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn get(&self) -> &V { - self.elem.read().1 + unsafe { &self.elem.as_ref().1 } } /// Converts the OccupiedEntry into a mutable reference to the value in the entry /// with a lifetime bound to the map itself. + #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn into_mut(self) -> &'a mut V { - self.elem.into_mut_refs().1 + unsafe { &mut self.elem.as_mut().1 } } /// Gets a mutable reference to the value in the entry. + #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn get_mut(&mut self) -> &mut V { - self.elem.read_mut().1 + unsafe { &mut self.elem.as_mut().1 } } /// Gets a reference to the key and value in the entry. + #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn get_key_value(&mut self) -> (&K, &V) { - self.elem.read() + unsafe { + let &(ref key, ref value) = self.elem.as_ref(); + (key, value) + } } /// Gets a mutable reference to the key and value in the entry. + #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn get_key_value_mut(&mut self) -> (&mut K, &mut V) { - self.elem.read_mut() + unsafe { + let &mut (ref mut key, ref mut value) = self.elem.as_mut(); + (key, value) + } } /// Converts the OccupiedEntry into a mutable reference to the key and value in the entry /// with a lifetime bound to the map itself. + #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn into_key_value(self) -> (&'a mut K, &'a mut V) { - self.elem.into_mut_refs() + unsafe { + let &mut (ref mut key, ref mut value) = self.elem.as_mut(); + (key, value) + } } /// Sets the value of the entry, and returns the entry's old value. + #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn insert(&mut self, value: V) -> V { mem::replace(self.get_mut(), value) } /// Sets the value of the entry, and returns the entry's old value. + #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn insert_key(&mut self, key: K) -> K { mem::replace(self.key_mut(), key) } /// Takes the value out of the entry, and returns it. + #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn remove(self) -> V { - pop_internal(self.elem).1 + self.remove_entry().1 } /// Take the ownership of the key and value from the map. + #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn remove_entry(self) -> (K, V) { - let (k, v, _) = pop_internal(self.elem); - (k, v) + unsafe { + self.table.erase_no_drop(&self.elem); + self.elem.read() + } } } impl<'a, K, V, S> RawVacantEntryMut<'a, K, V, S> { /// Sets the value of the entry with the VacantEntry's key, /// and returns a mutable reference to it. 
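A short sketch of how callers see the occupied/vacant split that these accessors implement on top of `Bucket::as_ref`/`as_mut` (again an editor's illustration assuming the unstable `hash_raw_entry` feature, not code from the patch):

```rust
#![feature(hash_raw_entry)]

use std::collections::hash_map::RawEntryMut;
use std::collections::HashMap;

fn main() {
    let mut map: HashMap<&str, u32> = HashMap::new();

    for _ in 0..2 {
        match map.raw_entry_mut().from_key("poneyland") {
            // Occupied: direct access to the stored key and value.
            RawEntryMut::Occupied(mut o) => *o.get_mut() += 1,
            // Vacant: the caller supplies both key and value on insertion.
            RawEntryMut::Vacant(v) => {
                v.insert("poneyland", 1);
            }
        }
    }
    assert_eq!(map["poneyland"], 2);
}
```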
+ #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] pub fn insert(self, key: K, value: V) -> (&'a mut K, &'a mut V) where K: Hash, @@ -2227,23 +1697,19 @@ impl<'a, K, V, S> RawVacantEntryMut<'a, K, V, S> { /// and returns a mutable reference to it. #[inline] #[unstable(feature = "hash_raw_entry", issue = "56167")] - pub fn insert_hashed_nocheck(self, hash: u64, key: K, value: V) -> (&'a mut K, &'a mut V) { - let hash = SafeHash::new(hash); - let b = match self.elem { - NeqElem(mut bucket, disp) => { - if disp >= DISPLACEMENT_THRESHOLD { - bucket.table_mut().set_tag(true); - } - robin_hood(bucket, disp, hash, key, value) - }, - NoElem(mut bucket, disp) => { - if disp >= DISPLACEMENT_THRESHOLD { - bucket.table_mut().set_tag(true); - } - bucket.put(hash, key, value) - }, - }; - b.into_mut_refs() + pub fn insert_hashed_nocheck(self, hash: u64, key: K, value: V) -> (&'a mut K, &'a mut V) + where + K: Hash, + S: BuildHasher, + { + unsafe { + let hash_builder = self.hash_builder; + let elem = self + .table + .insert(hash, (key, value), |x| make_hash(hash_builder, &x.0)); + let &mut (ref mut key, ref mut value) = elem.as_mut(); + (key, value) + } } } @@ -2343,7 +1809,23 @@ impl<'a, K: 'a + Debug, V: 'a + Debug> Debug for Entry<'a, K, V> { #[stable(feature = "rust1", since = "1.0.0")] pub struct OccupiedEntry<'a, K: 'a, V: 'a> { key: Option, - elem: FullBucket>, + elem: Bucket<(K, V)>, + table: &'a mut RawTable<(K, V)>, +} + +#[stable(feature = "rust1", since = "1.0.0")] +unsafe impl<'a, K, V> Send for OccupiedEntry<'a, K, V> +where + K: Send, + V: Send, +{ +} +#[stable(feature = "rust1", since = "1.0.0")] +unsafe impl<'a, K, V> Sync for OccupiedEntry<'a, K, V> +where + K: Sync, + V: Sync, +{ } #[stable(feature= "debug_hash_map", since = "1.12.0")] @@ -2362,9 +1844,9 @@ impl<'a, K: 'a + Debug, V: 'a + Debug> Debug for OccupiedEntry<'a, K, V> { /// [`Entry`]: enum.Entry.html #[stable(feature = "rust1", since = "1.0.0")] pub struct VacantEntry<'a, K: 'a, V: 'a> { - hash: SafeHash, + hash: u64, key: K, - elem: VacantEntryState>, + table: &'a mut RawTable<(K, V)>, } #[stable(feature= "debug_hash_map", since = "1.12.0")] @@ -2376,15 +1858,6 @@ impl<'a, K: 'a + Debug, V: 'a> Debug for VacantEntry<'a, K, V> { } } -/// Possible states of a VacantEntry. -enum VacantEntryState { - /// The index is occupied, but the key to insert has precedence, - /// and will kick the current one out on insertion. - NeqElem(FullBucket, usize), - /// The index is genuinely vacant. 
- NoElem(EmptyBucket, usize), -} - #[stable(feature = "rust1", since = "1.0.0")] impl<'a, K, V, S> IntoIterator for &'a HashMap where K: Eq + Hash, @@ -2393,6 +1866,7 @@ impl<'a, K, V, S> IntoIterator for &'a HashMap type Item = (&'a K, &'a V); type IntoIter = Iter<'a, K, V>; + #[inline] fn into_iter(self) -> Iter<'a, K, V> { self.iter() } @@ -2406,6 +1880,7 @@ impl<'a, K, V, S> IntoIterator for &'a mut HashMap type Item = (&'a K, &'a mut V); type IntoIter = IterMut<'a, K, V>; + #[inline] fn into_iter(self) -> IterMut<'a, K, V> { self.iter_mut() } @@ -2436,6 +1911,7 @@ impl IntoIterator for HashMap /// // Not possible with .iter() /// let vec: Vec<(&str, i32)> = map.into_iter().collect(); /// ``` + #[inline] fn into_iter(self) -> IntoIter { IntoIter { inner: self.table.into_iter() } } @@ -2447,7 +1923,10 @@ impl<'a, K, V> Iterator for Iter<'a, K, V> { #[inline] fn next(&mut self) -> Option<(&'a K, &'a V)> { - self.inner.next() + self.inner.next().map(|x| unsafe { + let r = x.as_ref(); + (&r.0, &r.1) + }) } #[inline] fn size_hint(&self) -> (usize, Option) { @@ -2471,7 +1950,10 @@ impl<'a, K, V> Iterator for IterMut<'a, K, V> { #[inline] fn next(&mut self) -> Option<(&'a K, &'a mut V)> { - self.inner.next() + self.inner.next().map(|x| unsafe { + let r = x.as_mut(); + (&r.0, &mut r.1) + }) } #[inline] fn size_hint(&self) -> (usize, Option) { @@ -2495,7 +1977,7 @@ impl<'a, K, V> fmt::Debug for IterMut<'a, K, V> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_list() - .entries(self.inner.iter()) + .entries(self.iter()) .finish() } } @@ -2506,7 +1988,7 @@ impl Iterator for IntoIter { #[inline] fn next(&mut self) -> Option<(K, V)> { - self.inner.next().map(|(_, k, v)| (k, v)) + self.inner.next() } #[inline] fn size_hint(&self) -> (usize, Option) { @@ -2527,7 +2009,7 @@ impl FusedIterator for IntoIter {} impl fmt::Debug for IntoIter { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_list() - .entries(self.inner.iter()) + .entries(self.iter()) .finish() } } @@ -2608,7 +2090,7 @@ impl<'a, K, V> fmt::Debug for ValuesMut<'a, K, V> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_list() - .entries(self.inner.inner.iter()) + .entries(self.inner.iter()) .finish() } } @@ -2619,7 +2101,7 @@ impl<'a, K, V> Iterator for Drain<'a, K, V> { #[inline] fn next(&mut self) -> Option<(K, V)> { - self.inner.next().map(|(_, k, v)| (k, v)) + self.inner.next() } #[inline] fn size_hint(&self) -> (usize, Option) { @@ -2643,7 +2125,7 @@ impl<'a, K, V> fmt::Debug for Drain<'a, K, V> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_list() - .entries(self.inner.iter()) + .entries(self.iter()) .finish() } } @@ -2666,6 +2148,7 @@ impl<'a, K, V> Entry<'a, K, V> { /// *map.entry("poneyland").or_insert(10) *= 2; /// assert_eq!(map["poneyland"], 6); /// ``` + #[inline] pub fn or_insert(self, default: V) -> &'a mut V { match self { Occupied(entry) => entry.into_mut(), @@ -2689,6 +2172,7 @@ impl<'a, K, V> Entry<'a, K, V> { /// /// assert_eq!(map["poneyland"], "hoho".to_string()); /// ``` + #[inline] pub fn or_insert_with V>(self, default: F) -> &'a mut V { match self { Occupied(entry) => entry.into_mut(), @@ -2706,6 +2190,7 @@ impl<'a, K, V> Entry<'a, K, V> { /// let mut map: HashMap<&str, u32> = HashMap::new(); /// assert_eq!(map.entry("poneyland").key(), &"poneyland"); /// ``` + #[inline] #[stable(feature = "map_entry_keys", since = "1.10.0")] pub fn key(&self) -> &K { match *self { @@ -2734,6 +2219,7 @@ impl<'a, K, V> Entry<'a, K, V> { /// .or_insert(42); /// 
assert_eq!(map["poneyland"], 43); /// ``` + #[inline] #[stable(feature = "entry_and_modify", since = "1.26.0")] pub fn and_modify(self, f: F) -> Self where F: FnOnce(&mut V) @@ -2766,6 +2252,7 @@ impl<'a, K, V: Default> Entry<'a, K, V> { /// assert_eq!(map["poneyland"], None); /// # } /// ``` + #[inline] pub fn or_default(self) -> &'a mut V { match self { Occupied(entry) => entry.into_mut(), @@ -2786,9 +2273,10 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { /// map.entry("poneyland").or_insert(12); /// assert_eq!(map.entry("poneyland").key(), &"poneyland"); /// ``` + #[inline] #[stable(feature = "map_entry_keys", since = "1.10.0")] pub fn key(&self) -> &K { - self.elem.read().0 + unsafe { &self.elem.as_ref().0 } } /// Take the ownership of the key and value from the map. @@ -2809,10 +2297,13 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { /// /// assert_eq!(map.contains_key("poneyland"), false); /// ``` + #[inline] #[stable(feature = "map_entry_recover_keys2", since = "1.12.0")] pub fn remove_entry(self) -> (K, V) { - let (k, v, _) = pop_internal(self.elem); - (k, v) + unsafe { + self.table.erase_no_drop(&self.elem); + self.elem.read() + } } /// Gets a reference to the value in the entry. @@ -2830,9 +2321,10 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { /// assert_eq!(o.get(), &12); /// } /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn get(&self) -> &V { - self.elem.read().1 + unsafe { &self.elem.as_ref().1 } } /// Gets a mutable reference to the value in the entry. @@ -2862,9 +2354,10 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { /// /// assert_eq!(map["poneyland"], 24); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn get_mut(&mut self) -> &mut V { - self.elem.read_mut().1 + unsafe { &mut self.elem.as_mut().1 } } /// Converts the OccupiedEntry into a mutable reference to the value in the entry @@ -2890,9 +2383,10 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { /// /// assert_eq!(map["poneyland"], 22); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn into_mut(self) -> &'a mut V { - self.elem.into_mut_refs().1 + unsafe { &mut self.elem.as_mut().1 } } /// Sets the value of the entry, and returns the entry's old value. @@ -2912,6 +2406,7 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { /// /// assert_eq!(map["poneyland"], 15); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn insert(&mut self, mut value: V) -> V { let old_value = self.get_mut(); @@ -2936,16 +2431,10 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { /// /// assert_eq!(map.contains_key("poneyland"), false); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn remove(self) -> V { - pop_internal(self.elem).1 - } - - /// Returns a key that was used for search. - /// - /// The key was retained for further use. - fn take_key(&mut self) -> Option { - self.key.take() + self.remove_entry().1 } /// Replaces the entry, returning the old key and value. 
The new key in the hash map will be @@ -2969,12 +2458,13 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { /// } /// /// ``` + #[inline] #[unstable(feature = "map_entry_replace", issue = "44286")] - pub fn replace_entry(mut self, value: V) -> (K, V) { - let (old_key, old_value) = self.elem.read_mut(); + pub fn replace_entry(self, value: V) -> (K, V) { + let entry = unsafe { self.elem.as_mut() }; - let old_key = mem::replace(old_key, self.key.unwrap()); - let old_value = mem::replace(old_value, value); + let old_key = mem::replace(&mut entry.0, self.key.unwrap()); + let old_value = mem::replace(&mut entry.1, value); (old_key, old_value) } @@ -3004,10 +2494,11 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { /// } /// } /// ``` + #[inline] #[unstable(feature = "map_entry_replace", issue = "44286")] - pub fn replace_key(mut self) -> K { - let (old_key, _) = self.elem.read_mut(); - mem::replace(old_key, self.key.unwrap()) + pub fn replace_key(self) -> K { + let entry = unsafe { self.elem.as_mut() }; + mem::replace(&mut entry.0, self.key.unwrap()) } } @@ -3023,6 +2514,7 @@ impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> { /// let mut map: HashMap<&str, u32> = HashMap::new(); /// assert_eq!(map.entry("poneyland").key(), &"poneyland"); /// ``` + #[inline] #[stable(feature = "map_entry_keys", since = "1.10.0")] pub fn key(&self) -> &K { &self.key @@ -3042,6 +2534,7 @@ impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> { /// v.into_key(); /// } /// ``` + #[inline] #[stable(feature = "map_entry_recover_keys2", since = "1.12.0")] pub fn into_key(self) -> K { self.key @@ -3063,23 +2556,11 @@ impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> { /// } /// assert_eq!(map["poneyland"], 37); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn insert(self, value: V) -> &'a mut V { - let b = match self.elem { - NeqElem(mut bucket, disp) => { - if disp >= DISPLACEMENT_THRESHOLD { - bucket.table_mut().set_tag(true); - } - robin_hood(bucket, disp, self.hash, self.key, value) - }, - NoElem(mut bucket, disp) => { - if disp >= DISPLACEMENT_THRESHOLD { - bucket.table_mut().set_tag(true); - } - bucket.put(self.hash, self.key, value) - }, - }; - b.into_mut_refs().1 + let bucket = self.table.insert_no_grow(self.hash, (self.key, value)); + unsafe { &mut bucket.as_mut().1 } } } @@ -3088,6 +2569,7 @@ impl FromIterator<(K, V)> for HashMap where K: Eq + Hash, S: BuildHasher + Default { + #[inline] fn from_iter>(iter: T) -> HashMap { let mut map = HashMap::with_hasher(Default::default()); map.extend(iter); @@ -3100,6 +2582,7 @@ impl Extend<(K, V)> for HashMap where K: Eq + Hash, S: BuildHasher { + #[inline] fn extend>(&mut self, iter: T) { // Keys may be already present or show multiple times in the iterator. // Reserve the entire hint lower bound if the map is empty. @@ -3124,6 +2607,7 @@ impl<'a, K, V, S> Extend<(&'a K, &'a V)> for HashMap V: Copy, S: BuildHasher { + #[inline] fn extend>(&mut self, iter: T) { self.extend(iter.into_iter().map(|(&key, &value)| (key, value))); } @@ -3223,6 +2707,7 @@ impl DefaultHasher { /// instances created through `new` or `default`. #[stable(feature = "hashmap_default_hasher", since = "1.13.0")] #[allow(deprecated)] + #[inline] pub fn new() -> DefaultHasher { DefaultHasher(SipHasher13::new_with_keys(0, 0)) } @@ -3232,6 +2717,7 @@ impl DefaultHasher { impl Default for DefaultHasher { /// Creates a new `DefaultHasher` using [`new`][DefaultHasher::new]. /// See its documentation for more. 
+ #[inline] fn default() -> DefaultHasher { DefaultHasher::new() } @@ -3266,39 +2752,6 @@ impl fmt::Debug for RandomState { } } -impl super::Recover for HashMap - where K: Eq + Hash + Borrow, - S: BuildHasher, - Q: Eq + Hash -{ - type Key = K; - - #[inline] - fn get(&self, key: &Q) -> Option<&K> { - self.search(key).map(|bucket| bucket.into_refs().0) - } - - fn take(&mut self, key: &Q) -> Option { - self.search_mut(key).map(|bucket| pop_internal(bucket).0) - } - - #[inline] - fn replace(&mut self, key: K) -> Option { - self.reserve(1); - - match self.entry(key) { - Occupied(mut occupied) => { - let key = occupied.take_key().unwrap(); - Some(mem::replace(occupied.elem.read_mut().0, key)) - } - Vacant(vacant) => { - vacant.insert(()); - None - } - } - } -} - #[allow(dead_code)] fn assert_covariance() { fn map_key<'new>(v: HashMap<&'static str, u8>) -> HashMap<&'new str, u8> { @@ -3345,7 +2798,6 @@ mod test_map { use cell::RefCell; use rand::{thread_rng, Rng}; use realstd::collections::CollectionAllocErr::*; - use realstd::mem::size_of; use realstd::usize; #[test] @@ -3861,7 +3313,7 @@ mod test_map { let mut m = HashMap::new(); assert_eq!(m.len(), 0); - assert_eq!(m.raw_capacity(), 0); + assert_eq!(m.raw_capacity(), 1); assert!(m.is_empty()); m.insert(0, 0); @@ -3901,7 +3353,7 @@ mod test_map { m.shrink_to_fit(); assert_eq!(m.raw_capacity(), raw_cap); // again, a little more than half full - for _ in 0..raw_cap / 2 - 1 { + for _ in 0..raw_cap / 2 { i -= 1; m.remove(&i); } @@ -4208,51 +3660,20 @@ mod test_map { assert_eq!(map[&6], 60); } - #[test] - fn test_adaptive() { - const TEST_LEN: usize = 5000; - // by cloning we get maps with the same hasher seed - let mut first = HashMap::new(); - let mut second = first.clone(); - first.extend((0..TEST_LEN).map(|i| (i, i))); - second.extend((TEST_LEN..TEST_LEN * 2).map(|i| (i, i))); - - for (&k, &v) in &second { - let prev_cap = first.capacity(); - let expect_grow = first.len() == prev_cap; - first.insert(k, v); - if !expect_grow && first.capacity() != prev_cap { - return; - } - } - panic!("Adaptive early resize failed"); - } - #[test] fn test_try_reserve() { - - let mut empty_bytes: HashMap = HashMap::new(); + let mut empty_bytes: HashMap = HashMap::new(); const MAX_USIZE: usize = usize::MAX; - // HashMap and RawTables use complicated size calculations - // hashes_size is sizeof(HashUint) * capacity; - // pairs_size is sizeof((K. 
V)) * capacity; - // alignment_hashes_size is 8 - // alignment_pairs size is 4 - let size_of_multiplier = (size_of::() + size_of::<(u8, u8)>()).next_power_of_two(); - // The following formula is used to calculate the new capacity - let max_no_ovf = ((MAX_USIZE / 11) * 10) / size_of_multiplier - 1; - if let Err(CapacityOverflow) = empty_bytes.try_reserve(MAX_USIZE) { - } else { panic!("usize::MAX should trigger an overflow!"); } + } else { + panic!("usize::MAX should trigger an overflow!"); + } - if size_of::() < 8 { - if let Err(CapacityOverflow) = empty_bytes.try_reserve(max_no_ovf) { - } else { panic!("isize::MAX + 1 should trigger a CapacityOverflow!") } + if let Err(AllocErr) = empty_bytes.try_reserve(MAX_USIZE / 8) { } else { - if let Err(AllocErr) = empty_bytes.try_reserve(max_no_ovf) { - } else { panic!("isize::MAX + 1 should trigger an OOM!") } + panic!("usize::MAX / 8 should trigger an OOM!") } } @@ -4284,7 +3705,6 @@ mod test_map { assert_eq!(map.raw_entry().from_key(&1).unwrap(), (&1, &100)); assert_eq!(map.raw_entry().from_hash(hash1, |k| *k == 1).unwrap(), (&1, &100)); assert_eq!(map.raw_entry().from_key_hashed_nocheck(hash1, &1).unwrap(), (&1, &100)); - assert_eq!(map.raw_entry().search_bucket(hash1, |k| *k == 1).unwrap(), (&1, &100)); assert_eq!(map.len(), 6); // Existing key (update) @@ -4300,7 +3720,6 @@ mod test_map { assert_eq!(map.raw_entry().from_key(&2).unwrap(), (&2, &200)); assert_eq!(map.raw_entry().from_hash(hash2, |k| *k == 2).unwrap(), (&2, &200)); assert_eq!(map.raw_entry().from_key_hashed_nocheck(hash2, &2).unwrap(), (&2, &200)); - assert_eq!(map.raw_entry().search_bucket(hash2, |k| *k == 2).unwrap(), (&2, &200)); assert_eq!(map.len(), 6); // Existing key (take) @@ -4314,7 +3733,6 @@ mod test_map { assert_eq!(map.raw_entry().from_key(&3), None); assert_eq!(map.raw_entry().from_hash(hash3, |k| *k == 3), None); assert_eq!(map.raw_entry().from_key_hashed_nocheck(hash3, &3), None); - assert_eq!(map.raw_entry().search_bucket(hash3, |k| *k == 3), None); assert_eq!(map.len(), 5); @@ -4337,7 +3755,6 @@ mod test_map { assert_eq!(map.raw_entry().from_key(&k), kv); assert_eq!(map.raw_entry().from_hash(hash, |q| *q == k), kv); assert_eq!(map.raw_entry().from_key_hashed_nocheck(hash, &k), kv); - assert_eq!(map.raw_entry().search_bucket(hash, |q| *q == k), kv); match map.raw_entry_mut().from_key(&k) { Occupied(mut o) => assert_eq!(Some(o.get_key_value()), kv), @@ -4351,10 +3768,6 @@ mod test_map { Occupied(mut o) => assert_eq!(Some(o.get_key_value()), kv), Vacant(_) => assert_eq!(v, None), } - match map.raw_entry_mut().search_bucket(hash, |q| *q == k) { - Occupied(mut o) => assert_eq!(Some(o.get_key_value()), kv), - Vacant(_) => assert_eq!(v, None), - } } } diff --git a/src/libstd/collections/hash/mod.rs b/src/libstd/collections/hash/mod.rs index 7a22bec5a3f89..82f6bc0d0e25e 100644 --- a/src/libstd/collections/hash/mod.rs +++ b/src/libstd/collections/hash/mod.rs @@ -11,14 +11,6 @@ //! 
Unordered containers, implemented as hash-tables mod bench; -mod table; +mod raw; pub mod map; pub mod set; - -trait Recover { - type Key; - - fn get(&self, key: &Q) -> Option<&Self::Key>; - fn take(&mut self, key: &Q) -> Option; - fn replace(&mut self, key: Self::Key) -> Option; -} diff --git a/src/libstd/collections/hash/raw/bitmask.rs b/src/libstd/collections/hash/raw/bitmask.rs new file mode 100644 index 0000000000000..11193a0aad8e8 --- /dev/null +++ b/src/libstd/collections/hash/raw/bitmask.rs @@ -0,0 +1,100 @@ +use super::imp::{BitMaskWord, BITMASK_MASK, BITMASK_STRIDE}; +use core::intrinsics; + +/// A bit mask which contains the result of a `Match` operation on a `Group` and +/// allows iterating through them. +/// +/// The bit mask is arranged so that low-order bits represent lower memory +/// addresses for group match results. +/// +/// For implementation reasons, the bits in the set may be sparsely packed, so +/// that there is only one bit-per-byte used (the high bit, 7). If this is the +/// case, `BITMASK_STRIDE` will be 8 to indicate a divide-by-8 should be +/// performed on counts/indices to normalize this difference. `BITMASK_MASK` is +/// similarly a mask of all the actually-used bits. +#[derive(Copy, Clone)] +pub struct BitMask(pub BitMaskWord); + +impl BitMask { + /// Returns a new `BitMask` with all bits inverted. + #[inline] + #[must_use] + pub fn invert(self) -> BitMask { + BitMask(self.0 ^ BITMASK_MASK) + } + + /// Returns a new `BitMask` with the lowest bit removed. + #[inline] + #[must_use] + pub fn remove_lowest_bit(self) -> BitMask { + BitMask(self.0 & (self.0 - 1)) + } + /// Returns whether the `BitMask` has at least one set bit. + #[inline] + pub fn any_bit_set(self) -> bool { + self.0 != 0 + } + + /// Returns the first set bit in the `BitMask`, if there is one. + #[inline] + pub fn lowest_set_bit(self) -> Option { + if self.0 == 0 { + None + } else { + Some(unsafe { self.lowest_set_bit_nonzero() }) + } + } + + /// Returns the first set bit in the `BitMask`, if there is one. The + /// bitmask must not be empty. + #[inline] + pub unsafe fn lowest_set_bit_nonzero(self) -> usize { + intrinsics::cttz_nonzero(self.0) as usize / BITMASK_STRIDE + } + + /// Returns the number of trailing zeroes in the `BitMask`. + #[inline] + pub fn trailing_zeros(self) -> usize { + // ARM doesn't have a trailing_zeroes instruction, and instead uses + // reverse_bits (RBIT) + leading_zeroes (CLZ). However older ARM + // versions (pre-ARMv7) don't have RBIT and need to emulate it + // instead. Since we only have 1 bit set in each byte on ARM, we can + // use swap_bytes (REV) + leading_zeroes instead. + if cfg!(target_arch = "arm") && BITMASK_STRIDE % 8 == 0 { + self.0.swap_bytes().leading_zeros() as usize / BITMASK_STRIDE + } else { + self.0.trailing_zeros() as usize / BITMASK_STRIDE + } + } + + /// Returns the number of leading zeroes in the `BitMask`. + #[inline] + pub fn leading_zeros(self) -> usize { + self.0.leading_zeros() as usize / BITMASK_STRIDE + } +} + +impl IntoIterator for BitMask { + type Item = usize; + type IntoIter = BitMaskIter; + + #[inline] + fn into_iter(self) -> BitMaskIter { + BitMaskIter(self) + } +} + +/// Iterator over the contents of a `BitMask`, returning the indicies of set +/// bits. 
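The `BitMask` helpers introduced in bitmask.rs boil down to standard bit tricks. A self-contained sketch of the same index-extraction loop on a plain `u64` (the helper name here is illustrative only):

// Repeatedly take the lowest set bit, report its index, then clear it with
// `x & (x - 1)`, exactly as the `BitMaskIter` defined just below does via
// `lowest_set_bit` and `remove_lowest_bit`.
fn set_bit_indices(mut mask: u64) -> Vec<u32> {
    let mut out = Vec::new();
    while mask != 0 {
        out.push(mask.trailing_zeros()); // index of the lowest set bit
        mask &= mask - 1;                // remove the lowest set bit
    }
    out
}

fn main() {
    assert_eq!(set_bit_indices(0b1010_0100), vec![2, 5, 7]);
}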
+pub struct BitMaskIter(BitMask); + +impl Iterator for BitMaskIter { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + let bit = self.0.lowest_set_bit()?; + self.0 = self.0.remove_lowest_bit(); + Some(bit) + } +} diff --git a/src/libstd/collections/hash/raw/generic.rs b/src/libstd/collections/hash/raw/generic.rs new file mode 100644 index 0000000000000..0e144fb332ee0 --- /dev/null +++ b/src/libstd/collections/hash/raw/generic.rs @@ -0,0 +1,141 @@ +use super::bitmask::BitMask; +use super::EMPTY; +use core::{mem, ptr}; + +// Use the native word size as the group size. Using a 64-bit group size on +// a 32-bit architecture will just end up being more expensive because +// shifts and multiplies will need to be emulated. +#[cfg(any( + target_pointer_width = "64", + target_arch = "aarch64", + target_arch = "x86_64", +))] +type GroupWord = u64; +#[cfg(all( + target_pointer_width = "32", + not(target_arch = "aarch64"), + not(target_arch = "x86_64"), +))] +type GroupWord = u32; + +pub type BitMaskWord = GroupWord; +pub const BITMASK_STRIDE: usize = 8; +// We only care about the highest bit of each byte for the mask. +pub const BITMASK_MASK: BitMaskWord = 0x8080_8080_8080_8080u64 as GroupWord; + +/// Helper function to replicate a byte across a `GroupWord`. +#[inline] +fn repeat(byte: u8) -> GroupWord { + let repeat = byte as GroupWord; + let repeat = repeat | repeat.wrapping_shl(8); + let repeat = repeat | repeat.wrapping_shl(16); + // This last line is a no-op with a 32-bit GroupWord + repeat | repeat.wrapping_shl(32) +} + +/// Abstraction over a group of control bytes which can be scanned in +/// parallel. +/// +/// This implementation uses a word-sized integer. +#[derive(Copy, Clone)] +pub struct Group(GroupWord); + +// We perform all operations in the native endianess, and convert to +// little-endian just before creating a BitMask. The can potentially +// enable the compiler to eliminate unnecessary byte swaps if we are +// only checking whether a BitMask is empty. +impl Group { + /// Number of bytes in the group. + pub const WIDTH: usize = mem::size_of::(); + + /// Returns a full group of empty bytes, suitable for use as the initial + /// value for an empty hash table. + /// + /// This is guaranteed to be aligned to the group size. + #[inline] + pub fn static_empty() -> &'static [u8] { + union AlignedBytes { + _align: Group, + bytes: [u8; Group::WIDTH], + }; + const ALIGNED_BYTES: AlignedBytes = AlignedBytes { + bytes: [EMPTY; Group::WIDTH], + }; + unsafe { &ALIGNED_BYTES.bytes } + } + + /// Loads a group of bytes starting at the given address. + #[inline] + pub unsafe fn load(ptr: *const u8) -> Group { + Group(ptr::read_unaligned(ptr as *const _)) + } + + /// Loads a group of bytes starting at the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + pub unsafe fn load_aligned(ptr: *const u8) -> Group { + debug_assert_eq!(ptr as usize & (mem::align_of::() - 1), 0); + Group(ptr::read(ptr as *const _)) + } + + /// Stores the group of bytes to the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + pub unsafe fn store_aligned(&self, ptr: *mut u8) { + debug_assert_eq!(ptr as usize & (mem::align_of::() - 1), 0); + ptr::write(ptr as *mut _, self.0); + } + + /// Returns a `BitMask` indicating all bytes in the group which *may* + /// have the given value. + /// + /// This function may return a false positive in certain cases where + /// the byte in the group differs from the searched value only in its + /// lowest bit. 
This is fine because: + /// - This never happens for `EMPTY` and `DELETED`, only full entries. + /// - The check for key equality will catch these. + /// - This only happens if there is at least 1 true match. + /// - The chance of this happening is very low (< 1% chance per byte). + #[inline] + pub fn match_byte(&self, byte: u8) -> BitMask { + // This algorithm is derived from + // http://graphics.stanford.edu/~seander/bithacks.html##ValueInWord + let cmp = self.0 ^ repeat(byte); + BitMask((cmp.wrapping_sub(repeat(0x01)) & !cmp & repeat(0x80)).to_le()) + } + + /// Returns a `BitMask` indicating all bytes in the group which are + /// `EMPTY`. + #[inline] + pub fn match_empty(&self) -> BitMask { + // If the high bit is set, then the byte must be either: + // 1111_1111 (EMPTY) or 1000_0000 (DELETED). + // So we can just check if the top two bits are 1 by ANDing them. + BitMask((self.0 & (self.0 << 1) & repeat(0x80)).to_le()) + } + + /// Returns a `BitMask` indicating all bytes in the group which are + /// `EMPTY` or `DELETED`. + #[inline] + pub fn match_empty_or_deleted(&self) -> BitMask { + // A byte is EMPTY or DELETED iff the high bit is set + BitMask((self.0 & repeat(0x80)).to_le()) + } + + /// Performs the following transformation on all bytes in the group: + /// - `EMPTY => EMPTY` + /// - `DELETED => EMPTY` + /// - `FULL => DELETED` + #[inline] + pub fn convert_special_to_empty_and_full_to_deleted(&self) -> Group { + // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111 + // and high_bit = 0 (FULL) to 1000_0000 + // + // Here's this logic expanded to concrete values: + // let full = 1000_0000 (true) or 0000_0000 (false) + // !1000_0000 + 1 = 0111_1111 + 1 = 1000_0000 (no carry) + // !0000_0000 + 0 = 1111_1111 + 0 = 1111_1111 (no carry) + let full = !self.0 & repeat(0x80); + Group(!full + (full >> 7)) + } +} diff --git a/src/libstd/collections/hash/raw/mod.rs b/src/libstd/collections/hash/raw/mod.rs new file mode 100644 index 0000000000000..fc8e2dd0120fe --- /dev/null +++ b/src/libstd/collections/hash/raw/mod.rs @@ -0,0 +1,1212 @@ +use self::scopeguard::guard; +use alloc::{alloc, dealloc, handle_alloc_error}; +use collections::CollectionAllocErr; +use core::alloc::Layout; +use core::hint; +use core::iter::FusedIterator; +use core::marker::PhantomData; +use core::mem; +use core::mem::ManuallyDrop; +use core::ops::Range; +use core::ptr::NonNull; + +// Extracted from the scopeguard crate +mod scopeguard { + use core::ops::{Deref, DerefMut}; + pub struct ScopeGuard + where + F: FnMut(&mut T), + { + dropfn: F, + value: T, + } + #[inline] + pub fn guard(value: T, dropfn: F) -> ScopeGuard + where + F: FnMut(&mut T), + { + ScopeGuard { dropfn, value } + } + impl Deref for ScopeGuard + where + F: FnMut(&mut T), + { + type Target = T; + #[inline] + fn deref(&self) -> &T { + &self.value + } + } + impl DerefMut for ScopeGuard + where + F: FnMut(&mut T), + { + #[inline] + fn deref_mut(&mut self) -> &mut T { + &mut self.value + } + } + impl Drop for ScopeGuard + where + F: FnMut(&mut T), + { + #[inline] + fn drop(&mut self) { + (self.dropfn)(&mut self.value) + } + } +} + +// Branch prediction hint. This is currently only available on nightly but it +// consistently improves performance by 10-15%. +use core::intrinsics::{likely, unlikely}; + +#[inline] +unsafe fn offset_from(to: *const T, from: *const T) -> usize { + to.offset_from(from) as usize +} + +// Use the SSE2 implementation if possible: it allows us to scan 16 buckets at +// once instead of 8. 
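The word-at-a-time `match_byte` in generic.rs above packs a byte-wise comparison into three integer operations. A standalone sketch of the same trick on a `u64`, with the matching lanes checked in `main` (function names are illustrative only):

// XOR zeroes out lanes equal to `byte`; the classic "has zero byte" trick
// (x - 0x01..01) & !x & 0x80..80 then sets the high bit of each zero lane.
fn repeat(byte: u8) -> u64 {
    u64::from(byte) * 0x0101_0101_0101_0101
}

fn match_byte(group: u64, byte: u8) -> u64 {
    let cmp = group ^ repeat(byte);
    cmp.wrapping_sub(repeat(0x01)) & !cmp & repeat(0x80)
}

fn main() {
    // Control byte 0x2a sits in lanes 1 and 3 of this 8-byte group.
    let group = u64::from_le_bytes([0x10, 0x2a, 0x7f, 0x2a, 0x00, 0x81, 0x33, 0x05]);
    // The high bit of each matching lane (and only those) ends up set.
    assert_eq!(match_byte(group, 0x2a), 0x0000_0000_8000_8000);
}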
We don't bother with AVX since it would require runtime +// dispatch and wouldn't gain us much anyways: the probability of finding a +// match drops off drastically after the first few buckets. +// +// I attempted an implementation on ARM using NEON instructions, but it turns +// out that most NEON instructions have multi-cycle latency, which in the end +// outweighs any gains over the generic implementation. +#[cfg(all( + not(stage0), + target_feature = "sse2", + any(target_arch = "x86", target_arch = "x86_64") +))] +#[path = "sse2.rs"] +mod imp; +#[cfg(not(all( + not(stage0), + target_feature = "sse2", + any(target_arch = "x86", target_arch = "x86_64") +)))] +#[path = "generic.rs"] +mod imp; + +mod bitmask; + +use self::bitmask::BitMask; +use self::imp::Group; + +/// Whether memory allocation errors should return an error or abort. +enum Fallibility { + Fallible, + Infallible, +} + +impl Fallibility { + /// Error to return on capacity overflow. + #[inline] + fn capacity_overflow(&self) -> CollectionAllocErr { + match *self { + Fallibility::Fallible => CollectionAllocErr::CapacityOverflow, + Fallibility::Infallible => panic!("Hash table capacity overflow"), + } + } + + /// Error to return on allocation error. + #[inline] + fn alloc_err(&self, layout: Layout) -> CollectionAllocErr { + match *self { + Fallibility::Fallible => CollectionAllocErr::AllocErr, + Fallibility::Infallible => handle_alloc_error(layout), + } + } +} + +/// Control byte value for an empty bucket. +const EMPTY: u8 = 0b11111111; + +/// Control byte value for a deleted bucket. +const DELETED: u8 = 0b10000000; + +/// Checks whether a control byte represents a full bucket (top bit is clear). +#[inline] +fn is_full(ctrl: u8) -> bool { + ctrl & 0x80 == 0 +} + +/// Checks whether a control byte represents a special value (top bit is set). +#[inline] +fn is_special(ctrl: u8) -> bool { + ctrl & 0x80 != 0 +} + +/// Checks whether a special control value is EMPTY (just check 1 bit). +#[inline] +fn special_is_empty(ctrl: u8) -> bool { + debug_assert!(is_special(ctrl)); + ctrl & 0x01 != 0 +} + +/// Primary hash function, used to select the initial bucket to probe from. +#[inline] +fn h1(hash: u64) -> usize { + hash as usize +} + +/// Secondary hash function, saved in the low 7 bits of the control byte. +#[inline] +fn h2(hash: u64) -> u8 { + // Grab the top 7 bits of the hash. While the hash is normally a full 64-bit + // value, some hash functions (such as FxHash) produce a usize result + // instead, which means that the top 32 bits are 0 on 32-bit platforms. + let hash_len = usize::min(mem::size_of::(), mem::size_of::()); + let top7 = hash >> (hash_len * 8 - 7); + (top7 & 0x7f) as u8 +} + +/// Probe sequence based on triangular numbers, which is guaranteed (since our +/// table size is a power of two) to visit every group of elements exactly once. +struct ProbeSeq { + mask: usize, + offset: usize, + index: usize, +} + +impl Iterator for ProbeSeq { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + // We should have found an empty bucket by now and ended the probe. + debug_assert!(self.index <= self.mask, "Went past end of probe sequence"); + + let result = self.offset; + self.index += Group::WIDTH; + self.offset += self.index; + self.offset &= self.mask; + Some(result) + } +} + +/// Returns the number of buckets needed to hold the given number of items, +/// taking the maximum load factor into account. +/// +/// Returns `None` if an overflow occurs. 
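The triangular-number probe sequence implemented by `ProbeSeq` above really does visit every position exactly once on a power-of-two table, which is easy to check numerically. A small sketch with the group width collapsed to a single slot for simplicity (names are illustrative):

// Starting from some offset, advance by 1, 2, 3, ... positions. On a
// power-of-two table this visits every slot exactly once before repeating.
fn probe_positions(start: usize, mask: usize) -> Vec<usize> {
    let mut positions = Vec::new();
    let mut offset = start & mask;
    for stride in 1..=mask + 1 {
        positions.push(offset);
        offset = (offset + stride) & mask;
    }
    positions
}

fn main() {
    let mut seen = probe_positions(5, 15); // a 16-slot table
    seen.sort();
    assert_eq!(seen, (0..16).collect::<Vec<_>>()); // full coverage, no repeats
}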
+#[inline] +fn capacity_to_buckets(cap: usize) -> Option { + let adjusted_cap = if cap < 8 { + // Need at least 1 free bucket on small tables + cap + 1 + } else { + // Otherwise require 1/8 buckets to be empty (87.5% load) + // + // Be careful when modifying this, calculate_layout relies on the + // overflow check here. + cap.checked_mul(8)? / 7 + }; + + // Any overflows will have been caught by the checked_mul. + Some(adjusted_cap.next_power_of_two()) +} + +/// Returns the maximum effective capacity for the given bucket mask, taking +/// the maximum load factor into account. +#[inline] +fn bucket_mask_to_capacity(bucket_mask: usize) -> usize { + if bucket_mask < 8 { + bucket_mask + } else { + ((bucket_mask + 1) / 8) * 7 + } +} + +// Returns a Layout which describes the allocation required for a hash table, +// and the offset of the buckets in the allocation. +/// +/// Returns `None` if an overflow occurs. +#[inline] +fn calculate_layout(buckets: usize) -> Option<(Layout, usize)> { + debug_assert!(buckets.is_power_of_two()); + + // Array of buckets + let data = Layout::array::(buckets).ok()?; + + // Array of control bytes. This must be aligned to the group size. + // + // We add `Group::WIDTH` control bytes at the end of the array which + // replicate the bytes at the start of the array and thus avoids the need to + // perform bounds-checking while probing. + // + // There is no possible overflow here since buckets is a power of two and + // Group::WIDTH is a small number. + let ctrl = unsafe { Layout::from_size_align_unchecked(buckets + Group::WIDTH, Group::WIDTH) }; + + ctrl.extend(data).ok() +} + +/// A reference to a hash table bucket containing a `T`. +pub struct Bucket { + ptr: NonNull, +} + +// This Send impl is needed for rayon support. This is safe since Bucket is +// never exposed in a public API. +unsafe impl Send for Bucket {} + +impl Clone for Bucket { + #[inline] + fn clone(&self) -> Self { + Bucket { ptr: self.ptr } + } +} + +impl Bucket { + #[inline] + unsafe fn from_ptr(ptr: *const T) -> Self { + Bucket { + ptr: NonNull::new_unchecked(ptr as *mut T), + } + } + #[inline] + pub unsafe fn drop(&self) { + self.ptr.as_ptr().drop_in_place(); + } + #[inline] + pub unsafe fn read(&self) -> T { + self.ptr.as_ptr().read() + } + #[inline] + pub unsafe fn write(&self, val: T) { + self.ptr.as_ptr().write(val); + } + #[inline] + pub unsafe fn as_ref<'a>(&self) -> &'a T { + &*self.ptr.as_ptr() + } + #[inline] + pub unsafe fn as_mut<'a>(&self) -> &'a mut T { + &mut *self.ptr.as_ptr() + } +} + +/// A raw hash table with an unsafe API. +pub struct RawTable { + ctrl: NonNull, + bucket_mask: usize, + data: NonNull, + items: usize, + growth_left: usize, +} + +impl RawTable { + /// Creates a new empty hash table without allocating any memory. + /// + /// In effect this returns a table with exactly 1 bucket. However we can + /// leave the data pointer dangling since that bucket is never written to + /// due to our load factor forcing us to always have at least 1 free bucket. + #[inline] + pub fn new() -> RawTable { + RawTable { + data: NonNull::dangling(), + ctrl: NonNull::from(&Group::static_empty()[0]), + bucket_mask: 0, + items: 0, + growth_left: 0, + } + } + + /// Allocates a new hash table with the given number of buckets. + /// + /// The control bytes are left uninitialized. 
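Concretely, the load-factor arithmetic in `capacity_to_buckets` and `bucket_mask_to_capacity` above works out as follows; the two functions are restated here (same logic, reformatted) so the numbers can be checked in isolation.

fn capacity_to_buckets(cap: usize) -> Option<usize> {
    // Small tables only need one spare slot; larger ones keep 1/8 empty.
    let adjusted_cap = if cap < 8 { cap + 1 } else { cap.checked_mul(8)? / 7 };
    Some(adjusted_cap.next_power_of_two())
}

fn bucket_mask_to_capacity(bucket_mask: usize) -> usize {
    if bucket_mask < 8 { bucket_mask } else { ((bucket_mask + 1) / 8) * 7 }
}

fn main() {
    assert_eq!(capacity_to_buckets(4), Some(8));   // 4 + 1 = 5  -> 8 buckets
    assert_eq!(capacity_to_buckets(12), Some(16)); // 12 * 8 / 7 = 13 -> 16 buckets
    assert_eq!(bucket_mask_to_capacity(15), 14);   // 16 buckets hold 14 items
    assert_eq!(bucket_mask_to_capacity(7), 7);     // tiny tables: the mask itself
}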
+ #[inline] + unsafe fn new_uninitialized( + buckets: usize, + fallability: Fallibility, + ) -> Result, CollectionAllocErr> { + let (layout, data_offset) = + calculate_layout::(buckets).ok_or_else(|| fallability.capacity_overflow())?; + let ctrl = NonNull::new(alloc(layout)).ok_or_else(|| fallability.alloc_err(layout))?; + let data = NonNull::new_unchecked(ctrl.as_ptr().add(data_offset) as *mut T); + Ok(RawTable { + data, + ctrl, + bucket_mask: buckets - 1, + items: 0, + growth_left: bucket_mask_to_capacity(buckets - 1), + }) + } + + /// Attempts to allocate a new hash table with at least enough capacity + /// for inserting the given number of elements without reallocating. + fn try_with_capacity( + capacity: usize, + fallability: Fallibility, + ) -> Result, CollectionAllocErr> { + if capacity == 0 { + Ok(RawTable::new()) + } else { + unsafe { + let buckets = + capacity_to_buckets(capacity).ok_or_else(|| fallability.capacity_overflow())?; + let result = RawTable::new_uninitialized(buckets, fallability)?; + result + .ctrl(0) + .write_bytes(EMPTY, result.buckets() + Group::WIDTH); + + // If we have fewer buckets than the group width then we need to + // fill in unused spaces in the trailing control bytes with + // DELETED entries. See the comments in set_ctrl. + if result.buckets() < Group::WIDTH { + result + .ctrl(result.buckets()) + .write_bytes(DELETED, Group::WIDTH - result.buckets()); + } + + Ok(result) + } + } + } + + /// Allocates a new hash table with at least enough capacity for inserting + /// the given number of elements without reallocating. + pub fn with_capacity(capacity: usize) -> RawTable { + RawTable::try_with_capacity(capacity, Fallibility::Infallible) + .unwrap_or_else(|_| unsafe { hint::unreachable_unchecked() }) + } + + /// Deallocates the table without dropping any entries. + #[inline] + unsafe fn free_buckets(&mut self) { + let (layout, _) = + calculate_layout::(self.buckets()).unwrap_or_else(|| hint::unreachable_unchecked()); + dealloc(self.ctrl.as_ptr(), layout); + } + + /// Returns the index of a bucket from a `Bucket`. + #[inline] + unsafe fn bucket_index(&self, bucket: &Bucket) -> usize { + offset_from(bucket.ptr.as_ptr(), self.data.as_ptr()) + } + + /// Returns a pointer to a control byte. + #[inline] + unsafe fn ctrl(&self, index: usize) -> *mut u8 { + debug_assert!(index < self.buckets() + Group::WIDTH); + self.ctrl.as_ptr().add(index) + } + + /// Returns a pointer to an element in the table. + #[inline] + pub unsafe fn bucket(&self, index: usize) -> Bucket { + debug_assert_ne!(self.bucket_mask, 0); + debug_assert!(index < self.buckets()); + Bucket::from_ptr(self.data.as_ptr().add(index)) + } + + /// Erases an element from the table without dropping it. + #[inline] + pub unsafe fn erase_no_drop(&mut self, item: &Bucket) { + let index = self.bucket_index(item); + let index_before = index.wrapping_sub(Group::WIDTH) & self.bucket_mask; + let empty_before = Group::load(self.ctrl(index_before)).match_empty(); + let empty_after = Group::load(self.ctrl(index)).match_empty(); + + // If we are inside a continuous block of Group::WIDTH full or deleted + // cells then a probe window may have seen a full block when trying to + // insert. We therefore need to keep that block non-empty so that + // lookups will continue searching to the next probe window. 
+ let ctrl = if empty_before.leading_zeros() + empty_after.trailing_zeros() >= Group::WIDTH { + DELETED + } else { + self.growth_left += 1; + EMPTY + }; + self.set_ctrl(index, ctrl); + self.items -= 1; + } + + /// Returns an iterator for a probe sequence on the table. + /// + /// This iterator never terminates, but is guaranteed to visit each bucket + /// group exactly once. + #[inline] + fn probe_seq(&self, hash: u64) -> ProbeSeq { + ProbeSeq { + mask: self.bucket_mask, + offset: h1(hash) & self.bucket_mask, + index: 0, + } + } + + /// Sets a control byte, and possibly also the replicated control byte at + /// the end of the array. + #[inline] + unsafe fn set_ctrl(&self, index: usize, ctrl: u8) { + // Replicate the first Group::WIDTH control bytes at the end of + // the array without using a branch: + // - If index >= Group::WIDTH then index == index2. + // - Otherwise index2 == self.bucket_mask + 1 + index. + // + // The very last replicated control byte is never actually read because + // we mask the initial index for unaligned loads, but we write it + // anyways because it makes the set_ctrl implementation simpler. + // + // If there are fewer buckets than Group::WIDTH then this code will + // replicate the buckets at the end of the trailing group. For example + // with 2 buckets and a group size of 4, the control bytes will look + // like this: + // + // Real | Replicated + // ------------------------------------------------- + // | [A] | [B] | [DELETED] | [DELETED] | [A] | [B] | + // ------------------------------------------------- + let index2 = ((index.wrapping_sub(Group::WIDTH)) & self.bucket_mask) + Group::WIDTH; + + *self.ctrl(index) = ctrl; + *self.ctrl(index2) = ctrl; + } + + /// Searches for an empty or deleted bucket which is suitable for inserting + /// a new element. + /// + /// There must be at least 1 empty bucket in the table. + #[inline] + fn find_insert_slot(&self, hash: u64) -> usize { + for pos in self.probe_seq(hash) { + unsafe { + let group = Group::load(self.ctrl(pos)); + if let Some(bit) = group.match_empty_or_deleted().lowest_set_bit() { + let result = (pos + bit) & self.bucket_mask; + + // In tables smaller than the group width, trailing control + // bytes outside the range of the table are filled with + // DELETED entries. These will unfortunately trigger a + // match, but once masked will point to a full bucket that + // is already occupied. We detect this situation here and + // perform a second scan starting at the begining of the + // table. This second scan is guaranteed to find an empty + // slot (due to the load factor) before hitting the trailing + // control bytes (containing DELETED). + if unlikely(is_full(*self.ctrl(result))) { + debug_assert!(self.bucket_mask < Group::WIDTH); + debug_assert_ne!(pos, 0); + return Group::load_aligned(self.ctrl(0)) + .match_empty_or_deleted() + .lowest_set_bit_nonzero(); + } else { + return result; + } + } + } + } + + // probe_seq never returns. + unreachable!(); + } + + /// Marks all table buckets as empty without dropping their contents. + #[inline] + pub fn clear_no_drop(&mut self) { + if self.bucket_mask != 0 { + unsafe { + self.ctrl(0) + .write_bytes(EMPTY, self.buckets() + Group::WIDTH); + } + } + self.items = 0; + self.growth_left = bucket_mask_to_capacity(self.bucket_mask); + } + + /// Removes all elements from the table without freeing the backing memory. 
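The index arithmetic in `set_ctrl` above is what makes the trailing replicated control bytes work. A quick standalone check of the mapping, assuming a group width of 16 as with SSE2:

const GROUP_WIDTH: usize = 16;

// Mirrors `set_ctrl`'s second index: slots in the first group are also
// written past the end of the table; every other slot maps to itself.
fn mirror_index(index: usize, bucket_mask: usize) -> usize {
    (index.wrapping_sub(GROUP_WIDTH) & bucket_mask) + GROUP_WIDTH
}

fn main() {
    let bucket_mask = 63; // a 64-bucket table
    assert_eq!(mirror_index(3, bucket_mask), 64 + 3); // replicated at the tail
    assert_eq!(mirror_index(40, bucket_mask), 40);    // unchanged elsewhere
}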
+ #[inline] + pub fn clear(&mut self) { + // Ensure that the table is reset even if one of the drops panic + let self_ = guard(self, |self_| self_.clear_no_drop()); + + if mem::needs_drop::() { + unsafe { + for item in self_.iter() { + item.drop(); + } + } + } + } + + /// Shrinks the table to fit `max(self.len(), min_size)` elements. + #[inline] + pub fn shrink_to(&mut self, min_size: usize, hasher: impl Fn(&T) -> u64) { + let min_size = usize::max(self.items, min_size); + if self.bucket_mask != 0 && bucket_mask_to_capacity(self.bucket_mask) >= min_size * 2 { + self.resize(min_size, hasher, Fallibility::Infallible) + .unwrap_or_else(|_| unsafe { hint::unreachable_unchecked() }); + } + } + + /// Ensures that at least `additional` items can be inserted into the table + /// without reallocation. + #[inline] + pub fn reserve(&mut self, additional: usize, hasher: impl Fn(&T) -> u64) { + if additional > self.growth_left { + self.reserve_rehash(additional, hasher, Fallibility::Infallible) + .unwrap_or_else(|_| unsafe { hint::unreachable_unchecked() }); + } + } + + /// Tries to ensure that at least `additional` items can be inserted into + /// the table without reallocation. + #[inline] + pub fn try_reserve( + &mut self, + additional: usize, + hasher: impl Fn(&T) -> u64, + ) -> Result<(), CollectionAllocErr> { + if additional > self.growth_left { + self.reserve_rehash(additional, hasher, Fallibility::Fallible) + } else { + Ok(()) + } + } + + /// Out-of-line slow path for `reserve` and `try_reserve`. + #[cold] + #[inline(never)] + fn reserve_rehash( + &mut self, + additional: usize, + hasher: impl Fn(&T) -> u64, + fallability: Fallibility, + ) -> Result<(), CollectionAllocErr> { + let new_items = self + .items + .checked_add(additional) + .ok_or_else(|| fallability.capacity_overflow())?; + + // Rehash in-place without re-allocating if we have plenty of spare + // capacity that is locked up due to DELETED entries. + if new_items < bucket_mask_to_capacity(self.bucket_mask) / 2 { + self.rehash_in_place(hasher); + Ok(()) + } else { + self.resize(new_items, hasher, fallability) + } + } + + /// Rehashes the contents of the table in place (i.e. without changing the + /// allocation). + /// + /// If `hasher` panics then some the table's contents may be lost. + fn rehash_in_place(&mut self, hasher: impl Fn(&T) -> u64) { + unsafe { + // Bulk convert all full control bytes to DELETED, and all DELETED + // control bytes to EMPTY. This effectively frees up all buckets + // containing a DELETED entry. + for i in (0..self.buckets()).step_by(Group::WIDTH) { + let group = Group::load_aligned(self.ctrl(i)); + let group = group.convert_special_to_empty_and_full_to_deleted(); + group.store_aligned(self.ctrl(i)); + } + + // Fix up the trailing control bytes. See the comments in set_ctrl. + if self.buckets() < Group::WIDTH { + self.ctrl(0) + .copy_to(self.ctrl(Group::WIDTH), self.buckets()); + self.ctrl(self.buckets()) + .write_bytes(DELETED, Group::WIDTH - self.buckets()); + } else { + self.ctrl(0) + .copy_to(self.ctrl(self.buckets()), Group::WIDTH); + } + + // If the hash function panics then properly clean up any elements + // that we haven't rehashed yet. We unfortunately can't preserve the + // element since we lost their hash and have no way of recovering it + // without risking another panic. 
+ let mut guard = guard(self, |self_| { + if mem::needs_drop::() { + for i in 0..self_.buckets() { + if *self_.ctrl(i) == DELETED { + self_.set_ctrl(i, EMPTY); + self_.bucket(i).drop(); + self_.items -= 1; + } + } + } + self_.growth_left = bucket_mask_to_capacity(self_.bucket_mask) - self_.items; + }); + + // At this point, DELETED elements are elements that we haven't + // rehashed yet. Find them and re-insert them at their ideal + // position. + 'outer: for i in 0..guard.buckets() { + if *guard.ctrl(i) != DELETED { + continue; + } + 'inner: loop { + // Hash the current item + let item = guard.bucket(i); + let hash = hasher(item.as_ref()); + + // Search for a suitable place to put it + let new_i = guard.find_insert_slot(hash); + + // Probing works by scanning through all of the control + // bytes in groups, which may not be aligned to the group + // size. If both the new and old position fall within the + // same unaligned group, then there is no benefit in moving + // it and we can just continue to the next item. + let probe_index = |pos: usize| { + (pos.wrapping_sub(guard.probe_seq(hash).offset) & guard.bucket_mask) + / Group::WIDTH + }; + if likely(probe_index(i) == probe_index(new_i)) { + guard.set_ctrl(i, h2(hash)); + continue 'outer; + } + + // We are moving the current item to a new position. Write + // our H2 to the control byte of the new position. + let prev_ctrl = *guard.ctrl(new_i); + guard.set_ctrl(new_i, h2(hash)); + + if prev_ctrl == EMPTY { + // If the target slot is empty, simply move the current + // element into the new slot and clear the old control + // byte. + guard.set_ctrl(i, EMPTY); + guard.bucket(new_i).write(item.read()); + continue 'outer; + } else { + // If the target slot is occupied, swap the two elements + // and then continue processing the element that we just + // swapped into the old slot. + debug_assert_eq!(prev_ctrl, DELETED); + mem::swap(guard.bucket(new_i).as_mut(), item.as_mut()); + continue 'inner; + } + } + } + + guard.growth_left = bucket_mask_to_capacity(guard.bucket_mask) - guard.items; + mem::forget(guard); + } + } + + /// Allocates a new table of a different size and moves the contents of the + /// current table into it. + fn resize( + &mut self, + capacity: usize, + hasher: impl Fn(&T) -> u64, + fallability: Fallibility, + ) -> Result<(), CollectionAllocErr> { + unsafe { + debug_assert!(self.items <= capacity); + + // Allocate and initialize the new table. + let mut new_table = RawTable::try_with_capacity(capacity, fallability)?; + new_table.growth_left -= self.items; + new_table.items = self.items; + + // The hash function may panic, in which case we simply free the new + // table without dropping any elements that may have been copied into + // it. + let mut new_table = guard(ManuallyDrop::new(new_table), |new_table| { + if new_table.bucket_mask != 0 { + new_table.free_buckets(); + } + }); + + // Copy all elements to the new table. + for item in self.iter() { + // This may panic. + let hash = hasher(item.as_ref()); + + // We can use a simpler version of insert() here since: + // - there are no DELETED entries. + // - we know there is enough space in the table. + // - all elements are unique. + let index = new_table.find_insert_slot(hash); + new_table.set_ctrl(index, h2(hash)); + new_table.bucket(index).write(item.read()); + } + + // We successfully copied all elements without panicking. Now replace + // self with the new table. 
The old table will have its memory freed but + // the items will not be dropped (since they have been moved into the + // new table). + mem::swap(self, &mut new_table); + + Ok(()) + } + } + + /// Inserts a new element into the table. + /// + /// This does not check if the given element already exists in the table. + #[inline] + pub fn insert(&mut self, hash: u64, value: T, hasher: impl Fn(&T) -> u64) -> Bucket { + self.reserve(1, hasher); + self.insert_no_grow(hash, value) + } + + /// Inserts a new element into the table, without growing the table. + /// + /// There must be enough space in the table to insert the new element. + /// + /// This does not check if the given element already exists in the table. + #[inline] + pub fn insert_no_grow(&mut self, hash: u64, value: T) -> Bucket { + unsafe { + let index = self.find_insert_slot(hash); + let bucket = self.bucket(index); + + // If we are replacing a DELETED entry then we don't need to update + // the load counter. + let old_ctrl = *self.ctrl(index); + self.growth_left -= special_is_empty(old_ctrl) as usize; + + self.set_ctrl(index, h2(hash)); + bucket.write(value); + self.items += 1; + bucket + } + } + + /// Searches for an element in the table. + #[inline] + pub fn find(&self, hash: u64, mut eq: impl FnMut(&T) -> bool) -> Option> { + unsafe { + for pos in self.probe_seq(hash) { + let group = Group::load(self.ctrl(pos)); + for bit in group.match_byte(h2(hash)) { + let index = (pos + bit) & self.bucket_mask; + let bucket = self.bucket(index); + if likely(eq(bucket.as_ref())) { + return Some(bucket); + } + } + if likely(group.match_empty().any_bit_set()) { + return None; + } + } + } + + // probe_seq never returns. + unreachable!(); + } + + /// Returns the number of elements the map can hold without reallocating. + /// + /// This number is a lower bound; the table might be able to hold + /// more, but is guaranteed to be able to hold at least this many. + #[inline] + pub fn capacity(&self) -> usize { + self.items + self.growth_left + } + + /// Returns the number of elements in the table. + #[inline] + pub fn len(&self) -> usize { + self.items + } + + /// Returns the number of buckets in the table. + #[inline] + pub fn buckets(&self) -> usize { + self.bucket_mask + 1 + } + + /// Returns an iterator over every element in the table. It is up to + /// the caller to ensure that the `RawTable` outlives the `RawIter`. + /// Because we cannot make the `next` method unsafe on the `RawIter` + /// struct, we have to make the `iter` method unsafe. + #[inline] + pub unsafe fn iter(&self) -> RawIter { + RawIter { + iter: RawIterRange::new(self.ctrl.as_ptr(), self.data.as_ptr(), 0..self.buckets()), + items: self.items, + } + } + + /// Returns an iterator which removes all elements from the table without + /// freeing the memory. It is up to the caller to ensure that the `RawTable` + /// outlives the `RawDrain`. Because we cannot make the `next` method unsafe + /// on the `RawDrain`, we have to make the `drain` method unsafe. + #[inline] + pub unsafe fn drain(&mut self) -> RawDrain { + RawDrain { + iter: self.iter(), + table: NonNull::from(self), + _marker: PhantomData, + } + } + + /// Converts the table into a raw allocation. The contents of the table + /// should be dropped using a `RawIter` before freeing the allocation. 
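The `hash: u64` argument and the `hasher` closure that `insert`, `find`, and `reserve` above expect are produced at the map layer from the table's `BuildHasher`. A runnable sketch of that hashing step using the standard `RandomState`; the helper name mirrors the `make_hash` used in map.rs, but this version is only an illustration:

use std::collections::hash_map::RandomState;
use std::hash::{BuildHasher, Hash, Hasher};

// Hash a key with the table's BuildHasher. The same closure shape is passed
// back into the raw table so it can rehash elements during growth.
fn make_hash<K: Hash>(state: &RandomState, key: &K) -> u64 {
    let mut hasher = state.build_hasher();
    key.hash(&mut hasher);
    hasher.finish()
}

fn main() {
    let state = RandomState::new();
    let h1 = make_hash(&state, &"poneyland");
    let h2 = make_hash(&state, &"poneyland");
    assert_eq!(h1, h2); // same key, same state: identical 64-bit hash
}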
+ #[inline] + pub fn into_alloc(self) -> Option<(NonNull, Layout)> { + let alloc = if self.bucket_mask != 0 { + let (layout, _) = calculate_layout::(self.buckets()) + .unwrap_or_else(|| unsafe { hint::unreachable_unchecked() }); + Some((self.ctrl.cast(), layout)) + } else { + None + }; + mem::forget(self); + alloc + } +} + +unsafe impl Send for RawTable where T: Send {} +unsafe impl Sync for RawTable where T: Sync {} + +impl Clone for RawTable { + fn clone(&self) -> Self { + if self.bucket_mask == 0 { + Self::new() + } else { + unsafe { + let mut new_table = ManuallyDrop::new( + Self::new_uninitialized(self.buckets(), Fallibility::Infallible) + .unwrap_or_else(|_| hint::unreachable_unchecked()), + ); + + // Copy the control bytes unchanged. We do this in a single pass + self.ctrl(0) + .copy_to_nonoverlapping(new_table.ctrl(0), self.buckets() + Group::WIDTH); + + { + // The cloning of elements may panic, in which case we need + // to make sure we drop only the elements that have been + // cloned so far. + let mut guard = guard((0, &mut new_table), |(index, new_table)| { + if mem::needs_drop::() { + for i in 0..=*index { + if is_full(*new_table.ctrl(i)) { + new_table.bucket(i).drop(); + } + } + } + new_table.free_buckets(); + }); + + for from in self.iter() { + let index = self.bucket_index(&from); + let to = guard.1.bucket(index); + to.write(from.as_ref().clone()); + + // Update the index in case we need to unwind. + guard.0 = index; + } + + // Successfully cloned all items, no need to clean up. + mem::forget(guard); + } + + // Return the newly created table. + new_table.items = self.items; + new_table.growth_left = self.growth_left; + ManuallyDrop::into_inner(new_table) + } + } + } +} + +unsafe impl<#[may_dangle] T> Drop for RawTable { + #[inline] + fn drop(&mut self) { + if self.bucket_mask != 0 { + unsafe { + if mem::needs_drop::() { + for item in self.iter() { + item.drop(); + } + } + self.free_buckets(); + } + } + } +} + +impl IntoIterator for RawTable { + type Item = T; + type IntoIter = RawIntoIter; + + #[inline] + fn into_iter(self) -> RawIntoIter { + unsafe { + let iter = self.iter(); + let alloc = self.into_alloc(); + RawIntoIter { iter, alloc } + } + } +} + +/// Iterator over a a sub-range of a table. Unlike `RawIter` this iterator does +/// not track an item count. +pub struct RawIterRange { + // Using *const here for covariance + data: *const T, + ctrl: *const u8, + current_group: BitMask, + end: *const u8, +} + +impl RawIterRange { + /// Returns a `RawIterRange` covering a subset of a table. + /// + /// The start offset must be aligned to the group width. + #[inline] + unsafe fn new( + input_ctrl: *const u8, + input_data: *const T, + range: Range, + ) -> RawIterRange { + debug_assert_eq!(range.start % Group::WIDTH, 0); + let ctrl = input_ctrl.add(range.start); + let data = input_data.add(range.start); + let end = input_ctrl.add(range.end); + debug_assert_eq!(offset_from(end, ctrl), range.end - range.start); + let current_group = Group::load_aligned(ctrl).match_empty_or_deleted().invert(); + RawIterRange { + data, + ctrl, + current_group, + end, + } + } + + /// Splits a `RawIterRange` into two halves. + /// + /// This will fail if the total range is smaller than the group width. 
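`clone` above (like `rehash_in_place` and `resize`) leans on the small scopeguard extracted earlier: cleanup is armed up front and cancelled with `mem::forget` once every element has been handled. A self-contained sketch of that pattern outside the hash table, with illustrative names:

use std::mem;
use std::ops::{Deref, DerefMut};

struct ScopeGuard<T, F: FnMut(&mut T)> {
    value: T,
    dropfn: F,
}

impl<T, F: FnMut(&mut T)> Deref for ScopeGuard<T, F> {
    type Target = T;
    fn deref(&self) -> &T { &self.value }
}

impl<T, F: FnMut(&mut T)> DerefMut for ScopeGuard<T, F> {
    fn deref_mut(&mut self) -> &mut T { &mut self.value }
}

impl<T, F: FnMut(&mut T)> Drop for ScopeGuard<T, F> {
    fn drop(&mut self) { (self.dropfn)(&mut self.value) }
}

fn clone_all(src: &[String]) -> Vec<String> {
    // If a clone panics part-way, the guard clears the partially filled Vec
    // during unwinding instead of leaving half-initialized state behind.
    let mut guard = ScopeGuard {
        value: Vec::with_capacity(src.len()),
        dropfn: |v: &mut Vec<String>| v.clear(),
    };
    for s in src {
        guard.push(s.clone());
    }
    // Success: take the result out and cancel the cleanup.
    let result = mem::replace(&mut guard.value, Vec::new());
    mem::forget(guard);
    result
}

fn main() {
    assert_eq!(clone_all(&["a".to_string(), "b".to_string()]), ["a", "b"]);
}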
+ #[inline] + #[cfg(feature = "rayon")] + pub fn split(mut self) -> (RawIterRange, Option>) { + unsafe { + let len = offset_from(self.end, self.ctrl); + debug_assert!(len.is_power_of_two()); + if len <= Group::WIDTH { + (self, None) + } else { + debug_assert_eq!(len % (Group::WIDTH * 2), 0); + let mid = len / 2; + let tail = RawIterRange::new(self.ctrl, self.data, mid..len); + debug_assert_eq!(self.data.add(mid), tail.data); + debug_assert_eq!(self.end, tail.end); + self.end = self.ctrl.add(mid); + debug_assert_eq!(self.end, tail.ctrl); + (self, Some(tail)) + } + } + } +} + +unsafe impl Send for RawIterRange where T: Send {} +unsafe impl Sync for RawIterRange where T: Sync {} + +impl Clone for RawIterRange { + #[inline] + fn clone(&self) -> Self { + RawIterRange { + data: self.data, + ctrl: self.ctrl, + current_group: self.current_group, + end: self.end, + } + } +} + +impl Iterator for RawIterRange { + type Item = Bucket; + + #[inline] + fn next(&mut self) -> Option> { + unsafe { + loop { + if let Some(index) = self.current_group.lowest_set_bit() { + self.current_group = self.current_group.remove_lowest_bit(); + return Some(Bucket::from_ptr(self.data.add(index))); + } + + self.ctrl = self.ctrl.add(Group::WIDTH); + if self.ctrl >= self.end { + return None; + } + + self.data = self.data.add(Group::WIDTH); + self.current_group = Group::load_aligned(self.ctrl) + .match_empty_or_deleted() + .invert(); + } + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + // We don't have an item count, so just guess based on the range size. + (0, Some(unsafe { offset_from(self.end, self.ctrl) })) + } +} + +impl FusedIterator for RawIterRange {} + +/// Iterator which returns a raw pointer to every full bucket in the table. +pub struct RawIter { + pub iter: RawIterRange, + items: usize, +} + +impl Clone for RawIter { + #[inline] + fn clone(&self) -> Self { + RawIter { + iter: self.iter.clone(), + items: self.items, + } + } +} + +impl Iterator for RawIter { + type Item = Bucket; + + #[inline] + fn next(&mut self) -> Option> { + match self.iter.next() { + Some(b) => { + self.items -= 1; + Some(b) + } + None => { + // We don't check against items == 0 here to allow the + // compiler to optimize away the item count entirely if the + // iterator length is never queried. + debug_assert_eq!(self.items, 0); + None + } + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + (self.items, Some(self.items)) + } +} + +impl ExactSizeIterator for RawIter {} +impl FusedIterator for RawIter {} + +/// Iterator which consumes a table and returns elements. 
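`RawIterRange` above walks the control bytes one group at a time and yields only the full slots. A scalar sketch of the same scan, with the group handling simplified to a plain loop (names illustrative):

// A slot is FULL when the high bit of its control byte is clear; EMPTY (0xff)
// and DELETED (0x80) both have it set, so this mirrors
// match_empty_or_deleted().invert().
fn full_slots(ctrl: &[u8], group_width: usize) -> Vec<usize> {
    let mut out = Vec::new();
    for (g, group) in ctrl.chunks(group_width).enumerate() {
        for (i, &byte) in group.iter().enumerate() {
            if byte & 0x80 == 0 {
                out.push(g * group_width + i);
            }
        }
    }
    out
}

fn main() {
    let ctrl = [0xff, 0x17, 0x80, 0x42, 0xff, 0xff, 0x05, 0xff];
    assert_eq!(full_slots(&ctrl, 4), vec![1, 3, 6]);
}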
+pub struct RawIntoIter { + iter: RawIter, + alloc: Option<(NonNull, Layout)>, +} + +impl<'a, T> RawIntoIter { + #[inline] + pub fn iter(&self) -> RawIter { + self.iter.clone() + } +} + +unsafe impl Send for RawIntoIter where T: Send {} +unsafe impl Sync for RawIntoIter where T: Sync {} + +impl Drop for RawIntoIter { + #[inline] + fn drop(&mut self) { + unsafe { + // Drop all remaining elements + if mem::needs_drop::() { + while let Some(item) = self.iter.next() { + item.drop(); + } + } + + // Free the table + if let Some((ptr, layout)) = self.alloc { + dealloc(ptr.as_ptr(), layout); + } + } + } +} + +impl Iterator for RawIntoIter { + type Item = T; + + #[inline] + fn next(&mut self) -> Option { + unsafe { Some(self.iter.next()?.read()) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl ExactSizeIterator for RawIntoIter {} +impl FusedIterator for RawIntoIter {} + +/// Iterator which consumes elements without freeing the table storage. +pub struct RawDrain<'a, T: 'a> { + iter: RawIter, + + // We don't use a &'a RawTable because we want RawDrain to be covariant + // over 'a. + table: NonNull>, + _marker: PhantomData<&'a RawTable>, +} + +impl<'a, T> RawDrain<'a, T> { + #[inline] + pub fn iter(&self) -> RawIter { + self.iter.clone() + } +} + +unsafe impl<'a, T> Send for RawDrain<'a, T> where T: Send {} +unsafe impl<'a, T> Sync for RawDrain<'a, T> where T: Sync {} + +impl<'a, T> Drop for RawDrain<'a, T> { + #[inline] + fn drop(&mut self) { + unsafe { + // Ensure that the table is reset even if one of the drops panic + let _guard = guard(self.table, |table| table.as_mut().clear_no_drop()); + + // Drop all remaining elements + if mem::needs_drop::() { + while let Some(item) = self.iter.next() { + item.drop(); + } + } + } + } +} + +impl<'a, T> Iterator for RawDrain<'a, T> { + type Item = T; + + #[inline] + fn next(&mut self) -> Option { + unsafe { + let item = self.iter.next()?; + + // Mark the item as DELETED in the table and decrement the item + // counter. We don't need to use the full delete algorithm like + // erase_no_drop since we will just clear the control bytes when + // the RawDrain is dropped. + let index = self.table.as_ref().bucket_index(&item); + *self.table.as_mut().ctrl(index) = DELETED; + self.table.as_mut().items -= 1; + + Some(item.read()) + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl<'a, T> ExactSizeIterator for RawDrain<'a, T> {} +impl<'a, T> FusedIterator for RawDrain<'a, T> {} diff --git a/src/libstd/collections/hash/raw/sse2.rs b/src/libstd/collections/hash/raw/sse2.rs new file mode 100644 index 0000000000000..ecb238f24d128 --- /dev/null +++ b/src/libstd/collections/hash/raw/sse2.rs @@ -0,0 +1,107 @@ +use super::bitmask::BitMask; +use super::EMPTY; +use core::mem; + +#[cfg(target_arch = "x86")] +use core::arch::x86; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64 as x86; + +pub type BitMaskWord = u16; +pub const BITMASK_STRIDE: usize = 1; +pub const BITMASK_MASK: BitMaskWord = 0xffff; + +/// Abstraction over a group of control bytes which can be scanned in +/// parallel. +/// +/// This implementation uses a 128-bit SSE value. +#[derive(Copy, Clone)] +pub struct Group(x86::__m128i); + +impl Group { + /// Number of bytes in the group. + pub const WIDTH: usize = mem::size_of::(); + + /// Returns a full group of empty bytes, suitable for use as the initial + /// value for an empty hash table. + /// + /// This is guaranteed to be aligned to the group size. 
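`RawDrain` above is what backs the public drain iterators; at the `HashMap`/`HashSet` level the observable contract is just the following (stable API, nothing new, relying on the documented "keeps the allocated memory for reuse" behaviour):

use std::collections::HashMap;

fn main() {
    let mut map: HashMap<i32, &str> = [(1, "a"), (2, "b")].iter().cloned().collect();

    // Drain yields every entry but keeps the allocation for reuse, which is
    // what RawDrain's clear-without-free guard provides underneath.
    let mut drained: Vec<_> = map.drain().collect();
    drained.sort();
    assert_eq!(drained, [(1, "a"), (2, "b")]);
    assert!(map.is_empty());
    assert!(map.capacity() >= 2); // storage retained
}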
+ #[inline] + pub fn static_empty() -> &'static [u8] { + union AlignedBytes { + _align: Group, + bytes: [u8; Group::WIDTH], + }; + const ALIGNED_BYTES: AlignedBytes = AlignedBytes { + bytes: [EMPTY; Group::WIDTH], + }; + unsafe { &ALIGNED_BYTES.bytes } + } + + /// Loads a group of bytes starting at the given address. + #[inline] + pub unsafe fn load(ptr: *const u8) -> Group { + Group(x86::_mm_loadu_si128(ptr as *const _)) + } + + /// Loads a group of bytes starting at the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + pub unsafe fn load_aligned(ptr: *const u8) -> Group { + debug_assert_eq!(ptr as usize & (mem::align_of::() - 1), 0); + Group(x86::_mm_load_si128(ptr as *const _)) + } + + /// Stores the group of bytes to the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + pub unsafe fn store_aligned(&self, ptr: *mut u8) { + debug_assert_eq!(ptr as usize & (mem::align_of::() - 1), 0); + x86::_mm_store_si128(ptr as *mut _, self.0); + } + + /// Returns a `BitMask` indicating all bytes in the group which have + /// the given value. + #[inline] + pub fn match_byte(&self, byte: u8) -> BitMask { + unsafe { + let cmp = x86::_mm_cmpeq_epi8(self.0, x86::_mm_set1_epi8(byte as i8)); + BitMask(x86::_mm_movemask_epi8(cmp) as u16) + } + } + + /// Returns a `BitMask` indicating all bytes in the group which are + /// `EMPTY`. + #[inline] + pub fn match_empty(&self) -> BitMask { + self.match_byte(EMPTY) + } + + /// Returns a `BitMask` indicating all bytes in the group which are + /// `EMPTY` or `DELETED`. + #[inline] + pub fn match_empty_or_deleted(&self) -> BitMask { + // A byte is EMPTY or DELETED iff the high bit is set + unsafe { BitMask(x86::_mm_movemask_epi8(self.0) as u16) } + } + + /// Performs the following transformation on all bytes in the group: + /// - `EMPTY => EMPTY` + /// - `DELETED => EMPTY` + /// - `FULL => DELETED` + #[inline] + pub fn convert_special_to_empty_and_full_to_deleted(&self) -> Group { + // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111 + // and high_bit = 0 (FULL) to 1000_0000 + // + // Here's this logic expanded to concrete values: + // let special = 0 > byte = 1111_1111 (true) or 0000_0000 (false) + // 1111_1111 | 1000_0000 = 1111_1111 + // 0000_0000 | 1000_0000 = 1000_0000 + unsafe { + let zero = x86::_mm_setzero_si128(); + let special = x86::_mm_cmpgt_epi8(zero, self.0); + Group(x86::_mm_or_si128(special, x86::_mm_set1_epi8(0x80u8 as i8))) + } + } +} diff --git a/src/libstd/collections/hash/set.rs b/src/libstd/collections/hash/set.rs index d3267e4e8015a..a753c216de3e0 100644 --- a/src/libstd/collections/hash/set.rs +++ b/src/libstd/collections/hash/set.rs @@ -9,12 +9,12 @@ // except according to those terms. use borrow::Borrow; +use collections::CollectionAllocErr; use fmt; use hash::{Hash, BuildHasher}; use iter::{Chain, FromIterator, FusedIterator}; use ops::{BitOr, BitAnd, BitXor, Sub}; -use super::Recover; use super::map::{self, HashMap, Keys, RandomState}; // Future Optimization (FIXME!) 
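The SSE2 `match_byte` above is a single compare plus movemask. The same operation can be reproduced outside this module with the stable `std::arch` intrinsics, purely as an illustration (runs only on x86_64 here):

#[cfg(target_arch = "x86_64")]
fn match_byte_sse2(group: [u8; 16], byte: u8) -> u16 {
    use std::arch::x86_64::*;
    // SSE2 is part of the x86_64 baseline, so these intrinsics are always
    // available on this target.
    unsafe {
        let group = _mm_loadu_si128(group.as_ptr() as *const __m128i);
        let cmp = _mm_cmpeq_epi8(group, _mm_set1_epi8(byte as i8));
        _mm_movemask_epi8(cmp) as u16
    }
}

#[cfg(target_arch = "x86_64")]
fn main() {
    let mut ctrl = [0xffu8; 16]; // all EMPTY
    ctrl[2] = 0x23;
    ctrl[9] = 0x23;
    // One bit per byte lane, set where the control byte equals 0x23.
    assert_eq!(match_byte_sse2(ctrl, 0x23), (1u16 << 2) | (1u16 << 9));
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}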
@@ -230,6 +230,7 @@ impl HashSet /// let set: HashSet = HashSet::with_hasher(hasher); /// let hasher: &RandomState = set.hasher(); /// ``` + #[inline] #[stable(feature = "hashmap_public_hasher", since = "1.9.0")] pub fn hasher(&self) -> &S { self.map.hasher() @@ -266,11 +267,35 @@ impl HashSet /// set.reserve(10); /// assert!(set.capacity() >= 10); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn reserve(&mut self, additional: usize) { self.map.reserve(additional) } + /// Tries to reserve capacity for at least `additional` more elements to be inserted + /// in the given `HashSet`. The collection may reserve more space to avoid + /// frequent reallocations. + /// + /// # Errors + /// + /// If the capacity overflows, or the allocator reports a failure, then an error + /// is returned. + /// + /// # Examples + /// + /// ``` + /// #![feature(try_reserve)] + /// use std::collections::HashSet; + /// let mut set: HashSet = HashSet::new(); + /// set.try_reserve(10).expect("why is the test harness OOMing on 10 bytes?"); + /// ``` + #[inline] + #[unstable(feature = "try_reserve", reason = "new API", issue="48043")] + pub fn try_reserve(&mut self, additional: usize) -> Result<(), CollectionAllocErr> { + self.map.try_reserve(additional) + } + /// Shrinks the capacity of the set as much as possible. It will drop /// down as much as possible while maintaining the internal rules /// and possibly leaving some space in accordance with the resize policy. @@ -287,6 +312,7 @@ impl HashSet /// set.shrink_to_fit(); /// assert!(set.capacity() >= 2); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn shrink_to_fit(&mut self) { self.map.shrink_to_fit() @@ -336,6 +362,7 @@ impl HashSet /// println!("{}", x); /// } /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn iter(&self) -> Iter { Iter { iter: self.map.keys() } @@ -364,6 +391,7 @@ impl HashSet /// let diff: HashSet<_> = b.difference(&a).collect(); /// assert_eq!(diff, [4].iter().collect()); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn difference<'a>(&'a self, other: &'a HashSet) -> Difference<'a, T, S> { Difference { @@ -393,6 +421,7 @@ impl HashSet /// assert_eq!(diff1, diff2); /// assert_eq!(diff1, [1, 4].iter().collect()); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn symmetric_difference<'a>(&'a self, other: &'a HashSet) @@ -418,6 +447,7 @@ impl HashSet /// let intersection: HashSet<_> = a.intersection(&b).collect(); /// assert_eq!(intersection, [2, 3].iter().collect()); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn intersection<'a>(&'a self, other: &'a HashSet) -> Intersection<'a, T, S> { Intersection { @@ -444,6 +474,7 @@ impl HashSet /// let union: HashSet<_> = a.union(&b).collect(); /// assert_eq!(union, [1, 2, 3, 4].iter().collect()); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn union<'a>(&'a self, other: &'a HashSet) -> Union<'a, T, S> { Union { iter: self.iter().chain(other.difference(self)) } @@ -461,6 +492,7 @@ impl HashSet /// v.insert(1); /// assert_eq!(v.len(), 1); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn len(&self) -> usize { self.map.len() @@ -478,6 +510,7 @@ impl HashSet /// v.insert(1); /// assert!(!v.is_empty()); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn is_empty(&self) -> bool { self.map.is_empty() @@ -518,6 +551,7 @@ impl HashSet /// v.clear(); /// assert!(v.is_empty()); /// ``` + #[inline] 
#[stable(feature = "rust1", since = "1.0.0")] pub fn clear(&mut self) { self.map.clear() @@ -541,6 +575,7 @@ impl HashSet /// /// [`Eq`]: ../../std/cmp/trait.Eq.html /// [`Hash`]: ../../std/hash/trait.Hash.html + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn contains(&self, value: &Q) -> bool where T: Borrow, @@ -567,12 +602,13 @@ impl HashSet /// /// [`Eq`]: ../../std/cmp/trait.Eq.html /// [`Hash`]: ../../std/hash/trait.Hash.html + #[inline] #[stable(feature = "set_recovery", since = "1.9.0")] pub fn get(&self, value: &Q) -> Option<&T> where T: Borrow, Q: Hash + Eq { - Recover::get(&self.map, value) + self.map.get_key_value(value).map(|(k, _)| k) } /// Returns `true` if `self` has no elements in common with `other`. @@ -662,6 +698,7 @@ impl HashSet /// assert_eq!(set.insert(2), false); /// assert_eq!(set.len(), 1); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn insert(&mut self, value: T) -> bool { self.map.insert(value, ()).is_none() @@ -682,9 +719,16 @@ impl HashSet /// set.replace(Vec::with_capacity(10)); /// assert_eq!(set.get(&[][..]).unwrap().capacity(), 10); /// ``` + #[inline] #[stable(feature = "set_recovery", since = "1.9.0")] pub fn replace(&mut self, value: T) -> Option { - Recover::replace(&mut self.map, value) + match self.map.entry(value) { + map::Entry::Occupied(occupied) => Some(occupied.replace_key()), + map::Entry::Vacant(vacant) => { + vacant.insert(()); + None + } + } } /// Removes a value from the set. Returns `true` if the value was @@ -708,6 +752,7 @@ impl HashSet /// /// [`Eq`]: ../../std/cmp/trait.Eq.html /// [`Hash`]: ../../std/hash/trait.Hash.html + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn remove(&mut self, value: &Q) -> bool where T: Borrow, @@ -734,12 +779,13 @@ impl HashSet /// /// [`Eq`]: ../../std/cmp/trait.Eq.html /// [`Hash`]: ../../std/hash/trait.Hash.html + #[inline] #[stable(feature = "set_recovery", since = "1.9.0")] pub fn take(&mut self, value: &Q) -> Option where T: Borrow, Q: Hash + Eq { - Recover::take(&mut self.map, value) + self.map.remove_entry(value).map(|(k, _)| k) } /// Retains only the elements specified by the predicate. @@ -800,6 +846,7 @@ impl FromIterator for HashSet where T: Eq + Hash, S: BuildHasher + Default { + #[inline] fn from_iter>(iter: I) -> HashSet { let mut set = HashSet::with_hasher(Default::default()); set.extend(iter); @@ -812,6 +859,7 @@ impl Extend for HashSet where T: Eq + Hash, S: BuildHasher { + #[inline] fn extend>(&mut self, iter: I) { self.map.extend(iter.into_iter().map(|k| (k, ()))); } @@ -822,6 +870,7 @@ impl<'a, T, S> Extend<&'a T> for HashSet where T: 'a + Eq + Hash + Copy, S: BuildHasher { + #[inline] fn extend>(&mut self, iter: I) { self.extend(iter.into_iter().cloned()); } @@ -833,6 +882,7 @@ impl Default for HashSet S: BuildHasher + Default { /// Creates an empty `HashSet` with the `Default` value for the hasher. 
+ #[inline] fn default() -> HashSet { HashSet { map: HashMap::default() } } @@ -1064,6 +1114,7 @@ impl<'a, T, S> IntoIterator for &'a HashSet type Item = &'a T; type IntoIter = Iter<'a, T>; + #[inline] fn into_iter(self) -> Iter<'a, T> { self.iter() } @@ -1097,6 +1148,7 @@ impl IntoIterator for HashSet /// println!("{}", x); /// } /// ``` + #[inline] fn into_iter(self) -> IntoIter { IntoIter { iter: self.map.into_iter() } } @@ -1104,6 +1156,7 @@ impl IntoIterator for HashSet #[stable(feature = "rust1", since = "1.0.0")] impl<'a, K> Clone for Iter<'a, K> { + #[inline] fn clone(&self) -> Iter<'a, K> { Iter { iter: self.iter.clone() } } @@ -1112,15 +1165,18 @@ impl<'a, K> Clone for Iter<'a, K> { impl<'a, K> Iterator for Iter<'a, K> { type Item = &'a K; + #[inline] fn next(&mut self) -> Option<&'a K> { self.iter.next() } + #[inline] fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } } #[stable(feature = "rust1", since = "1.0.0")] impl<'a, K> ExactSizeIterator for Iter<'a, K> { + #[inline] fn len(&self) -> usize { self.iter.len() } @@ -1139,15 +1195,18 @@ impl<'a, K: fmt::Debug> fmt::Debug for Iter<'a, K> { impl Iterator for IntoIter { type Item = K; + #[inline] fn next(&mut self) -> Option { self.iter.next().map(|(k, _)| k) } + #[inline] fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } } #[stable(feature = "rust1", since = "1.0.0")] impl ExactSizeIterator for IntoIter { + #[inline] fn len(&self) -> usize { self.iter.len() } @@ -1159,7 +1218,6 @@ impl FusedIterator for IntoIter {} impl fmt::Debug for IntoIter { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let entries_iter = self.iter - .inner .iter() .map(|(k, _)| k); f.debug_list().entries(entries_iter).finish() @@ -1170,15 +1228,18 @@ impl fmt::Debug for IntoIter { impl<'a, K> Iterator for Drain<'a, K> { type Item = K; + #[inline] fn next(&mut self) -> Option { self.iter.next().map(|(k, _)| k) } + #[inline] fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } } #[stable(feature = "rust1", since = "1.0.0")] impl<'a, K> ExactSizeIterator for Drain<'a, K> { + #[inline] fn len(&self) -> usize { self.iter.len() } @@ -1190,7 +1251,6 @@ impl<'a, K> FusedIterator for Drain<'a, K> {} impl<'a, K: fmt::Debug> fmt::Debug for Drain<'a, K> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let entries_iter = self.iter - .inner .iter() .map(|(k, _)| k); f.debug_list().entries(entries_iter).finish() @@ -1199,6 +1259,7 @@ impl<'a, K: fmt::Debug> fmt::Debug for Drain<'a, K> { #[stable(feature = "rust1", since = "1.0.0")] impl<'a, T, S> Clone for Intersection<'a, T, S> { + #[inline] fn clone(&self) -> Intersection<'a, T, S> { Intersection { iter: self.iter.clone(), ..*self } } @@ -1211,6 +1272,7 @@ impl<'a, T, S> Iterator for Intersection<'a, T, S> { type Item = &'a T; + #[inline] fn next(&mut self) -> Option<&'a T> { loop { let elt = self.iter.next()?; @@ -1220,6 +1282,7 @@ impl<'a, T, S> Iterator for Intersection<'a, T, S> } } + #[inline] fn size_hint(&self) -> (usize, Option) { let (_, upper) = self.iter.size_hint(); (0, upper) @@ -1245,6 +1308,7 @@ impl<'a, T, S> FusedIterator for Intersection<'a, T, S> #[stable(feature = "rust1", since = "1.0.0")] impl<'a, T, S> Clone for Difference<'a, T, S> { + #[inline] fn clone(&self) -> Difference<'a, T, S> { Difference { iter: self.iter.clone(), ..*self } } @@ -1257,6 +1321,7 @@ impl<'a, T, S> Iterator for Difference<'a, T, S> { type Item = &'a T; + #[inline] fn next(&mut self) -> Option<&'a T> { loop { let elt = self.iter.next()?; @@ -1266,6 +1331,7 @@ 
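The `Intersection` and `Difference` adapters above are essentially a `filter` over one side's iterator, which is also why their `size_hint` lower bound is 0. A sketch of that equivalence using only the public API:

use std::collections::HashSet;

fn main() {
    let a: HashSet<i32> = [1, 2, 3].iter().cloned().collect();
    let b: HashSet<i32> = [2, 3, 4].iter().cloned().collect();

    // Intersection walks `a` and keeps the elements `b` contains...
    let via_adapter: HashSet<i32> = a.intersection(&b).cloned().collect();
    let via_filter: HashSet<i32> = a.iter().filter(|x| b.contains(*x)).cloned().collect();
    assert_eq!(via_adapter, via_filter);

    // ...while Difference keeps the elements `b` does not contain.
    let diff_adapter: HashSet<i32> = a.difference(&b).cloned().collect();
    let diff_filter: HashSet<i32> = a.iter().filter(|x| !b.contains(*x)).cloned().collect();
    assert_eq!(diff_adapter, diff_filter);
}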
impl<'a, T, S> Iterator for Difference<'a, T, S> } } + #[inline] fn size_hint(&self) -> (usize, Option) { let (_, upper) = self.iter.size_hint(); (0, upper) @@ -1291,6 +1357,7 @@ impl<'a, T, S> fmt::Debug for Difference<'a, T, S> #[stable(feature = "rust1", since = "1.0.0")] impl<'a, T, S> Clone for SymmetricDifference<'a, T, S> { + #[inline] fn clone(&self) -> SymmetricDifference<'a, T, S> { SymmetricDifference { iter: self.iter.clone() } } @@ -1303,9 +1370,11 @@ impl<'a, T, S> Iterator for SymmetricDifference<'a, T, S> { type Item = &'a T; + #[inline] fn next(&mut self) -> Option<&'a T> { self.iter.next() } + #[inline] fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } @@ -1330,6 +1399,7 @@ impl<'a, T, S> fmt::Debug for SymmetricDifference<'a, T, S> #[stable(feature = "rust1", since = "1.0.0")] impl<'a, T, S> Clone for Union<'a, T, S> { + #[inline] fn clone(&self) -> Union<'a, T, S> { Union { iter: self.iter.clone() } } @@ -1359,9 +1429,11 @@ impl<'a, T, S> Iterator for Union<'a, T, S> { type Item = &'a T; + #[inline] fn next(&mut self) -> Option<&'a T> { self.iter.next() } + #[inline] fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } diff --git a/src/libstd/collections/hash/table.rs b/src/libstd/collections/hash/table.rs deleted file mode 100644 index 479e6dccb90dd..0000000000000 --- a/src/libstd/collections/hash/table.rs +++ /dev/null @@ -1,1139 +0,0 @@ -// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -use alloc::{Global, Alloc, Layout, LayoutErr, handle_alloc_error}; -use collections::CollectionAllocErr; -use hash::{BuildHasher, Hash, Hasher}; -use marker; -use mem::{size_of, needs_drop}; -use mem; -use ops::{Deref, DerefMut}; -use ptr::{self, Unique, NonNull}; -use hint; - -use self::BucketState::*; - -/// Integer type used for stored hash values. -/// -/// No more than bit_width(usize) bits are needed to select a bucket. -/// -/// The most significant bit is ours to use for tagging `SafeHash`. -/// -/// (Even if we could have usize::MAX bytes allocated for buckets, -/// each bucket stores at least a `HashUint`, so there can be no more than -/// usize::MAX / size_of(usize) buckets.) -type HashUint = usize; - -const EMPTY_BUCKET: HashUint = 0; -const EMPTY: usize = 1; - -/// Special `Unique` that uses the lower bit of the pointer -/// to expose a boolean tag. -/// Note: when the pointer is initialized to EMPTY `.ptr()` will return -/// null and the tag functions shouldn't be used. 
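`Union` above is built by chaining `self.iter()` with `other.difference(self)`, and `SymmetricDifference` conventionally chains the two one-sided differences (an assumption here, since its constructor body falls outside these hunks). A sketch checking both compositions against the adapters:

use std::collections::HashSet;

fn main() {
    let a: HashSet<i32> = [1, 2, 3].iter().cloned().collect();
    let b: HashSet<i32> = [3, 4].iter().cloned().collect();

    // union = all of `a`, plus whatever part of `b` is not already in `a`.
    let union_manual: HashSet<i32> = a.iter().chain(b.difference(&a)).cloned().collect();
    let union_std: HashSet<i32> = a.union(&b).cloned().collect();
    assert_eq!(union_manual, union_std);

    // symmetric difference = (a \ b) chained with (b \ a).
    let sym_manual: HashSet<i32> = a.difference(&b).chain(b.difference(&a)).cloned().collect();
    let sym_std: HashSet<i32> = a.symmetric_difference(&b).cloned().collect();
    assert_eq!(sym_manual, sym_std);
}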
-struct TaggedHashUintPtr(Unique); - -impl TaggedHashUintPtr { - #[inline] - unsafe fn new(ptr: *mut HashUint) -> Self { - debug_assert!(ptr as usize & 1 == 0 || ptr as usize == EMPTY as usize); - TaggedHashUintPtr(Unique::new_unchecked(ptr)) - } - - #[inline] - fn set_tag(&mut self, value: bool) { - let mut usize_ptr = self.0.as_ptr() as usize; - unsafe { - if value { - usize_ptr |= 1; - } else { - usize_ptr &= !1; - } - self.0 = Unique::new_unchecked(usize_ptr as *mut HashUint) - } - } - - #[inline] - fn tag(&self) -> bool { - (self.0.as_ptr() as usize) & 1 == 1 - } - - #[inline] - fn ptr(&self) -> *mut HashUint { - (self.0.as_ptr() as usize & !1) as *mut HashUint - } -} - -/// The raw hashtable, providing safe-ish access to the unzipped and highly -/// optimized arrays of hashes, and key-value pairs. -/// -/// This design is a lot faster than the naive -/// `Vec>`, because we don't pay for the overhead of an -/// option on every element, and we get a generally more cache-aware design. -/// -/// Essential invariants of this structure: -/// -/// - if `t.hashes[i] == EMPTY_BUCKET`, then `Bucket::at_index(&t, i).raw` -/// points to 'undefined' contents. Don't read from it. This invariant is -/// enforced outside this module with the `EmptyBucket`, `FullBucket`, -/// and `SafeHash` types. -/// -/// - An `EmptyBucket` is only constructed at an index with -/// a hash of EMPTY_BUCKET. -/// -/// - A `FullBucket` is only constructed at an index with a -/// non-EMPTY_BUCKET hash. -/// -/// - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get -/// around hashes of zero by changing them to 0x8000_0000_0000_0000, -/// which will likely map to the same bucket, while not being confused -/// with "empty". -/// -/// - Both "arrays represented by pointers" are the same length: -/// `capacity`. This is set at creation and never changes. The arrays -/// are unzipped and are more cache aware (scanning through 8 hashes -/// brings in at most 2 cache lines, since they're all right beside each -/// other). This layout may waste space in padding such as in a map from -/// u64 to u8, but is a more cache conscious layout as the key-value pairs -/// are only very shortly probed and the desired value will be in the same -/// or next cache line. -/// -/// You can kind of think of this module/data structure as a safe wrapper -/// around just the "table" part of the hashtable. It enforces some -/// invariants at the type level and employs some performance trickery, -/// but in general is just a tricked out `Vec>`. -/// -/// The hashtable also exposes a special boolean tag. The tag defaults to false -/// when the RawTable is created and is accessible with the `tag` and `set_tag` -/// functions. -pub struct RawTable { - capacity_mask: usize, - size: usize, - hashes: TaggedHashUintPtr, - - // Because K/V do not appear directly in any of the types in the struct, - // inform rustc that in fact instances of K and V are reachable from here. 
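`TaggedHashUintPtr` stores a boolean in the least significant bit of a word-aligned pointer; the pointee's alignment guarantees that bit is otherwise zero. A standalone sketch of the same trick, illustrative only and not the deleted std type:

// Low-bit pointer tagging, assuming the pointee is at least 2-byte aligned
// so bit 0 of the address is always free.
struct TaggedPtr(usize);

impl TaggedPtr {
    fn new(ptr: *mut u64) -> TaggedPtr {
        debug_assert_eq!(ptr as usize & 1, 0, "pointer must be aligned");
        TaggedPtr(ptr as usize)
    }
    fn set_tag(&mut self, value: bool) {
        if value { self.0 |= 1 } else { self.0 &= !1 }
    }
    fn tag(&self) -> bool {
        self.0 & 1 == 1
    }
    fn ptr(&self) -> *mut u64 {
        (self.0 & !1) as *mut u64
    }
}

fn main() {
    let mut slot: u64 = 42;
    let mut tagged = TaggedPtr::new(&mut slot);
    assert!(!tagged.tag());
    tagged.set_tag(true);
    assert!(tagged.tag());
    // Setting the tag does not disturb the pointer itself.
    assert_eq!(unsafe { *tagged.ptr() }, 42);
}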
- marker: marker::PhantomData<(K, V)>, -} - -// An unsafe view of a RawTable bucket -// Valid indexes are within [0..table_capacity) -pub struct RawBucket { - hash_start: *mut HashUint, - // We use *const to ensure covariance with respect to K and V - pair_start: *const (K, V), - idx: usize, - _marker: marker::PhantomData<(K, V)>, -} - -impl Copy for RawBucket {} -impl Clone for RawBucket { - fn clone(&self) -> RawBucket { - *self - } -} - -pub struct Bucket { - raw: RawBucket, - table: M, -} - -impl Copy for Bucket {} -impl Clone for Bucket { - fn clone(&self) -> Bucket { - *self - } -} - -pub struct EmptyBucket { - raw: RawBucket, - table: M, -} - -pub struct FullBucket { - raw: RawBucket, - table: M, -} - -pub type FullBucketMut<'table, K, V> = FullBucket>; - -pub enum BucketState { - Empty(EmptyBucket), - Full(FullBucket), -} - -// A GapThenFull encapsulates the state of two consecutive buckets at once. -// The first bucket, called the gap, is known to be empty. -// The second bucket is full. -pub struct GapThenFull { - gap: EmptyBucket, - full: FullBucket, -} - -/// A hash that is not zero, since we use a hash of zero to represent empty -/// buckets. -#[derive(PartialEq, Copy, Clone)] -pub struct SafeHash { - hash: HashUint, -} - -impl SafeHash { - /// Peek at the hash value, which is guaranteed to be non-zero. - #[inline(always)] - pub fn inspect(&self) -> HashUint { - self.hash - } - - #[inline(always)] - pub fn new(hash: u64) -> Self { - // We need to avoid 0 in order to prevent collisions with - // EMPTY_HASH. We can maintain our precious uniform distribution - // of initial indexes by unconditionally setting the MSB, - // effectively reducing the hashes by one bit. - // - // Truncate hash to fit in `HashUint`. - let hash_bits = size_of::() * 8; - SafeHash { hash: (1 << (hash_bits - 1)) | (hash as HashUint) } - } -} - -/// We need to remove hashes of 0. That's reserved for empty buckets. -/// This function wraps up `hash_keyed` to be the only way outside this -/// module to generate a SafeHash. -pub fn make_hash(hash_state: &S, t: &T) -> SafeHash - where T: Hash, - S: BuildHasher -{ - let mut state = hash_state.build_hasher(); - t.hash(&mut state); - SafeHash::new(state.finish()) -} - -// `replace` casts a `*HashUint` to a `*SafeHash`. Since we statically -// ensure that a `FullBucket` points to an index with a non-zero hash, -// and a `SafeHash` is just a `HashUint` with a different name, this is -// safe. -// -// This test ensures that a `SafeHash` really IS the same size as a -// `HashUint`. If you need to change the size of `SafeHash` (and -// consequently made this test fail), `replace` needs to be -// modified to no longer assume this. -#[test] -fn can_alias_safehash_as_hash() { - assert_eq!(size_of::(), size_of::()) -} - -// RawBucket methods are unsafe as it's possible to -// make a RawBucket point to invalid memory using safe code. -impl RawBucket { - unsafe fn hash(&self) -> *mut HashUint { - self.hash_start.add(self.idx) - } - unsafe fn pair(&self) -> *mut (K, V) { - self.pair_start.add(self.idx) as *mut (K, V) - } - unsafe fn hash_pair(&self) -> (*mut HashUint, *mut (K, V)) { - (self.hash(), self.pair()) - } -} - -// Buckets hold references to the table. -impl FullBucket { - /// Borrow a reference to the table. - pub fn table(&self) -> &M { - &self.table - } - /// Borrow a mutable reference to the table. - pub fn table_mut(&mut self) -> &mut M { - &mut self.table - } - /// Move out the reference to the table. 
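`SafeHash::new` keeps real hashes distinct from `EMPTY_BUCKET` (0) by unconditionally setting the most significant bit, which sacrifices one bit of the hash but leaves the low, bucket-selecting bits untouched. A sketch of that transformation:

// Hash value 0 means "empty slot", so stored hashes always have their top bit
// forced on and can never be confused with an empty bucket.
const EMPTY_BUCKET: usize = 0;

fn safe_hash(hash: u64) -> usize {
    let hash_bits = std::mem::size_of::<usize>() * 8;
    (1usize << (hash_bits - 1)) | (hash as usize)
}

fn main() {
    assert_ne!(safe_hash(0), EMPTY_BUCKET);
    assert_ne!(safe_hash(u64::max_value()), EMPTY_BUCKET);
    // The low bits, which select the bucket index, are preserved.
    let mask = 0b1111;
    assert_eq!(safe_hash(0b1010) & mask, 0b1010);
}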
- pub fn into_table(self) -> M { - self.table - } - /// Get the raw index. - pub fn index(&self) -> usize { - self.raw.idx - } - /// Get the raw bucket. - pub fn raw(&self) -> RawBucket { - self.raw - } -} - -impl EmptyBucket { - /// Borrow a reference to the table. - pub fn table(&self) -> &M { - &self.table - } - /// Borrow a mutable reference to the table. - pub fn table_mut(&mut self) -> &mut M { - &mut self.table - } -} - -impl Bucket { - /// Get the raw index. - pub fn index(&self) -> usize { - self.raw.idx - } - /// get the table. - pub fn into_table(self) -> M { - self.table - } -} - -impl Deref for FullBucket - where M: Deref> -{ - type Target = RawTable; - fn deref(&self) -> &RawTable { - &self.table - } -} - -/// `Put` is implemented for types which provide access to a table and cannot be invalidated -/// by filling a bucket. A similar implementation for `Take` is possible. -pub trait Put { - unsafe fn borrow_table_mut(&mut self) -> &mut RawTable; -} - - -impl<'t, K, V> Put for &'t mut RawTable { - unsafe fn borrow_table_mut(&mut self) -> &mut RawTable { - *self - } -} - -impl Put for Bucket - where M: Put -{ - unsafe fn borrow_table_mut(&mut self) -> &mut RawTable { - self.table.borrow_table_mut() - } -} - -impl Put for FullBucket - where M: Put -{ - unsafe fn borrow_table_mut(&mut self) -> &mut RawTable { - self.table.borrow_table_mut() - } -} - -impl>> Bucket { - #[inline] - pub fn new(table: M, hash: SafeHash) -> Bucket { - Bucket::at_index(table, hash.inspect() as usize) - } - - pub fn new_from(r: RawBucket, t: M) - -> Bucket - { - Bucket { - raw: r, - table: t, - } - } - - #[inline] - pub fn at_index(table: M, ib_index: usize) -> Bucket { - // if capacity is 0, then the RawBucket will be populated with bogus pointers. - // This is an uncommon case though, so avoid it in release builds. - debug_assert!(table.capacity() > 0, - "Table should have capacity at this point"); - let ib_index = ib_index & table.capacity_mask; - Bucket { - raw: table.raw_bucket_at(ib_index), - table, - } - } - - pub fn first(table: M) -> Bucket { - Bucket { - raw: table.raw_bucket_at(0), - table, - } - } - - // "So a few of the first shall be last: for many be called, - // but few chosen." - // - // We'll most likely encounter a few buckets at the beginning that - // have their initial buckets near the end of the table. They were - // placed at the beginning as the probe wrapped around the table - // during insertion. We must skip forward to a bucket that won't - // get reinserted too early and won't unfairly steal others spot. - // This eliminates the need for robin hood. - pub fn head_bucket(table: M) -> Bucket { - let mut bucket = Bucket::first(table); - - loop { - bucket = match bucket.peek() { - Full(full) => { - if full.displacement() == 0 { - // This bucket occupies its ideal spot. - // It indicates the start of another "cluster". - bucket = full.into_bucket(); - break; - } - // Leaving this bucket in the last cluster for later. - full.into_bucket() - } - Empty(b) => { - // Encountered a hole between clusters. - b.into_bucket() - } - }; - bucket.next(); - } - bucket - } - - /// Reads a bucket at a given index, returning an enum indicating whether - /// it's initialized or not. You need to match on this enum to get - /// the appropriate types to call most of the other functions in - /// this module. 
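`Bucket::at_index` and the probe stepping rely on the capacity being zero or a power of two: masking with `capacity_mask` is then the same as taking the index modulo the capacity, and it wraps probes around the end of the table for free. Sketch:

// With a power-of-two capacity, `index & (capacity - 1)` both reduces a hash
// to a bucket index and wraps probe steps around the table.
fn main() {
    let capacity: usize = 32;
    let capacity_mask = capacity - 1;

    let hash: usize = 0x51f2_ab37;
    assert_eq!(hash & capacity_mask, hash % capacity);

    // Stepping past the last slot wraps back to slot 0.
    let last = capacity - 1;
    assert_eq!(last.wrapping_add(1) & capacity_mask, 0);
}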
- pub fn peek(self) -> BucketState { - match unsafe { *self.raw.hash() } { - EMPTY_BUCKET => { - Empty(EmptyBucket { - raw: self.raw, - table: self.table, - }) - } - _ => { - Full(FullBucket { - raw: self.raw, - table: self.table, - }) - } - } - } - - /// Modifies the bucket in place to make it point to the next slot. - pub fn next(&mut self) { - self.raw.idx = self.raw.idx.wrapping_add(1) & self.table.capacity_mask; - } - - /// Modifies the bucket in place to make it point to the previous slot. - pub fn prev(&mut self) { - self.raw.idx = self.raw.idx.wrapping_sub(1) & self.table.capacity_mask; - } -} - -impl>> EmptyBucket { - #[inline] - pub fn next(self) -> Bucket { - let mut bucket = self.into_bucket(); - bucket.next(); - bucket - } - - #[inline] - pub fn into_bucket(self) -> Bucket { - Bucket { - raw: self.raw, - table: self.table, - } - } - - pub fn gap_peek(self) -> Result, Bucket> { - let gap = EmptyBucket { - raw: self.raw, - table: (), - }; - - match self.next().peek() { - Full(bucket) => { - Ok(GapThenFull { - gap, - full: bucket, - }) - } - Empty(e) => Err(e.into_bucket()), - } - } -} - -impl EmptyBucket - where M: Put -{ - /// Puts given key and value pair, along with the key's hash, - /// into this bucket in the hashtable. Note how `self` is 'moved' into - /// this function, because this slot will no longer be empty when - /// we return! A `FullBucket` is returned for later use, pointing to - /// the newly-filled slot in the hashtable. - /// - /// Use `make_hash` to construct a `SafeHash` to pass to this function. - pub fn put(mut self, hash: SafeHash, key: K, value: V) -> FullBucket { - unsafe { - *self.raw.hash() = hash.inspect(); - ptr::write(self.raw.pair(), (key, value)); - - self.table.borrow_table_mut().size += 1; - } - - FullBucket { - raw: self.raw, - table: self.table, - } - } -} - -impl>> FullBucket { - #[inline] - pub fn next(self) -> Bucket { - let mut bucket = self.into_bucket(); - bucket.next(); - bucket - } - - #[inline] - pub fn into_bucket(self) -> Bucket { - Bucket { - raw: self.raw, - table: self.table, - } - } - - /// Duplicates the current position. This can be useful for operations - /// on two or more buckets. - pub fn stash(self) -> FullBucket { - FullBucket { - raw: self.raw, - table: self, - } - } - - /// Get the distance between this bucket and the 'ideal' location - /// as determined by the key's hash stored in it. - /// - /// In the cited blog posts above, this is called the "distance to - /// initial bucket", or DIB. Also known as "probe count". - pub fn displacement(&self) -> usize { - // Calculates the distance one has to travel when going from - // `hash mod capacity` onwards to `idx mod capacity`, wrapping around - // if the destination is not reached before the end of the table. - (self.raw.idx.wrapping_sub(self.hash().inspect() as usize)) & self.table.capacity_mask - } - - #[inline] - pub fn hash(&self) -> SafeHash { - unsafe { SafeHash { hash: *self.raw.hash() } } - } - - /// Gets references to the key and value at a given index. - pub fn read(&self) -> (&K, &V) { - unsafe { - let pair_ptr = self.raw.pair(); - (&(*pair_ptr).0, &(*pair_ptr).1) - } - } -} - -// We take a mutable reference to the table instead of accepting anything that -// implements `DerefMut` to prevent fn `take` from being called on `stash`ed -// buckets. -impl<'t, K, V> FullBucket> { - /// Removes this bucket's key and value from the hashtable. - /// - /// This works similarly to `put`, building an `EmptyBucket` out of the - /// taken bucket. 
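`displacement` is the Robin Hood probe count: how far a full bucket sits from its ideal slot `hash & capacity_mask`, computed with a wrapping subtraction so the distance stays correct when the probe wrapped past the end of the table. Sketch:

// Probe distance ("distance to initial bucket") with wraparound.
fn displacement(idx: usize, ideal: usize, capacity_mask: usize) -> usize {
    idx.wrapping_sub(ideal) & capacity_mask
}

fn main() {
    let capacity_mask = 7; // capacity 8
    // Ideal slot 5, actually stored in slot 7: displaced by 2.
    assert_eq!(displacement(7, 5, capacity_mask), 2);
    // Ideal slot 6, wrapped around into slot 1: displaced by 3.
    assert_eq!(displacement(1, 6, capacity_mask), 3);
}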
- pub fn take(self) -> (EmptyBucket>, K, V) { - self.table.size -= 1; - - unsafe { - *self.raw.hash() = EMPTY_BUCKET; - let (k, v) = ptr::read(self.raw.pair()); - (EmptyBucket { - raw: self.raw, - table: self.table, - }, - k, - v) - } - } -} - -// This use of `Put` is misleading and restrictive, but safe and sufficient for our use cases -// where `M` is a full bucket or table reference type with mutable access to the table. -impl FullBucket - where M: Put -{ - pub fn replace(&mut self, h: SafeHash, k: K, v: V) -> (SafeHash, K, V) { - unsafe { - let old_hash = ptr::replace(self.raw.hash() as *mut SafeHash, h); - let (old_key, old_val) = ptr::replace(self.raw.pair(), (k, v)); - - (old_hash, old_key, old_val) - } - } -} - -impl FullBucket - where M: Deref> + DerefMut -{ - /// Gets mutable references to the key and value at a given index. - pub fn read_mut(&mut self) -> (&mut K, &mut V) { - unsafe { - let pair_ptr = self.raw.pair(); - (&mut (*pair_ptr).0, &mut (*pair_ptr).1) - } - } -} - -impl<'t, K, V, M> FullBucket - where M: Deref> + 't -{ - /// Exchange a bucket state for immutable references into the table. - /// Because the underlying reference to the table is also consumed, - /// no further changes to the structure of the table are possible; - /// in exchange for this, the returned references have a longer lifetime - /// than the references returned by `read()`. - pub fn into_refs(self) -> (&'t K, &'t V) { - unsafe { - let pair_ptr = self.raw.pair(); - (&(*pair_ptr).0, &(*pair_ptr).1) - } - } -} - -impl<'t, K, V, M> FullBucket - where M: Deref> + DerefMut + 't -{ - /// This works similarly to `into_refs`, exchanging a bucket state - /// for mutable references into the table. - pub fn into_mut_refs(self) -> (&'t mut K, &'t mut V) { - unsafe { - let pair_ptr = self.raw.pair(); - (&mut (*pair_ptr).0, &mut (*pair_ptr).1) - } - } -} - -impl GapThenFull - where M: Deref> -{ - #[inline] - pub fn full(&self) -> &FullBucket { - &self.full - } - - pub fn into_table(self) -> M { - self.full.into_table() - } - - pub fn shift(mut self) -> Result, Bucket> { - unsafe { - let (gap_hash, gap_pair) = self.gap.raw.hash_pair(); - let (full_hash, full_pair) = self.full.raw.hash_pair(); - *gap_hash = mem::replace(&mut *full_hash, EMPTY_BUCKET); - ptr::copy_nonoverlapping(full_pair, gap_pair, 1); - } - - let FullBucket { raw: prev_raw, .. } = self.full; - - match self.full.next().peek() { - Full(bucket) => { - self.gap.raw = prev_raw; - - self.full = bucket; - - Ok(self) - } - Empty(b) => Err(b.into_bucket()), - } - } -} - -// Returns a Layout which describes the allocation required for a hash table, -// and the offset of the array of (key, value) pairs in the allocation. -#[inline(always)] -fn calculate_layout(capacity: usize) -> Result<(Layout, usize), LayoutErr> { - let hashes = Layout::array::(capacity)?; - let pairs = Layout::array::<(K, V)>(capacity)?; - hashes.extend(pairs).map(|(layout, _)| { - // LLVM seems to have trouble properly const-propagating pairs.align(), - // possibly due to the use of NonZeroUsize. This little hack allows it - // to generate optimal code. - // - // See https://github.com/rust-lang/rust/issues/51346 for more details. - ( - layout, - hashes.size() + hashes.padding_needed_for(mem::align_of::<(K, V)>()), - ) - }) -} - -pub(crate) enum Fallibility { - Fallible, - Infallible, -} - -use self::Fallibility::*; - -impl RawTable { - /// Does not initialize the buckets. The caller should ensure they, - /// at the very least, set every hash to EMPTY_BUCKET. 
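`calculate_layout` sizes one allocation holding the hash array followed by the suitably aligned (K, V) array. A sketch of the same computation with `Layout::array` and `Layout::extend` (stable today; the deleted code derived the pair offset via `padding_needed_for` only to work around an LLVM const-propagation issue):

use std::alloc::Layout;

// One buffer: `capacity` hashes, then padding, then `capacity` (K, V) pairs.
// `extend` reports both the combined layout and where the pairs begin.
fn layout_for<K, V>(capacity: usize) -> (Layout, usize) {
    let hashes = Layout::array::<usize>(capacity).unwrap();
    let pairs = Layout::array::<(K, V)>(capacity).unwrap();
    let (combined, pairs_offset) = hashes.extend(pairs).unwrap();
    (combined, pairs_offset)
}

fn main() {
    let (layout, pairs_offset) = layout_for::<u64, u8>(32);
    // The 32 hashes come first, so the pairs cannot start before them.
    assert!(pairs_offset >= 32 * std::mem::size_of::<usize>());
    assert!(layout.size() >= pairs_offset + 32 * std::mem::size_of::<(u64, u8)>());
}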
- /// Returns an error if it cannot allocate or capacity overflows. - unsafe fn new_uninitialized_internal( - capacity: usize, - fallibility: Fallibility, - ) -> Result, CollectionAllocErr> { - if capacity == 0 { - return Ok(RawTable { - size: 0, - capacity_mask: capacity.wrapping_sub(1), - hashes: TaggedHashUintPtr::new(EMPTY as *mut HashUint), - marker: marker::PhantomData, - }); - } - - // Allocating hashmaps is a little tricky. We need to allocate two - // arrays, but since we know their sizes and alignments up front, - // we just allocate a single array, and then have the subarrays - // point into it. - let (layout, _) = calculate_layout::(capacity)?; - let buffer = Global.alloc(layout).map_err(|e| match fallibility { - Infallible => handle_alloc_error(layout), - Fallible => e, - })?; - - Ok(RawTable { - capacity_mask: capacity.wrapping_sub(1), - size: 0, - hashes: TaggedHashUintPtr::new(buffer.cast().as_ptr()), - marker: marker::PhantomData, - }) - } - - /// Does not initialize the buckets. The caller should ensure they, - /// at the very least, set every hash to EMPTY_BUCKET. - unsafe fn new_uninitialized(capacity: usize) -> RawTable { - match Self::new_uninitialized_internal(capacity, Infallible) { - Err(CollectionAllocErr::CapacityOverflow) => panic!("capacity overflow"), - Err(CollectionAllocErr::AllocErr) => unreachable!(), - Ok(table) => { table } - } - } - - #[inline(always)] - fn raw_bucket_at(&self, index: usize) -> RawBucket { - let (_, pairs_offset) = calculate_layout::(self.capacity()) - .unwrap_or_else(|_| unsafe { hint::unreachable_unchecked() }); - let buffer = self.hashes.ptr() as *mut u8; - unsafe { - RawBucket { - hash_start: buffer as *mut HashUint, - pair_start: buffer.add(pairs_offset) as *const (K, V), - idx: index, - _marker: marker::PhantomData, - } - } - } - - fn new_internal( - capacity: usize, - fallibility: Fallibility, - ) -> Result, CollectionAllocErr> { - unsafe { - let ret = RawTable::new_uninitialized_internal(capacity, fallibility)?; - if capacity > 0 { - ptr::write_bytes(ret.hashes.ptr(), 0, capacity); - } - Ok(ret) - } - } - - /// Tries to create a new raw table from a given capacity. If it cannot allocate, - /// it returns with AllocErr. - pub fn try_new(capacity: usize) -> Result, CollectionAllocErr> { - Self::new_internal(capacity, Fallible) - } - - /// Creates a new raw table from a given capacity. All buckets are - /// initially empty. - pub fn new(capacity: usize) -> RawTable { - match Self::new_internal(capacity, Infallible) { - Err(CollectionAllocErr::CapacityOverflow) => panic!("capacity overflow"), - Err(CollectionAllocErr::AllocErr) => unreachable!(), - Ok(table) => { table } - } - } - - /// The hashtable's capacity, similar to a vector's. - pub fn capacity(&self) -> usize { - self.capacity_mask.wrapping_add(1) - } - - /// The number of elements ever `put` in the hashtable, minus the number - /// of elements ever `take`n. - pub fn size(&self) -> usize { - self.size - } - - fn raw_buckets(&self) -> RawBuckets { - RawBuckets { - raw: self.raw_bucket_at(0), - elems_left: self.size, - marker: marker::PhantomData, - } - } - - pub fn iter(&self) -> Iter { - Iter { - iter: self.raw_buckets(), - } - } - - pub fn iter_mut(&mut self) -> IterMut { - IterMut { - iter: self.raw_buckets(), - _marker: marker::PhantomData, - } - } - - pub fn into_iter(self) -> IntoIter { - let RawBuckets { raw, elems_left, .. } = self.raw_buckets(); - // Replace the marker regardless of lifetime bounds on parameters. 
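`new_internal` is one fallible core wrapped twice: `try_new` surfaces the error, while `new` panics on capacity overflow and treats a returned allocation error as unreachable, because the infallible path aborts inside the allocator. A sketch of that pattern with a stand-in error type (the real `CollectionAllocErr` is unstable):

// One fallible constructor, two entry points.
#[allow(dead_code)]
#[derive(Debug)]
enum AllocErr {
    CapacityOverflow,
    AllocFailed,
}

fn try_make_buffer(capacity: usize) -> Result<Vec<u8>, AllocErr> {
    let bytes = capacity.checked_mul(8).ok_or(AllocErr::CapacityOverflow)?;
    // Stand-in allocation; on out-of-memory, `vec!` aborts rather than
    // returning, so `AllocFailed` is never actually produced here.
    Ok(vec![0u8; bytes])
}

fn make_buffer(capacity: usize) -> Vec<u8> {
    match try_make_buffer(capacity) {
        Err(AllocErr::CapacityOverflow) => panic!("capacity overflow"),
        Err(AllocErr::AllocFailed) => unreachable!(),
        Ok(buffer) => buffer,
    }
}

fn main() {
    assert_eq!(make_buffer(4).len(), 32);
    assert!(try_make_buffer(usize::max_value()).is_err());
}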
- IntoIter { - iter: RawBuckets { - raw, - elems_left, - marker: marker::PhantomData, - }, - table: self, - } - } - - pub fn drain(&mut self) -> Drain { - let RawBuckets { raw, elems_left, .. } = self.raw_buckets(); - // Replace the marker regardless of lifetime bounds on parameters. - Drain { - iter: RawBuckets { - raw, - elems_left, - marker: marker::PhantomData, - }, - table: NonNull::from(self), - marker: marker::PhantomData, - } - } - - /// Drops buckets in reverse order. It leaves the table in an inconsistent - /// state and should only be used for dropping the table's remaining - /// entries. It's used in the implementation of Drop. - unsafe fn rev_drop_buckets(&mut self) { - // initialize the raw bucket past the end of the table - let mut raw = self.raw_bucket_at(self.capacity()); - let mut elems_left = self.size; - - while elems_left != 0 { - raw.idx -= 1; - - if *raw.hash() != EMPTY_BUCKET { - elems_left -= 1; - ptr::drop_in_place(raw.pair()); - } - } - } - - /// Set the table tag - pub fn set_tag(&mut self, value: bool) { - self.hashes.set_tag(value) - } - - /// Get the table tag - pub fn tag(&self) -> bool { - self.hashes.tag() - } -} - -/// A raw iterator. The basis for some other iterators in this module. Although -/// this interface is safe, it's not used outside this module. -struct RawBuckets<'a, K, V> { - raw: RawBucket, - elems_left: usize, - - // Strictly speaking, this should be &'a (K,V), but that would - // require that K:'a, and we often use RawBuckets<'static...> for - // move iterations, so that messes up a lot of other things. So - // just use `&'a (K,V)` as this is not a publicly exposed type - // anyway. - marker: marker::PhantomData<&'a ()>, -} - -// FIXME(#26925) Remove in favor of `#[derive(Clone)]` -impl<'a, K, V> Clone for RawBuckets<'a, K, V> { - fn clone(&self) -> RawBuckets<'a, K, V> { - RawBuckets { - raw: self.raw, - elems_left: self.elems_left, - marker: marker::PhantomData, - } - } -} - - -impl<'a, K, V> Iterator for RawBuckets<'a, K, V> { - type Item = RawBucket; - - fn next(&mut self) -> Option> { - if self.elems_left == 0 { - return None; - } - - loop { - unsafe { - let item = self.raw; - self.raw.idx += 1; - if *item.hash() != EMPTY_BUCKET { - self.elems_left -= 1; - return Some(item); - } - } - } - } - - fn size_hint(&self) -> (usize, Option) { - (self.elems_left, Some(self.elems_left)) - } -} - -impl<'a, K, V> ExactSizeIterator for RawBuckets<'a, K, V> { - fn len(&self) -> usize { - self.elems_left - } -} - -/// Iterator over shared references to entries in a table. -pub struct Iter<'a, K: 'a, V: 'a> { - iter: RawBuckets<'a, K, V>, -} - -unsafe impl<'a, K: Sync, V: Sync> Sync for Iter<'a, K, V> {} -unsafe impl<'a, K: Sync, V: Sync> Send for Iter<'a, K, V> {} - -// FIXME(#26925) Remove in favor of `#[derive(Clone)]` -impl<'a, K, V> Clone for Iter<'a, K, V> { - fn clone(&self) -> Iter<'a, K, V> { - Iter { - iter: self.iter.clone(), - } - } -} - -/// Iterator over mutable references to entries in a table. 
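`RawBuckets` scans the hash array, skipping `EMPTY_BUCKET` slots and decrementing `elems_left`, which is also what makes its `size_hint` exact. A self-contained sketch of the same shape over a plain slice of hashes:

// Yield the indices of non-empty slots, using a remaining-elements counter
// for early termination and an exact size_hint.
struct NonEmpty<'a> {
    hashes: &'a [usize],
    idx: usize,
    elems_left: usize,
}

impl<'a> Iterator for NonEmpty<'a> {
    type Item = usize;

    fn next(&mut self) -> Option<usize> {
        if self.elems_left == 0 {
            return None;
        }
        loop {
            let i = self.idx;
            self.idx += 1;
            if self.hashes[i] != 0 {
                self.elems_left -= 1;
                return Some(i);
            }
        }
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.elems_left, Some(self.elems_left))
    }
}

fn main() {
    let hashes = [0, 7, 0, 0, 3, 9, 0, 0];
    let iter = NonEmpty { hashes: &hashes, idx: 0, elems_left: 3 };
    assert_eq!(iter.size_hint(), (3, Some(3)));
    assert_eq!(iter.collect::<Vec<_>>(), vec![1, 4, 5]);
}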
-pub struct IterMut<'a, K: 'a, V: 'a> { - iter: RawBuckets<'a, K, V>, - // To ensure invariance with respect to V - _marker: marker::PhantomData<&'a mut V>, -} - -unsafe impl<'a, K: Sync, V: Sync> Sync for IterMut<'a, K, V> {} -// Both K: Sync and K: Send are correct for IterMut's Send impl, -// but Send is the more useful bound -unsafe impl<'a, K: Send, V: Send> Send for IterMut<'a, K, V> {} - -impl<'a, K: 'a, V: 'a> IterMut<'a, K, V> { - pub fn iter(&self) -> Iter { - Iter { - iter: self.iter.clone(), - } - } -} - -/// Iterator over the entries in a table, consuming the table. -pub struct IntoIter { - table: RawTable, - iter: RawBuckets<'static, K, V>, -} - -unsafe impl Sync for IntoIter {} -unsafe impl Send for IntoIter {} - -impl IntoIter { - pub fn iter(&self) -> Iter { - Iter { - iter: self.iter.clone(), - } - } -} - -/// Iterator over the entries in a table, clearing the table. -pub struct Drain<'a, K: 'a, V: 'a> { - table: NonNull>, - iter: RawBuckets<'static, K, V>, - marker: marker::PhantomData<&'a RawTable>, -} - -unsafe impl<'a, K: Sync, V: Sync> Sync for Drain<'a, K, V> {} -unsafe impl<'a, K: Send, V: Send> Send for Drain<'a, K, V> {} - -impl<'a, K, V> Drain<'a, K, V> { - pub fn iter(&self) -> Iter { - Iter { - iter: self.iter.clone(), - } - } -} - -impl<'a, K, V> Iterator for Iter<'a, K, V> { - type Item = (&'a K, &'a V); - - fn next(&mut self) -> Option<(&'a K, &'a V)> { - self.iter.next().map(|raw| unsafe { - let pair_ptr = raw.pair(); - (&(*pair_ptr).0, &(*pair_ptr).1) - }) - } - - fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() - } -} - -impl<'a, K, V> ExactSizeIterator for Iter<'a, K, V> { - fn len(&self) -> usize { - self.iter.len() - } -} - -impl<'a, K, V> Iterator for IterMut<'a, K, V> { - type Item = (&'a K, &'a mut V); - - fn next(&mut self) -> Option<(&'a K, &'a mut V)> { - self.iter.next().map(|raw| unsafe { - let pair_ptr = raw.pair(); - (&(*pair_ptr).0, &mut (*pair_ptr).1) - }) - } - - fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() - } -} - -impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> { - fn len(&self) -> usize { - self.iter.len() - } -} - -impl Iterator for IntoIter { - type Item = (SafeHash, K, V); - - fn next(&mut self) -> Option<(SafeHash, K, V)> { - self.iter.next().map(|raw| { - self.table.size -= 1; - unsafe { - let (k, v) = ptr::read(raw.pair()); - (SafeHash { hash: *raw.hash() }, k, v) - } - }) - } - - fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() - } -} - -impl ExactSizeIterator for IntoIter { - fn len(&self) -> usize { - self.iter().len() - } -} - -impl<'a, K, V> Iterator for Drain<'a, K, V> { - type Item = (SafeHash, K, V); - - #[inline] - fn next(&mut self) -> Option<(SafeHash, K, V)> { - self.iter.next().map(|raw| { - unsafe { - self.table.as_mut().size -= 1; - let (k, v) = ptr::read(raw.pair()); - (SafeHash { hash: ptr::replace(&mut *raw.hash(), EMPTY_BUCKET) }, k, v) - } - }) - } - - fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() - } -} - -impl<'a, K, V> ExactSizeIterator for Drain<'a, K, V> { - fn len(&self) -> usize { - self.iter.len() - } -} - -impl<'a, K: 'a, V: 'a> Drop for Drain<'a, K, V> { - fn drop(&mut self) { - self.for_each(drop); - } -} - -impl Clone for RawTable { - fn clone(&self) -> RawTable { - unsafe { - let cap = self.capacity(); - let mut new_ht = RawTable::new_uninitialized(cap); - - let mut new_buckets = new_ht.raw_bucket_at(0); - let mut buckets = self.raw_bucket_at(0); - while buckets.idx < cap { - *new_buckets.hash() = *buckets.hash(); - if 
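`Drop for Drain` runs `self.for_each(drop)`, so a drain that is never iterated still removes every remaining element while keeping the allocation around for reuse. A usage sketch with the public `HashSet` API:

use std::collections::HashSet;

fn main() {
    let mut set: HashSet<i32> = (0..10).collect();

    // Dropping the Drain without consuming it still empties the set.
    {
        let _drain = set.drain();
    }
    assert!(set.is_empty());
    // Only the elements are gone; the capacity is retained.
    assert!(set.capacity() > 0);
}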
*new_buckets.hash() != EMPTY_BUCKET { - let pair_ptr = buckets.pair(); - let kv = ((*pair_ptr).0.clone(), (*pair_ptr).1.clone()); - ptr::write(new_buckets.pair(), kv); - } - buckets.idx += 1; - new_buckets.idx += 1; - } - - new_ht.size = self.size(); - new_ht.set_tag(self.tag()); - - new_ht - } - } -} - -unsafe impl<#[may_dangle] K, #[may_dangle] V> Drop for RawTable { - fn drop(&mut self) { - if self.capacity() == 0 { - return; - } - - // This is done in reverse because we've likely partially taken - // some elements out with `.into_iter()` from the front. - // Check if the size is 0, so we don't do a useless scan when - // dropping empty tables such as on resize. - // Also avoid double drop of elements that have been already moved out. - unsafe { - if needs_drop::<(K, V)>() { - // avoid linear runtime for types that don't need drop - self.rev_drop_buckets(); - } - } - - let (layout, _) = calculate_layout::(self.capacity()) - .unwrap_or_else(|_| unsafe { hint::unreachable_unchecked() }); - unsafe { - Global.dealloc(NonNull::new_unchecked(self.hashes.ptr()).cast(), layout); - // Remember how everything was allocated out of one buffer - // during initialization? We only need one call to free here. - } - } -} diff --git a/src/libstd/lib.rs b/src/libstd/lib.rs index 90c8eaf0f7cbc..cf02cc55cdcb4 100644 --- a/src/libstd/lib.rs +++ b/src/libstd/lib.rs @@ -312,6 +312,7 @@ #![feature(non_exhaustive)] #![feature(alloc_layout_extra)] #![feature(maybe_uninit)] +#![feature(ptr_offset_from)] #![cfg_attr(target_env = "sgx", feature(global_asm, range_contains, slice_index_methods, decl_macro, coerce_unsized))] diff --git a/src/test/ui/issues/issue-21763.stderr b/src/test/ui/issues/issue-21763.stderr index 91db16e0f6d27..3eeb4a7cbaae6 100644 --- a/src/test/ui/issues/issue-21763.stderr +++ b/src/test/ui/issues/issue-21763.stderr @@ -4,10 +4,9 @@ error[E0277]: `std::rc::Rc<()>` cannot be sent between threads safely LL | foo::, Rc<()>>>(); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ `std::rc::Rc<()>` cannot be sent between threads safely | - = help: within `std::collections::HashMap, std::rc::Rc<()>>`, the trait `std::marker::Send` is not implemented for `std::rc::Rc<()>` + = help: within `(std::rc::Rc<()>, std::rc::Rc<()>)`, the trait `std::marker::Send` is not implemented for `std::rc::Rc<()>` = note: required because it appears within the type `(std::rc::Rc<()>, std::rc::Rc<()>)` - = note: required because it appears within the type `std::marker::PhantomData<(std::rc::Rc<()>, std::rc::Rc<()>)>` - = note: required because it appears within the type `std::collections::hash::table::RawTable, std::rc::Rc<()>>` + = note: required because of the requirements on the impl of `std::marker::Send` for `std::collections::hash::raw::RawTable<(std::rc::Rc<()>, std::rc::Rc<()>)>` = note: required because it appears within the type `std::collections::HashMap, std::rc::Rc<()>>` note: required by `foo` --> $DIR/issue-21763.rs:16:1