Skip to main content

osom_lib_hash_tables/abseil/hash_table/
abseil_hash_table.rs

1use core::borrow::Borrow;
2use core::ptr::NonNull;
3use core::{hash::Hash, marker::PhantomData, mem::ManuallyDrop, ptr::null_mut};
4
5use osom_lib_alloc::traits::Allocator as _;
6use osom_lib_macros::debug_check_or_release_hint;
7use osom_lib_primitives::{kvp::KVP, length::Length, power_of_two::PowerOfTwo32};
8use osom_lib_reprc::traits::ReprC;
9use osom_lib_try_clone::TryClone;
10
11use crate::errors::{HashTableError, TryCloneHashTableError};
12use crate::{
13    abseil::{
14        configuration::AbseilConfig,
15        hash_table::{
16            abseil_block::{AbseilBlock, CONTROL_BYTE_EMPTY},
17            abseil_layout::{ABSEIL_BLOCK_SIZE, AbseilLayout},
18            abseil_unsafe_iter::{AbseilUnsafeIter, AbseilUnsafeMutIter},
19            platform::{PlatformImpl, PlatformOps},
20        },
21        utils::probe_block_indexes,
22    },
23    helpers::{ptr_to_mut, ptr_to_ref},
24};
25
26/// The Abseil hash table.
27#[repr(C)]
28#[must_use]
29#[derive(Debug)]
30pub struct AbseilHashTable<TKey, TValue, TConfig>
31where
32    TKey: Eq + Hash,
33    TConfig: AbseilConfig,
34{
35    /// Pointer to the actual data.
36    pub(super) control_data: *mut u8,
37
38    /// Pointer to the (key, value) pairs. This is not an independent pointer.
39    /// The memory it points to is owned by control_data. This is just a
40    /// cached offset. For allocation and deallocation we use control_data only.
41    pub(super) kvp_data: *mut u8,
42
43    /// The number of elements in the table.
44    pub(super) elements_count: Length,
45
46    /// This fields indicates how many inserts can be performed
47    /// before the table needs to be resized.
48    pub(super) remaining_capacity: Length,
49
50    /// The total capacity of the table.
51    pub(super) total_capacity: PowerOfTwo32,
52
53    /// The configuration of the table.
54    pub(super) config: ManuallyDrop<TConfig>,
55
56    /// Marker for the key and value types.
57    _marker: PhantomData<KVP<TKey, TValue>>,
58}
59
60unsafe impl<TKey, TValue, TConfig> ReprC for AbseilHashTable<TKey, TValue, TConfig>
61where
62    TKey: Eq + Hash + ReprC,
63    TValue: ReprC,
64    TConfig: AbseilConfig + ReprC,
65{
66    const CHECK: () = const {
67        osom_lib_reprc::hidden::is_reprc::<*mut u8>();
68        osom_lib_reprc::hidden::is_reprc::<TConfig>();
69        osom_lib_reprc::hidden::is_reprc::<PhantomData<KVP<TKey, TValue>>>();
70    };
71}
72
73unsafe impl<TKey, TValue, TConfig> Send for AbseilHashTable<TKey, TValue, TConfig>
74where
75    TKey: Send + Eq + Hash,
76    TValue: Send,
77    TConfig: AbseilConfig + Send,
78{
79}
80
81unsafe impl<TKey, TValue, TConfig> Sync for AbseilHashTable<TKey, TValue, TConfig>
82where
83    TKey: Sync + Eq + Hash,
84    TValue: Sync,
85    TConfig: AbseilConfig + Sync,
86{
87}
88
89impl<TKey, TValue, TConfig> AbseilHashTable<TKey, TValue, TConfig>
90where
91    TKey: Eq + Hash,
92    TConfig: AbseilConfig,
93{
94    /// Creates a new [`AbseilHashTable`] with the default configuration.
95    #[inline(always)]
96    pub fn new() -> Self
97    where
98        TConfig: Default,
99    {
100        Self::with_config(TConfig::default())
101    }
102
103    /// Creates a new [`AbseilHashTable`] with the specified configuration.
104    #[inline]
105    pub const fn with_config(config: TConfig) -> Self {
106        Self {
107            control_data: null_mut(),
108            kvp_data: null_mut(),
109            elements_count: Length::ZERO,
110            remaining_capacity: Length::ZERO,
111            total_capacity: PowerOfTwo32::ZERO,
112            config: ManuallyDrop::new(config),
113            _marker: PhantomData,
114        }
115    }
116
117    /// Creates a new [`AbseilHashTable`] with the specified configuration and capacity.
118    ///
119    /// # Errors
120    ///
121    /// For details see [`HashTableError`].
122    #[inline]
123    pub fn with_capacity_and_config(capacity: Length, config: TConfig) -> Result<Self, HashTableError> {
124        let mut new_self = Self::with_config(config);
125        new_self.grow_for_size(capacity)?;
126        Ok(new_self)
127    }
128
129    /// Creates a new [`AbseilHashTable`] with the capacity and default configuration.
130    ///
131    ///
132    /// # Errors
133    ///
134    /// For details see [`HashTableError`].
135    #[inline(always)]
136    pub fn with_capacity(capacity: Length) -> Result<Self, HashTableError>
137    where
138        TConfig: Default,
139    {
140        Self::with_capacity_and_config(capacity, TConfig::default())
141    }
142
143    /// Returns the length of the [`AbseilHashTable`].
144    #[inline(always)]
145    pub const fn length(&self) -> Length {
146        self.elements_count
147    }
148
149    /// Returns the capacity of the [`AbseilHashTable`].
150    #[inline(always)]
151    pub const fn capacity(&self) -> Length {
152        unsafe {
153            let sum = self
154                .elements_count
155                .as_u32()
156                .unchecked_add(self.remaining_capacity.as_u32());
157            Length::new_unchecked(sum)
158        }
159    }
160
161    #[inline(always)]
162    pub(super) fn get_block_by_index(&self, index: usize) -> AbseilBlock<TKey, TValue> {
163        unsafe {
164            let control_block_ptr = self.control_data.cast::<[u8; ABSEIL_BLOCK_SIZE]>().add(index);
165            let key_values_ptr = self
166                .kvp_data
167                .cast::<[KVP<TKey, TValue>; ABSEIL_BLOCK_SIZE]>()
168                .add(index);
169            AbseilBlock::new(control_block_ptr, key_values_ptr)
170        }
171    }
172
173    #[inline]
174    pub(super) fn blocks_count(&self) -> PowerOfTwo32 {
175        let value = self.total_capacity.as_usize() / ABSEIL_BLOCK_SIZE;
176        debug_check_or_release_hint!(value == 0 || value.is_power_of_two());
177        unsafe { PowerOfTwo32::new_unchecked(value as u32) }
178    }
179
180    /// Allocates a new backing store (doubling capacity, or using 1 block if empty),
181    /// initialises all control bytes to EMPTY, and rehashes every live element
182    /// into the new table.  `remaining_capacity` is set to
183    /// `floor(new_total_capacity * load_factor) – elements_count`.
184    #[allow(clippy::cast_precision_loss, clippy::cast_sign_loss)]
185    pub(super) fn grow_for_size(&mut self, size: Length) -> Result<(), HashTableError> {
186        let size = size.as_usize() as u64;
187        if size >= u64::from(u32::MAX) {
188            return Err(HashTableError::LengthLimitExceeded);
189        }
190
191        let new_size = (size as f64) * 1f64 / self.config.load_factor().value();
192        if new_size >= f64::from(u32::MAX) {
193            return Err(HashTableError::LengthLimitExceeded);
194        }
195
196        let new_size = new_size as usize;
197        let blocks_capacity = unsafe { (new_size / ABSEIL_BLOCK_SIZE).unchecked_add(1) };
198        let new_blocks_capacity = blocks_capacity.next_power_of_two();
199        if new_blocks_capacity >= u32::MAX as usize / ABSEIL_BLOCK_SIZE {
200            return Err(HashTableError::LengthLimitExceeded);
201        }
202
203        let new_blocks_capacity = unsafe { PowerOfTwo32::new_unchecked(new_blocks_capacity as u32) };
204        let new_layout = AbseilLayout::<TKey, TValue>::new(new_blocks_capacity);
205
206        // Precalculate remaining_capacity
207        let elements_capacity = new_blocks_capacity.as_usize() * ABSEIL_BLOCK_SIZE;
208        let new_max_elements = elements_capacity as f64 * self.config.load_factor().value();
209        let new_max_elements = new_max_elements as u64;
210        if new_max_elements > u64::from(u32::MAX) {
211            return Err(HashTableError::LengthLimitExceeded);
212        }
213
214        let new_max_elements = new_max_elements as u32;
215
216        // Allocate new storage.
217        let new_ptr = self
218            .config
219            .allocator_mut()
220            .allocate(new_layout.total_layout())
221            .map_err(|_| HashTableError::AllocationError)?
222            .as_ptr();
223
224        // Fill initial control bytes.
225        let control_bytes = unsafe { core::slice::from_raw_parts_mut(new_ptr, new_layout.control_blocks_size()) };
226        control_bytes.fill(CONTROL_BYTE_EMPTY);
227
228        // Set new data
229        // We temporarily generate an unsafe copy of the config. But it will be discarded
230        // later on.
231        let new_config = unsafe { core::ptr::read(&raw const self.config) };
232        let mut new_self = Self {
233            control_data: new_ptr,
234            kvp_data: unsafe { new_ptr.add(new_layout.key_value_pairs_offset()) },
235            elements_count: Length::ZERO,
236            remaining_capacity: unsafe { Length::new_unchecked(new_max_elements) },
237            total_capacity: unsafe { PowerOfTwo32::new_unchecked(elements_capacity as u32) },
238            config: new_config,
239            _marker: PhantomData,
240        };
241
242        // Rehash data if needed
243        for kvp in AbseilUnsafeMutIter::from_hash_table(self) {
244            let kvp = unsafe { kvp.read() };
245            let (key, value) = kvp.unpack();
246            unsafe { new_self.insert_without_conflict_and_resize(key, value) };
247        }
248
249        // Drop old memory. We don't need to loop through items though, they have been moved.
250        // So just deallocate memory.
251        self.deconstruct_buffer();
252
253        // Swap self with new_self, and forget the previous value
254        core::mem::swap(self, &mut new_self);
255        core::mem::forget(new_self);
256        Ok(())
257    }
258
259    #[inline(always)]
260    fn deconstruct_buffer(&mut self) {
261        if self.control_data.is_null() {
262            return;
263        }
264        let data = unsafe { NonNull::new_unchecked(self.control_data) };
265        let layout = AbseilLayout::<TKey, TValue>::new(self.blocks_count());
266        unsafe {
267            self.config.allocator_mut().deallocate(data, layout.total_layout());
268        }
269    }
270
271    #[inline]
272    fn deconstruct_buffer_and_drop_data(&mut self) {
273        if self.control_data.is_null() {
274            return;
275        }
276
277        if core::mem::needs_drop::<TKey>() || core::mem::needs_drop::<TValue>() {
278            for kvp in AbseilUnsafeMutIter::from_hash_table(self) {
279                let (key, value) = ptr_to_mut!(kvp).as_mut_tuple();
280                unsafe {
281                    core::ptr::drop_in_place(key);
282                    core::ptr::drop_in_place(value);
283                }
284            }
285        }
286
287        self.deconstruct_buffer();
288    }
289
290    /// This function inserts (key, value) pair. It is an optimized variant of insert,
291    /// where we know for sure that `key` is not present, that resize won't happen,
292    /// and that table does not contain tombstones. This should be used on rehashing
293    /// or cloning only.
294    unsafe fn insert_without_conflict_and_resize(&mut self, key: TKey, value: TValue) {
295        let (h1, h2) = self.config.calculate_partial_hashes(&key);
296        let blocks_count = self.blocks_count();
297
298        for group_index in probe_block_indexes(h1, blocks_count) {
299            let block = self.get_block_by_index(group_index);
300            let control_bytes = ptr_to_mut!(block.control_block_ptr());
301            let mut scan_result = PlatformImpl::empty_scan(control_bytes);
302
303            if let Some(empty_idx) = scan_result.next() {
304                // Empty slot proves the key is absent. Prefer tombstone (reuse deleted slot)
305                // over the empty slot when available.
306                self.remaining_capacity =
307                    unsafe { Length::new_unchecked(self.remaining_capacity.as_u32().unchecked_sub(1)) };
308                let (target_group, target_slot) = (group_index, empty_idx);
309
310                let target_block = self.get_block_by_index(target_group);
311                let target_ctrl = ptr_to_mut!(target_block.control_block_ptr());
312                let target_kvp_ptr = target_block.key_value_pair_at_index(target_slot);
313
314                unsafe {
315                    *target_ctrl.get_unchecked_mut(target_slot) = h2;
316                    target_kvp_ptr.write(KVP { key, value });
317                };
318
319                unsafe {
320                    self.elements_count = Length::new_unchecked(self.elements_count.as_u32().unchecked_add(1));
321                }
322
323                return;
324            }
325        }
326
327        // With correct remaining_capacity management there is always at least one
328        // empty slot in the table, so this path is unreachable.
329        unreachable!("no empty slot found despite remaining_capacity > 0")
330    }
331
332    pub(super) fn get_key_value_raw<Q>(&self, key: &Q) -> Option<*mut KVP<TKey, TValue>>
333    where
334        TKey: Borrow<Q>,
335        Q: Eq + Hash + ?Sized,
336    {
337        let blocks_count = self.blocks_count();
338        let (h1, h2) = self.config.calculate_partial_hashes(key);
339
340        for group_index in probe_block_indexes(h1, blocks_count) {
341            let block = self.get_block_by_index(group_index);
342            let control_bytes = ptr_to_ref!(block.control_block_ptr());
343            let mut scan_result = PlatformImpl::matching_block_scan(control_bytes, h2);
344            for matching_index in scan_result.matching_indexes {
345                let kvp = block.key_value_pair_at_index(matching_index);
346                let kvp_ref = ptr_to_ref!(kvp);
347                if kvp_ref.key.borrow() == key {
348                    return Some(kvp);
349                }
350            }
351
352            if scan_result.empty_buckets.next().is_some() {
353                return None;
354            }
355        }
356
357        None
358    }
359}
360
361impl<TKey, TValue, TConfig> Default for AbseilHashTable<TKey, TValue, TConfig>
362where
363    TKey: Eq + Hash,
364    TConfig: AbseilConfig + Default,
365{
366    fn default() -> Self {
367        Self::new()
368    }
369}
370
371impl<TKey, TValue, TConfig> TryClone for AbseilHashTable<TKey, TValue, TConfig>
372where
373    TKey: Eq + Hash + TryClone,
374    TValue: TryClone,
375    TConfig: AbseilConfig + TryClone,
376{
377    type Error = TryCloneHashTableError;
378
379    fn try_clone(&self) -> Result<Self, Self::Error> {
380        let new_config = self
381            .config
382            .try_clone()
383            .map_err(|_| HashTableError::AllocatorCloningError)?;
384        let mut new_hashtable = Self::with_capacity_and_config(self.elements_count, new_config)?;
385        for kvp in AbseilUnsafeIter::from_hash_table(self) {
386            let kvp = ptr_to_ref!(kvp);
387            let key_clone = kvp
388                .key
389                .try_clone()
390                .map_err(|_| TryCloneHashTableError::KeyOrValueError)?;
391            let value_clone = kvp
392                .value
393                .try_clone()
394                .map_err(|_| TryCloneHashTableError::KeyOrValueError)?;
395            unsafe { new_hashtable.insert_without_conflict_and_resize(key_clone, value_clone) };
396        }
397        Ok(new_hashtable)
398    }
399}
400
401impl<TKey, TValue, TConfig> Clone for AbseilHashTable<TKey, TValue, TConfig>
402where
403    TKey: Eq + Hash + TryClone,
404    TValue: TryClone,
405    TConfig: AbseilConfig + TryClone,
406{
407    fn clone(&self) -> Self {
408        self.try_clone().expect("[AbseilHashTable::try_clone] failure")
409    }
410}
411
412impl<TKey, TValue, TConfig> PartialEq for AbseilHashTable<TKey, TValue, TConfig>
413where
414    TKey: Eq + Hash,
415    TValue: PartialEq,
416    TConfig: AbseilConfig,
417{
418    fn eq(&self, other: &Self) -> bool {
419        use crate::traits::ImmutableHashTable;
420        if self.length() != other.length() {
421            return false;
422        }
423
424        for kvp in self.iter() {
425            let key = kvp.key;
426            let Some(other_value) = other.get(key) else {
427                return false;
428            };
429            let value = kvp.value;
430            if other_value != value {
431                return false;
432            }
433        }
434        true
435    }
436}
437
438impl<TKey, TValue, TConfig> Eq for AbseilHashTable<TKey, TValue, TConfig>
439where
440    TKey: Eq + Hash,
441    TValue: Eq,
442    TConfig: AbseilConfig,
443{
444}
445
446impl<TKey, TValue, TConfig> core::hash::Hash for AbseilHashTable<TKey, TValue, TConfig>
447where
448    TKey: Eq + Hash,
449    TValue: Hash,
450    TConfig: AbseilConfig,
451{
452    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
453        use crate::traits::ImmutableHashTable;
454        use core::hash::Hasher;
455        use osom_lib_hashes::siphash::SipHashBuilder;
456
457        // We need to ensure that calculating the hash does not depend on the order of iteration,
458        // which is not guaranteed at all.
459        //
460        // Therefore, we first calculate a temporary hash, and then add all of those hashes
461        // together. We take advantage of the fact that addition is commutative and associative.
462        let sip_hash_builder = SipHashBuilder::with_keys(0, u64::from(self.length().as_u32()));
463        let mut result = 0u64;
464        for kvp in self.iter() {
465            let mut sip_hash = sip_hash_builder.create_hasher();
466            kvp.key.hash(&mut sip_hash);
467            kvp.value.hash(&mut sip_hash);
468            result = result.wrapping_add(sip_hash.finish());
469        }
470        state.write_u64(result);
471    }
472}
473
474impl<TKey, TValue, TConfig> Drop for AbseilHashTable<TKey, TValue, TConfig>
475where
476    TKey: Eq + Hash,
477    TConfig: AbseilConfig,
478{
479    fn drop(&mut self) {
480        self.deconstruct_buffer_and_drop_data();
481        unsafe {
482            ManuallyDrop::drop(&mut self.config);
483        }
484    }
485}