Skip to main content

osom_lib_hash_tables/bytell/hash_table/
bytell_hash_table.rs

1use core::{
2    alloc::Layout,
3    borrow::Borrow,
4    hash::{BuildHasher, Hash},
5    marker::PhantomData,
6    mem::ManuallyDrop,
7    ops::Deref,
8    ptr::{self, NonNull},
9};
10
11#[allow(unused_imports)]
12use core::fmt::Debug;
13
14use osom_lib_alloc::traits::Allocator;
15use osom_lib_macros::debug_check_or_release_hint;
16use osom_lib_primitives::{kvp::KVP, length::Length, power_of_two::PowerOfTwo32};
17use osom_lib_reprc::traits::ReprC;
18use osom_lib_try_clone::TryClone;
19
20use crate::{
21    bytell::{
22        configuration::BytellConfig,
23        constants::JUMP_DISTANCES,
24        hash_table::{block_layout::BlockLayoutHolder, control_byte::ControlByte, entry::Entry},
25        hash_to_index::HashToIndex,
26    },
27    errors::{HashTableError, TryCloneHashTableError},
28    helpers::{ptr_to_mut, ptr_to_ref},
29    traits::{ImmutableHashTable, MutableHashTable},
30};
31
32/// The bytell hash table.
33#[repr(C)]
34#[must_use]
35#[derive(Debug)]
36pub struct BytellHashTable<TKey, TValue, TConfig>
37where
38    TKey: Eq + Hash,
39    TConfig: BytellConfig,
40{
41    pub(super) data: *mut u8,
42    pub(super) elements_count: Length,
43    pub(super) blocks_count: PowerOfTwo32,
44    pub(super) config: ManuallyDrop<TConfig>,
45    _marker: PhantomData<KVP<TKey, TValue>>,
46}
47
48unsafe impl<TKey, TValue, TConfig> ReprC for BytellHashTable<TKey, TValue, TConfig>
49where
50    TKey: Eq + Hash + ReprC,
51    TValue: ReprC,
52    TConfig: BytellConfig + ReprC,
53{
54    const CHECK: () = const {
55        osom_lib_reprc::hidden::is_reprc::<*mut u8>();
56        osom_lib_reprc::hidden::is_reprc::<Length>();
57        osom_lib_reprc::hidden::is_reprc::<PowerOfTwo32>();
58        osom_lib_reprc::hidden::is_reprc::<TConfig>();
59        osom_lib_reprc::hidden::is_reprc::<PhantomData<KVP<TKey, TValue>>>();
60    };
61}
62
63unsafe impl<TKey, TValue, TConfig> Send for BytellHashTable<TKey, TValue, TConfig>
64where
65    TKey: Send + Eq + Hash,
66    TValue: Send,
67    TConfig: BytellConfig + Send,
68{
69}
70
71unsafe impl<TKey, TValue, TConfig> Sync for BytellHashTable<TKey, TValue, TConfig>
72where
73    TKey: Sync + Eq + Hash,
74    TValue: Sync,
75    TConfig: BytellConfig + Sync,
76{
77}
78
79impl<TKey, TValue, TConfig> BytellHashTable<TKey, TValue, TConfig>
80where
81    TKey: Eq + Hash,
82    TConfig: BytellConfig,
83{
84    /// Creates a new [`BytellHashTable`] with the default configuration.
85    #[inline(always)]
86    pub fn new() -> Self
87    where
88        TConfig: Default,
89    {
90        Self::with_config(TConfig::default())
91    }
92
93    /// Creates a new [`BytellHashTable`] with the specified configuration.
94    #[inline]
95    pub const fn with_config(config: TConfig) -> Self {
96        Self {
97            data: ptr::null_mut(),
98            elements_count: Length::ZERO,
99            blocks_count: PowerOfTwo32::ZERO,
100            config: ManuallyDrop::new(config),
101            _marker: PhantomData,
102        }
103    }
104
105    /// Creates a new [`BytellHashTable`] with the specified capacity and the default configuration.
106    ///
107    /// # Errors
108    ///
109    /// For details see [`HashTableError`].
110    #[inline(always)]
111    pub fn with_capacity(capacity: Length) -> Result<Self, HashTableError>
112    where
113        TConfig: Default,
114    {
115        Self::with_capacity_and_config(capacity, TConfig::default())
116    }
117
118    /// Creates a new [`BytellHashTable`] expected to support passed `number_of_items`.
119    ///
120    /// # Notes
121    ///
122    /// This method will almost surely overallocate, since it takes `config.load_factor()`
123    /// into account.
124    ///
125    /// # Errors
126    ///
127    /// For details see [`HashTableError`].
128    pub fn with_capacity_and_config(capacity: Length, config: TConfig) -> Result<Self, HashTableError> {
129        let number_of_items = capacity.as_u32();
130        let block_layout = BlockLayoutHolder::<TKey, TValue>::LAYOUT;
131        let block_capacity = block_layout.block_capacity().value();
132        debug_check_or_release_hint!(block_capacity >= 4, "block_capacity is less than 4");
133        let block_binary_layout = block_layout.layout();
134        let block_size = block_binary_layout.size() as u64;
135        debug_check_or_release_hint!(
136            u32::try_from(block_size).is_ok(),
137            "block_size expected to be at most u32::MAX"
138        );
139
140        #[allow(clippy::cast_sign_loss)]
141        let expected_capacity = ((f64::from(number_of_items)) / config.load_factor().value()) as u64 + 1;
142
143        if expected_capacity >= u64::from(u32::MAX) {
144            return Err(HashTableError::LengthLimitExceeded);
145        }
146
147        let expected_capacity = expected_capacity as u32;
148        let expected_number_of_buckets = (expected_capacity / block_capacity) + 1;
149        let number_of_buckets = expected_number_of_buckets.next_power_of_two();
150        let block_count = unsafe { PowerOfTwo32::new_unchecked(number_of_buckets) };
151        Self::with_block_count_and_config(block_count, config)
152    }
153
154    /// Reduces the memory of the current table, if possible.
155    ///
156    /// # Notes
157    ///
158    /// If the number of underlying blocks cannot be reduced, it
159    /// does nothing.
160    ///
161    /// Otherwise it tries to reduce the number of blocks to the
162    /// minimal possible, creates a new table with that layout,
163    /// and moves (rehases) items into it. Then overwrites `self`
164    /// with the new table.
165    ///
166    /// # Errors
167    ///
168    /// For details see [`HashTableError`].
169    pub fn shrink_to_fit(&mut self) -> Result<(), HashTableError> {
170        let current_size = self.elements_count;
171        let mut new_blocks_count = unsafe { PowerOfTwo32::new_unchecked(current_size.as_u32().next_power_of_two()) };
172        if Self::static_should_grow(current_size, new_blocks_count, &self.config) {
173            new_blocks_count = new_blocks_count.next();
174        }
175
176        if new_blocks_count == self.blocks_count {
177            return Ok(());
178        }
179
180        // We temporarily generate an unsafe copy of the config. But it will be discarded
181
182        let new_config = unsafe { core::ptr::read(self.config.deref()) };
183        let mut new_table = Self::with_block_count_and_config(new_blocks_count, new_config)?;
184        self.move_content_to(&mut new_table);
185        self.deallocate_buffer();
186        core::mem::swap(self, &mut new_table);
187        core::mem::forget(new_table);
188        Ok(())
189    }
190
191    fn with_block_count_and_config(block_count: PowerOfTwo32, config: TConfig) -> Result<Self, HashTableError> {
192        let mut new_table = Self::with_config(config);
193        if block_count == PowerOfTwo32::ZERO {
194            return Ok(new_table);
195        }
196
197        let block_capacity = BlockLayoutHolder::<TKey, TValue>::LAYOUT.block_capacity().as_usize();
198        let block_layout = BlockLayoutHolder::<TKey, TValue>::LAYOUT.layout();
199        let total_size = block_layout.size() * block_count.as_usize();
200        let layout = unsafe { Layout::from_size_align_unchecked(total_size, block_layout.align()) };
201        let ptr = new_table
202            .config
203            .allocator_mut()
204            .allocate(layout)
205            .map_err(|_| HashTableError::AllocationError)?;
206        new_table.data = ptr.as_ptr();
207        new_table.blocks_count = block_count;
208        let new_capacity = new_table.capacity();
209        new_table
210            .config
211            .hash_to_index_mut()
212            .update_for_new_table_capacity(new_capacity);
213
214        // Initialize all control bytes to EMPTY
215        let mut block_ptr = new_table.data;
216        for _ in 0..new_table.blocks_count.value() {
217            unsafe {
218                block_ptr.write_bytes(ControlByte::EMPTY.binary_value(), block_capacity);
219                block_ptr = block_ptr.add(block_layout.size());
220            }
221        }
222
223        Ok(new_table)
224    }
225
226    /// Returns the length of the [`BytellHashTable`].
227    #[inline(always)]
228    pub const fn length(&self) -> Length {
229        self.elements_count
230    }
231
232    /// Returns the capacity of the [`BytellHashTable`].
233    #[inline(always)]
234    pub const fn capacity(&self) -> PowerOfTwo32 {
235        let block_capacity = BlockLayoutHolder::<TKey, TValue>::LAYOUT.block_capacity().value();
236        let result = self.blocks_count.value() * block_capacity;
237        debug_check_or_release_hint!(result == 0 || result.is_power_of_two(), "result is not a power of two");
238        unsafe { PowerOfTwo32::new_unchecked(result) }
239    }
240
241    #[inline(always)]
242    pub(super) const unsafe fn get_entry_by_index(&self, index: usize) -> Entry<TKey, TValue> {
243        Entry::new(self.data, self.blocks_count, index)
244    }
245
246    #[inline(always)]
247    pub(super) fn should_grow(&self) -> bool {
248        Self::static_should_grow(self.elements_count, self.capacity(), &self.config)
249    }
250
251    #[inline]
252    fn static_should_grow(current_size: Length, current_capacity: PowerOfTwo32, config: &TConfig) -> bool {
253        #[allow(clippy::cast_sign_loss)]
254        {
255            let capacity = f64::from(current_capacity.value());
256            let threshold = config.load_factor().value() * capacity;
257            f64::from(current_size.as_u32() + 1) > threshold
258        }
259    }
260
261    #[allow(clippy::used_underscore_binding)]
262    pub(super) fn grow(&mut self) -> Result<(), HashTableError> {
263        let old_block_count = self.blocks_count.as_usize();
264        let new_block_count = (old_block_count + 1).next_power_of_two();
265        debug_check_or_release_hint!(new_block_count < u32::MAX as usize, "Too many blocks");
266        let new_block_count = unsafe { PowerOfTwo32::new_unchecked(new_block_count as u32) };
267        let new_config = unsafe { core::ptr::read(self.config.deref()) };
268        let mut new_table = Self::with_block_count_and_config(new_block_count, new_config)?;
269        self.move_content_to(&mut new_table);
270        debug_assert!(self.elements_count == Length::ZERO, "self is not empty");
271        debug_assert!(!new_table.data.is_null(), "data is null");
272        self.deallocate_buffer();
273        core::mem::swap(self, &mut new_table);
274        core::mem::forget(new_table);
275        Ok(())
276    }
277
278    pub(super) unsafe fn get_entry_by_key<Q>(&self, key: &Q) -> Entry<TKey, TValue>
279    where
280        TKey: Borrow<Q>,
281        Q: Eq + Hash + ?Sized,
282    {
283        let hash_value = self.config.build_hasher().hash_one(key);
284        let index = self.config.hash_to_index().hash_to_index(hash_value, self.capacity());
285        unsafe { self.get_entry_by_index(index) }
286    }
287
288    pub(super) fn search_for_free_entry(
289        &self,
290        current_entry: &Entry<TKey, TValue>,
291    ) -> Option<(Entry<TKey, TValue>, u8)> {
292        debug_check_or_release_hint!(JUMP_DISTANCES.len() < u8::MAX as usize);
293        let current_entry_index = current_entry.element_index() as usize;
294        let capacity = self.capacity().value() as usize;
295        debug_check_or_release_hint!(capacity > 0, "capacity is zero");
296        for (index, jmp_distance) in JUMP_DISTANCES.iter().enumerate().skip(1) {
297            let real_offset = jmp_distance.wrapping_add(current_entry_index) & (capacity - 1);
298            let entry = Entry::<TKey, TValue>::new(self.data, self.blocks_count, real_offset);
299            unsafe {
300                if *entry.control_byte() == ControlByte::EMPTY {
301                    return Some((entry, index as u8));
302                }
303            }
304        }
305
306        None
307    }
308
309    pub(super) unsafe fn find_parent_for_storage_entry(&self, entry: &Entry<TKey, TValue>) -> Entry<TKey, TValue> {
310        unsafe {
311            debug_check_or_release_hint!(
312                !ptr_to_ref!(entry.control_byte()).is_direct_hit(),
313                "find_parent_for_storage_entry expects storage entry, got direct hit"
314            );
315            let key = &ptr_to_ref!(entry.kvp()).key;
316            let mut current = self.get_entry_by_key(key);
317            debug_check_or_release_hint!(
318                ptr_to_ref!(current.control_byte()).is_direct_hit(),
319                "get_entry_by_key did not return direct hit"
320            );
321            debug_check_or_release_hint!(&current != entry, "current == entry should not happen");
322
323            loop {
324                let next_link = current.next_link();
325                debug_check_or_release_hint!(
326                    next_link.is_some(),
327                    "next_link is None, that shouldn't have happened, but it did"
328                );
329                let next = next_link.unwrap_unchecked();
330                if &next == entry {
331                    return current;
332                }
333                current = next;
334            }
335        }
336    }
337
338    pub(super) fn move_content_to(&mut self, other: &mut Self) {
339        debug_check_or_release_hint!(self.data != other.data);
340
341        unsafe {
342            let capacity = self.capacity().as_usize();
343            let mut el_idx = 0;
344            let mut remaining_items = self.elements_count.as_u32();
345            while el_idx < capacity {
346                if remaining_items == 0 {
347                    break;
348                }
349
350                let entry = Entry::<TKey, TValue>::new(self.data, self.blocks_count, el_idx);
351                el_idx += 1;
352
353                let control_byte = ptr_to_mut!(entry.control_byte());
354                if !control_byte.contains_data() {
355                    continue;
356                }
357                *control_byte = ControlByte::EMPTY;
358
359                let (key, value) = entry.kvp().read().unpack();
360
361                other.insert_or_update_with(key, || value, |_| panic!("Update should not happen"));
362
363                remaining_items -= 1;
364            }
365            debug_check_or_release_hint!(remaining_items == 0, "Moved less items than was supposed to");
366            self.elements_count = Length::ZERO;
367        }
368    }
369
370    pub(super) fn clone_content_to(&self, other: &mut Self) -> Result<(), TryCloneHashTableError>
371    where
372        TKey: TryClone,
373        TValue: TryClone,
374    {
375        let capacity = self.capacity().as_usize();
376        let mut el_idx = 0;
377        while el_idx < capacity {
378            let entry = Entry::<TKey, TValue>::new(self.data, self.blocks_count, el_idx);
379            el_idx += 1;
380
381            let control_byte = ptr_to_ref!(entry.control_byte());
382            if !control_byte.contains_data() {
383                continue;
384            }
385
386            let kvp = ptr_to_ref!(entry.kvp());
387            let key = kvp
388                .key
389                .try_clone()
390                .map_err(|_| TryCloneHashTableError::KeyOrValueError)?;
391            let value = kvp
392                .value
393                .try_clone()
394                .map_err(|_| TryCloneHashTableError::KeyOrValueError)?;
395            other.insert(key, value);
396        }
397        Ok(())
398    }
399
400    pub(super) unsafe fn get_key_value_raw<Q>(&self, key: &Q) -> Option<*mut KVP<TKey, TValue>>
401    where
402        TKey: Borrow<Q>,
403        Q: Eq + Hash + ?Sized,
404    {
405        if self.blocks_count.value() == 0 {
406            return None;
407        }
408
409        unsafe {
410            let mut entry = self.get_entry_by_key(key);
411            let control_byte = ptr_to_ref!(entry.control_byte());
412            if !control_byte.is_direct_hit() {
413                return None;
414            }
415
416            loop {
417                let kvp_ptr = entry.kvp();
418                let kvp = ptr_to_ref!(entry.kvp());
419                if kvp.key.borrow() == key {
420                    return Some(kvp_ptr);
421                }
422                if let Some(next) = entry.next_link() {
423                    entry = next;
424                } else {
425                    return None;
426                }
427            }
428        }
429    }
430
431    fn deallocate_buffer(&mut self) {
432        unsafe {
433            let blocks_count = self.blocks_count.as_usize();
434            if blocks_count > 0 {
435                let block_size = BlockLayoutHolder::<TKey, TValue>::LAYOUT.layout().size();
436                let block_align = BlockLayoutHolder::<TKey, TValue>::LAYOUT.layout().align();
437                let layout = Layout::from_size_align_unchecked(blocks_count * block_size, block_align);
438                self.config
439                    .allocator_mut()
440                    .deallocate(NonNull::new_unchecked(self.data), layout);
441            }
442        }
443    }
444}
445
446impl<TKey, TValue, TConfig> Default for BytellHashTable<TKey, TValue, TConfig>
447where
448    TKey: Eq + Hash,
449    TConfig: BytellConfig + Default,
450{
451    fn default() -> Self {
452        Self::new()
453    }
454}
455
456impl<TKey, TValue, TConfig> TryClone for BytellHashTable<TKey, TValue, TConfig>
457where
458    TKey: Eq + Hash + TryClone,
459    TValue: TryClone,
460    TConfig: BytellConfig + TryClone,
461{
462    type Error = TryCloneHashTableError;
463
464    fn try_clone(&self) -> Result<Self, Self::Error> {
465        let new_config = self
466            .config
467            .try_clone()
468            .map_err(|_| HashTableError::AllocatorCloningError)?;
469        let mut new_table = Self::with_capacity_and_config(self.elements_count, new_config)?;
470        self.clone_content_to(&mut new_table)?;
471        Ok(new_table)
472    }
473}
474
475impl<TKey, TValue, TConfig> Clone for BytellHashTable<TKey, TValue, TConfig>
476where
477    TKey: Eq + Hash + TryClone,
478    TValue: TryClone,
479    TConfig: BytellConfig + TryClone,
480{
481    fn clone(&self) -> Self {
482        self.try_clone().expect("[BytellHashTable::try_clone] failure")
483    }
484}
485
486impl<TKey, TValue, TConfig> PartialEq for BytellHashTable<TKey, TValue, TConfig>
487where
488    TKey: Eq + Hash,
489    TValue: PartialEq,
490    TConfig: BytellConfig,
491{
492    fn eq(&self, other: &Self) -> bool {
493        use crate::traits::ImmutableHashTable;
494        if self.length() != other.length() {
495            return false;
496        }
497        for kvp in self.iter() {
498            let Some(other_value) = other.get(kvp.key) else {
499                return false;
500            };
501            if other_value != kvp.value {
502                return false;
503            }
504        }
505        true
506    }
507}
508
509impl<TKey, TValue, TConfig> Eq for BytellHashTable<TKey, TValue, TConfig>
510where
511    TKey: Eq + Hash,
512    TValue: Eq,
513    TConfig: BytellConfig,
514{
515}
516
517impl<TKey, TValue, TConfig> core::hash::Hash for BytellHashTable<TKey, TValue, TConfig>
518where
519    TKey: Eq + Hash,
520    TValue: Hash,
521    TConfig: BytellConfig,
522{
523    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
524        use core::hash::Hasher;
525        use osom_lib_hashes::siphash::SipHashBuilder;
526
527        // We need to ensure that calculating the hash does not depend on the order of iteration,
528        // which is not guaranteed at all.
529        //
530        // Therefore, we first calculate a temporary hash, and then add all of those hashes
531        // together. We take advantage of the fact that addition is commutative and associative.
532        let sip_hash_builder = SipHashBuilder::with_keys(0, u64::from(self.length().as_u32()));
533        let mut result = 0u64;
534        for kvp in self.iter() {
535            let mut sip_hash = sip_hash_builder.create_hasher();
536            kvp.key.hash(&mut sip_hash);
537            kvp.value.hash(&mut sip_hash);
538            result = result.wrapping_add(sip_hash.finish());
539        }
540        state.write_u64(result);
541    }
542}
543
544impl<TKey, TValue, TConfig> Drop for BytellHashTable<TKey, TValue, TConfig>
545where
546    TKey: Eq + Hash,
547    TConfig: BytellConfig,
548{
549    fn drop(&mut self) {
550        unsafe {
551            if core::mem::needs_drop::<TKey>() || core::mem::needs_drop::<TValue>() {
552                let capacity = self.capacity().as_usize();
553                let mut el_idx = 0;
554                let mut remaining_items = self.elements_count.as_u32();
555
556                while el_idx < capacity {
557                    if remaining_items == 0 {
558                        break;
559                    }
560                    let entry = self.get_entry_by_index(el_idx);
561                    el_idx += 1;
562                    if !(*entry.control_byte()).contains_data() {
563                        continue;
564                    }
565                    let kvp = ptr_to_mut!(entry.kvp());
566                    core::ptr::drop_in_place(&raw mut kvp.key);
567                    core::ptr::drop_in_place(&raw mut kvp.value);
568                    remaining_items -= 1;
569                }
570            }
571
572            self.deallocate_buffer();
573
574            ManuallyDrop::drop(&mut self.config);
575        }
576    }
577}