// osom_lib_hash_tables/bytell/hash_table/bytell_hash_table.rs
1use core::{
2    alloc::Layout,
3    borrow::Borrow,
4    hash::{BuildHasher, Hash},
5    marker::PhantomData,
6    mem::ManuallyDrop,
7    ops::Deref,
8    ptr::{self, NonNull},
9};
10
11#[allow(unused_imports)]
12use core::fmt::Debug;
13
14use osom_lib_alloc::traits::Allocator;
15use osom_lib_macros::debug_check_or_release_hint;
16use osom_lib_primitives::{length::Length, power_of_two::PowerOfTwo32};
17use osom_lib_reprc::traits::ReprC;
18
19use crate::{
20    bytell::{
21        configuration::BytellConfig,
22        constants::JUMP_DISTANCES,
23        errors::BytellError,
24        hash_table::{block_layout::BlockLayoutHolder, control_byte::ControlByte, entry::Entry},
25        hash_to_index::HashToIndex,
26    },
27    helpers::{KVP, ptr_to_mut, ptr_to_ref},
28    traits::{ImmutableHashTable, MutableHashTable},
29};
30
#[doc = include_str!("../README.md")]
#[repr(C)]
#[must_use]
pub struct BytellHashTable<TKey, TValue, TConfig>
where
    TKey: Eq + Hash,
    TConfig: BytellConfig,
{
    // Raw pointer to the contiguous array of blocks backing the table.
    // Null until the first allocation (see `with_config`).
    pub(super) data: *mut u8,
    // Number of key-value pairs currently stored in the table.
    pub(super) elements_count: Length,
    // Number of allocated blocks; `PowerOfTwo32::ZERO` for an unallocated table.
    pub(super) blocks_count: PowerOfTwo32,
    // Wrapped in `ManuallyDrop` so `Drop` can control destruction order:
    // the config (which owns the allocator used to free `data`) must outlive
    // the deallocation of the backing memory.
    pub(super) config: ManuallyDrop<TConfig>,
    // Marks logical ownership of the stored `KVP<TKey, TValue>` pairs for
    // variance and drop-check purposes; zero-sized at runtime.
    _marker: PhantomData<KVP<TKey, TValue>>,
}
45
// SAFETY: the struct is `#[repr(C)]`, and the `CHECK` constant below forces
// every field type to assert its own `ReprC` contract at compile time, so the
// whole struct has a stable C-compatible layout.
unsafe impl<TKey, TValue, TConfig> ReprC for BytellHashTable<TKey, TValue, TConfig>
where
    TKey: Eq + Hash + ReprC,
    TValue: ReprC,
    TConfig: BytellConfig + ReprC,
{
    // Compile-time layout verification: evaluating each field type's
    // `ReprC::CHECK` here makes the build fail if any field ever stops
    // satisfying its `ReprC` contract.
    const CHECK: () = const {
        let () = <*mut u8 as ReprC>::CHECK;
        let () = <Length as ReprC>::CHECK;
        let () = <PowerOfTwo32 as ReprC>::CHECK;
        let () = <TConfig as ReprC>::CHECK;
        let () = <PhantomData<KVP<TKey, TValue>> as ReprC>::CHECK;
    };
}
60
// SAFETY: the table uniquely owns its backing allocation (`data` is never
// shared between tables), so moving it to another thread is sound as long as
// the stored keys and values are themselves `Send`.
// NOTE(review): this impl does not require `TConfig: Send` even though the
// table stores a `TConfig` (and its allocator). That is sound only if
// `BytellConfig` itself implies `Send` — confirm against the trait definition.
unsafe impl<TKey, TValue, TConfig> Send for BytellHashTable<TKey, TValue, TConfig>
where
    TKey: Send + Eq + Hash,
    TValue: Send,
    TConfig: BytellConfig,
{
}
68
// SAFETY: shared references to the table only allow reads of the stored
// entries, so `&BytellHashTable` may be shared across threads when the
// stored keys and values are `Sync`.
// NOTE(review): as with `Send` above, `TConfig: Sync` is not required by the
// bound — sound only if `BytellConfig` implies it; confirm.
unsafe impl<TKey, TValue, TConfig> Sync for BytellHashTable<TKey, TValue, TConfig>
where
    TKey: Sync + Eq + Hash,
    TValue: Sync,
    TConfig: BytellConfig,
{
}
76
77impl<TKey, TValue, TConfig> BytellHashTable<TKey, TValue, TConfig>
78where
79    TKey: Eq + Hash,
80    TConfig: BytellConfig,
81{
82    /// Creates a new [`BytellHashTable`] with the default configuration.
83    #[inline(always)]
84    pub fn new() -> Self {
85        Self::with_config(TConfig::default())
86    }
87
88    /// Creates a new [`BytellHashTable`] with the specified configuration.
89    #[inline]
90    pub fn with_config(config: TConfig) -> Self {
91        Self {
92            data: ptr::null_mut(),
93            elements_count: Length::ZERO,
94            blocks_count: PowerOfTwo32::ZERO,
95            config: ManuallyDrop::new(config),
96            _marker: PhantomData,
97        }
98    }
99
100    /// Creates a new [`BytellHashTable`] with the specified capacity and the default configuration.
101    ///
102    /// # Errors
103    ///
104    /// Returns [`BytellError::AllocationError`] if the allocation fails.
105    ///
106    /// # Panics
107    ///
108    /// If the expected_capacity exceeds [`u32::MAX`]
109    #[inline(always)]
110    pub fn with_capacity(number_of_items: u32) -> Result<Self, BytellError> {
111        Self::with_capacity_and_config(number_of_items, TConfig::default())
112    }
113
114    /// Creates a new [`BytellHashTable`] expected to support passed `number_of_items`.
115    ///
116    /// # Notes
117    ///
118    /// This method will almost surely overallocate, since it takes `config.load_factor()`
119    /// into account.
120    ///
121    /// # Errors
122    ///
123    /// Returns [`BytellError::AllocationError`] if the allocation fails.
124    ///
125    /// # Panics
126    ///
127    /// If the expected_capacity exceeds [`u32::MAX`].
128    pub fn with_capacity_and_config(number_of_items: u32, config: TConfig) -> Result<Self, BytellError> {
129        let block_layout = BlockLayoutHolder::<TKey, TValue>::LAYOUT;
130        let block_capacity = block_layout.block_capacity().value();
131        debug_check_or_release_hint!(block_capacity >= 4, "block_capacity is less than 4");
132        let block_binary_layout = block_layout.layout();
133        let block_size = block_binary_layout.size() as u64;
134        debug_check_or_release_hint!(
135            u32::try_from(block_size).is_ok(),
136            "block_size expected to be at most u32::MAX"
137        );
138
139        #[allow(clippy::cast_sign_loss)]
140        let expected_capacity = ((f64::from(number_of_items)) / config.load_factor().value()) as u64 + 1;
141
142        assert!(
143            expected_capacity < u64::from(u32::MAX),
144            "The number_of_items is too big"
145        );
146        let expected_capacity = expected_capacity as u32;
147        let expected_number_of_buckets = (expected_capacity / block_capacity) + 1;
148        let number_of_buckets = expected_number_of_buckets.next_power_of_two();
149        let block_count = unsafe { PowerOfTwo32::new_unchecked(number_of_buckets) };
150        Self::with_block_count_and_config(block_count, config)
151    }
152
153    /// Reduces the memory of the current table, if possible.
154    ///
155    /// # Notes
156    ///
157    /// If the number of underlying blocks cannot be reduced, it
158    /// does nothing.
159    ///
160    /// Otherwise it tries to reduce the number of blocks to the
161    /// minimal possible, creates a new table with that layout,
162    /// and moves (rehases) items into it. Then overwrites `self`
163    /// with the new table.
164    ///
165    /// # Errors
166    ///
167    /// Returns [`BytellError::AllocationError`] if it cannot allocate
168    /// memory for the new hidden table.
169    pub fn shrink_to_fit(&mut self) -> Result<(), BytellError> {
170        let current_size = self.elements_count;
171        let mut new_blocks_count = unsafe { PowerOfTwo32::new_unchecked(current_size.as_u32().next_power_of_two()) };
172        if Self::static_should_grow(current_size, new_blocks_count, &self.config) {
173            new_blocks_count = new_blocks_count.next();
174        }
175
176        if new_blocks_count == self.blocks_count {
177            return Ok(());
178        }
179
180        let mut new_table = Self::with_block_count_and_config(new_blocks_count, self.config.deref().clone())?;
181        self.move_content_to(&mut new_table);
182        core::mem::swap(self, &mut new_table);
183        Ok(())
184    }
185
186    fn with_block_count_and_config(block_count: PowerOfTwo32, config: TConfig) -> Result<Self, BytellError> {
187        let mut new_table = Self::with_config(config);
188        if block_count == PowerOfTwo32::ZERO {
189            return Ok(new_table);
190        }
191
192        let block_capacity = BlockLayoutHolder::<TKey, TValue>::LAYOUT.block_capacity().as_usize();
193        let block_layout = BlockLayoutHolder::<TKey, TValue>::LAYOUT.layout();
194        let total_size = block_layout.size() * block_count.as_usize();
195        let layout = unsafe { Layout::from_size_align_unchecked(total_size, block_layout.align()) };
196        let ptr = new_table.config.allocator().allocate(layout).map_err(Into::into)?;
197        new_table.data = ptr.as_ptr();
198        new_table.blocks_count = block_count;
199        let new_capacity = new_table.capacity();
200        new_table
201            .config
202            .hash_to_index_mut()
203            .update_for_new_table_capacity(new_capacity);
204
205        // Initialize all control bytes to EMPTY
206        let mut block_ptr = new_table.data;
207        for _ in 0..new_table.blocks_count.value() {
208            unsafe {
209                block_ptr.write_bytes(ControlByte::EMPTY.binary_value(), block_capacity);
210                block_ptr = block_ptr.add(block_layout.size());
211            }
212        }
213
214        Ok(new_table)
215    }
216
217    /// Returns the capacity of the [`BytellHashTable`].
218    #[inline(always)]
219    pub fn capacity(&self) -> PowerOfTwo32 {
220        let block_capacity = BlockLayoutHolder::<TKey, TValue>::LAYOUT.block_capacity().value();
221        let result = self.blocks_count.value() * block_capacity;
222        debug_check_or_release_hint!(result == 0 || result.is_power_of_two(), "result is not a power of two");
223        unsafe { PowerOfTwo32::new_unchecked(result) }
224    }
225
226    #[inline(always)]
227    pub(super) const unsafe fn get_entry_by_index(&self, index: usize) -> Entry<TKey, TValue> {
228        Entry::new(self.data, self.blocks_count, index)
229    }
230
231    #[inline(always)]
232    pub(super) fn should_grow(&self) -> bool {
233        Self::static_should_grow(self.elements_count, self.capacity(), &self.config)
234    }
235
236    #[inline]
237    fn static_should_grow(current_size: Length, current_capacity: PowerOfTwo32, config: &TConfig) -> bool {
238        #[allow(clippy::cast_sign_loss)]
239        {
240            let capacity = f64::from(current_capacity.value());
241            let threshold = config.load_factor().value() * capacity;
242            f64::from(current_size.as_u32() + 1) > threshold
243        }
244    }
245
246    #[allow(clippy::used_underscore_binding)]
247    pub(super) fn grow(&mut self) {
248        let old_block_count = self.blocks_count.as_usize();
249        let new_block_count = (old_block_count + 1).next_power_of_two();
250        debug_check_or_release_hint!(new_block_count < u32::MAX as usize, "Too many blocks");
251        let new_block_count = unsafe { PowerOfTwo32::new_unchecked(new_block_count as u32) };
252        let mut new_table = Self::with_block_count_and_config(new_block_count, self.config.deref().clone())
253            .expect("Failed to allocate new bytell hash table");
254        self.move_content_to(&mut new_table);
255        debug_assert!(self.elements_count == Length::ZERO, "self is not empty");
256        debug_assert!(!new_table.data.is_null(), "data is null");
257        core::mem::swap(self, &mut new_table);
258    }
259
260    pub(super) unsafe fn get_entry_by_key<Q>(&self, key: &Q) -> Entry<TKey, TValue>
261    where
262        TKey: Borrow<Q>,
263        Q: Eq + Hash + ?Sized,
264    {
265        let hash_value = self.config.build_hasher().hash_one(key);
266        let index = self.config.hash_to_index().hash_to_index(hash_value, self.capacity());
267        unsafe { self.get_entry_by_index(index) }
268    }
269
270    pub(super) fn search_for_free_entry(
271        &self,
272        current_entry: &Entry<TKey, TValue>,
273    ) -> Option<(Entry<TKey, TValue>, u8)> {
274        debug_check_or_release_hint!(JUMP_DISTANCES.len() < u8::MAX as usize);
275        let current_entry_index = current_entry.element_index() as usize;
276        let capacity = self.capacity().value() as usize;
277        debug_check_or_release_hint!(capacity > 0, "capacity is zero");
278        for (index, jmp_distance) in JUMP_DISTANCES.iter().enumerate().skip(1) {
279            let real_offset = jmp_distance.wrapping_add(current_entry_index) & (capacity - 1);
280            let entry = Entry::<TKey, TValue>::new(self.data, self.blocks_count, real_offset);
281            unsafe {
282                if *entry.control_byte() == ControlByte::EMPTY {
283                    return Some((entry, index as u8));
284                }
285            }
286        }
287
288        None
289    }
290
291    pub(super) unsafe fn find_parent_for_storage_entry(&self, entry: &Entry<TKey, TValue>) -> Entry<TKey, TValue> {
292        unsafe {
293            debug_check_or_release_hint!(
294                !ptr_to_ref!(entry.control_byte()).is_direct_hit(),
295                "find_parent_for_storage_entry expects storage entry, got direct hit"
296            );
297            let key = &ptr_to_ref!(entry.kvp()).key;
298            let mut current = self.get_entry_by_key(key);
299            debug_check_or_release_hint!(
300                ptr_to_ref!(current.control_byte()).is_direct_hit(),
301                "get_entry_by_key did not return direct hit"
302            );
303            debug_check_or_release_hint!(&current != entry, "current == entry should not happen");
304
305            loop {
306                let next_link = current.next_link();
307                debug_check_or_release_hint!(
308                    next_link.is_some(),
309                    "next_link is None, that shouldn't have happened, but it did"
310                );
311                let next = next_link.unwrap_unchecked();
312                if &next == entry {
313                    return current;
314                }
315                current = next;
316            }
317        }
318    }
319
320    pub(super) fn move_content_to(&mut self, other: &mut Self) {
321        debug_check_or_release_hint!(self.data != other.data);
322
323        unsafe {
324            let capacity = self.capacity().as_usize();
325            let mut el_idx = 0;
326            let mut remaining_items = self.elements_count.as_u32();
327            while el_idx < capacity {
328                if remaining_items == 0 {
329                    break;
330                }
331
332                let entry = Entry::<TKey, TValue>::new(self.data, self.blocks_count, el_idx);
333                el_idx += 1;
334
335                let control_byte = ptr_to_mut!(entry.control_byte());
336                if !control_byte.contains_data() {
337                    continue;
338                }
339                *control_byte = ControlByte::EMPTY;
340
341                let kvp = entry.kvp().read();
342
343                other.insert_or_update_with(kvp.key, || kvp.value, |_| panic!("Update should not happen"));
344
345                remaining_items -= 1;
346            }
347            debug_check_or_release_hint!(remaining_items == 0, "Moved less items than was supposed to");
348            self.elements_count = Length::ZERO;
349        }
350    }
351
352    pub(super) fn clone_content_to(&self, other: &mut Self)
353    where
354        TKey: Clone,
355        TValue: Clone,
356    {
357        let capacity = self.capacity().as_usize();
358        let mut el_idx = 0;
359        while el_idx < capacity {
360            let entry = Entry::<TKey, TValue>::new(self.data, self.blocks_count, el_idx);
361            el_idx += 1;
362
363            let control_byte = ptr_to_ref!(entry.control_byte());
364            if control_byte.contains_data() {
365                continue;
366            }
367
368            let kvp = ptr_to_ref!(entry.kvp());
369            other.insert(kvp.key.clone(), kvp.value.clone());
370        }
371    }
372}
373
374impl<TKey, TValue, TConfig> Default for BytellHashTable<TKey, TValue, TConfig>
375where
376    TKey: Eq + Hash,
377    TConfig: BytellConfig,
378{
379    fn default() -> Self {
380        Self::new()
381    }
382}
383
384impl<TKey, TValue, TConfig> Clone for BytellHashTable<TKey, TValue, TConfig>
385where
386    TKey: Eq + Hash + Clone,
387    TValue: Clone,
388    TConfig: BytellConfig,
389{
390    fn clone(&self) -> Self {
391        let mut new_table = Self::with_capacity_and_config(self.elements_count.as_u32(), self.config.deref().clone())
392            .expect("Failed to allocate new bytell hash table");
393        self.clone_content_to(&mut new_table);
394        new_table
395    }
396}
397
398impl<TKey, TValue, TConfig> PartialEq for BytellHashTable<TKey, TValue, TConfig>
399where
400    TKey: Eq + Hash,
401    TValue: PartialEq,
402    TConfig: BytellConfig,
403{
404    fn eq(&self, other: &Self) -> bool {
405        use crate::traits::ImmutableHashTable;
406        if self.length() != other.length() {
407            return false;
408        }
409        for (key, value) in self.iter() {
410            let Some(other_value) = other.get(key) else {
411                return false;
412            };
413            if other_value != value {
414                return false;
415            }
416        }
417        true
418    }
419}
420
// Marker impl: when the values support full equivalence (`Eq`), the
// pairwise equality defined in `PartialEq` is itself an equivalence relation.
impl<TKey, TValue, TConfig> Eq for BytellHashTable<TKey, TValue, TConfig>
where
    TKey: Eq + Hash,
    TValue: Eq,
    TConfig: BytellConfig,
{
}
428
429impl<TKey, TValue, TConfig> core::hash::Hash for BytellHashTable<TKey, TValue, TConfig>
430where
431    TKey: Eq + Hash,
432    TValue: Hash,
433    TConfig: BytellConfig,
434{
435    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
436        use core::hash::Hasher;
437        use osom_lib_hashes::siphash::SipHashBuilder;
438
439        // We need to ensure that calculating the hash does not depend on the order of iteration,
440        // which is not guaranteed at all.
441        //
442        // Therefore, we first calculate a temporary hash, and then add all of those hashes
443        // together. We take advantage of the fact that addition is commutative and associative.
444        let sip_hash_builder = SipHashBuilder::with_keys(0, u64::from(self.length().as_u32()));
445        let mut result = 0u64;
446        for (key, value) in self.iter() {
447            let mut sip_hash = sip_hash_builder.create_hasher();
448            key.hash(&mut sip_hash);
449            value.hash(&mut sip_hash);
450            result = result.wrapping_add(sip_hash.finish());
451        }
452        state.write_u64(result);
453    }
454}
455
456impl<TKey, TValue, TConfig> Drop for BytellHashTable<TKey, TValue, TConfig>
457where
458    TKey: Eq + Hash,
459    TConfig: BytellConfig,
460{
461    fn drop(&mut self) {
462        unsafe {
463            if core::mem::needs_drop::<TKey>() || core::mem::needs_drop::<TValue>() {
464                let capacity = self.capacity().as_usize();
465                let mut el_idx = 0;
466                let mut remaining_items = self.elements_count.as_u32();
467
468                while el_idx < capacity {
469                    if remaining_items == 0 {
470                        break;
471                    }
472                    let entry = self.get_entry_by_index(el_idx);
473                    el_idx += 1;
474                    if !(*entry.control_byte()).contains_data() {
475                        continue;
476                    }
477                    let kvp = ptr_to_mut!(entry.kvp());
478                    core::ptr::drop_in_place(&raw mut kvp.key);
479                    core::ptr::drop_in_place(&raw mut kvp.value);
480                    remaining_items -= 1;
481                }
482            }
483
484            let blocks_count = self.blocks_count.as_usize();
485            if blocks_count > 0 {
486                let block_size = BlockLayoutHolder::<TKey, TValue>::LAYOUT.layout().size();
487                let block_align = BlockLayoutHolder::<TKey, TValue>::LAYOUT.layout().align();
488                let layout = Layout::from_size_align_unchecked(blocks_count * block_size, block_align);
489                self.config
490                    .allocator()
491                    .deallocate(NonNull::new_unchecked(self.data), layout);
492            }
493
494            ManuallyDrop::drop(&mut self.config);
495        }
496    }
497}