Skip to main content

osom_lib_hashes/
siphash.rs

1//! Holds the implementation of the `SipHash` algorithm.
2
3#![allow(clippy::cast_possible_truncation, clippy::doc_markdown)]
4
5use core::hash::{BuildHasher, Hasher};
6
7use osom_lib_arrays::const_helpers::from_le_const_u64;
8use osom_lib_reprc::macros::reprc;
9
10use crate::traits::HashFunction;
11
12/// Packs up to 7 bytes from `buf[start..start+len]` into a little-endian u64.
13const fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
14    let mut out: u64 = 0;
15    let mut i = 0;
16    while i < len {
17        out |= (buf[start + i] as u64) << (i * 8);
18        i += 1;
19    }
20    out
21}
22
23/// Implementation of the `SipHash` algorithm.
24///
25/// This algorithm is resistant to various hash attacks when C >= 2 and D >= 4.
26///
27/// # Notes
28///
29/// The `HashFunction` implementation returns values in little-endian order,
30/// and thus is cross-platform (with slightly better performance on little-endian platforms).
31///
32/// This algorithm is an implementation of the "SipHash: a fast short-input PRF" paper
33/// by Jean-Philippe Aumasson and Daniel J. Bernstein.
34#[reprc]
35#[must_use]
36pub struct GeneralSipHash<const C: u32, const D: u32> {
37    /// Current state stored as [v0, v2, v1, v3] so the algorithm's natural
38    /// (v0,v2) and (v1,v3) pairs are memory-adjacent for SIMD auto-vectorization.
39    state: [u64; 4],
40
41    /// Unprocessed bytes packed as a little-endian u64.
42    tail: u64,
43
44    /// How many bytes are valid in `tail` (0..=7).
45    ntail: u8,
46
47    /// How many bytes have been hashed so far, mod 256.
48    last_byte: u8,
49}
50
51unsafe impl<const C: u32, const D: u32> Send for GeneralSipHash<C, D> {}
52unsafe impl<const C: u32, const D: u32> Sync for GeneralSipHash<C, D> {}
53
54impl<const C: u32, const D: u32> GeneralSipHash<C, D> {
55    /// Creates a new [`GeneralSipHash`] instance from a given array key.
56    #[inline]
57    pub const fn for_array_key(key: &[u8; 2 * size_of::<u64>()]) -> Self {
58        unsafe {
59            let k0 = from_le_const_u64(key, 0);
60            let k1 = from_le_const_u64(key, size_of::<u64>());
61            Self::for_keys(k0, k1)
62        }
63    }
64
65    /// Creates a new [`GeneralSipHash`] instance from a given slice.
66    ///
67    /// # Panics
68    ///
69    /// If the key is not exactly 16 bytes long, this function will panic.
70    #[inline]
71    pub const fn for_slice_key(key: &[u8]) -> Self {
72        assert!(
73            key.len() == 2 * size_of::<u64>(),
74            "The key must be exactly 16 bytes long."
75        );
76        unsafe {
77            let k0 = from_le_const_u64(key, 0);
78            let k1 = from_le_const_u64(key, size_of::<u64>());
79            Self::for_keys(k0, k1)
80        }
81    }
82
83    /// Creates a new [`GeneralSipHash`] instance from a key pair.
84    #[inline]
85    pub const fn for_keys(key0: u64, key1: u64) -> Self {
86        Self {
87            // [v0, v2, v1, v3]: (v0,v2) and (v1,v3) are adjacent pairs.
88            state: [
89                key0 ^ 0x736f6d6570736575, // v0
90                key0 ^ 0x6c7967656e657261, // v2
91                key1 ^ 0x646f72616e646f6d, // v1
92                key1 ^ 0x7465646279746573, // v3
93            ],
94            tail: 0,
95            ntail: 0,
96            last_byte: 0,
97        }
98    }
99
100    #[inline]
101    const fn process_block(state: &mut [u64; 4], value: u64) {
102        state[3] ^= value; // v3 ^= value
103        sip_rounds::<C>(state);
104        state[0] ^= value; // v0 ^= value
105    }
106
107    /// Updates the underlying state with the given block.
108    ///
109    /// # Panics
110    ///
111    /// If the length of the block exceeds `u32::MAX`.
112    pub const fn update_const(&mut self, data: &[u8]) {
113        let length = data.len();
114
115        assert!(
116            length <= u32::MAX as usize,
117            "The max size of a chunk for SipHash is u32::MAX."
118        );
119
120        self.last_byte = self.last_byte.wrapping_add(length as u8);
121
122        let mut data_offset = 0usize;
123
124        if self.ntail != 0 {
125            let needed = 8 - self.ntail as usize;
126            let fill_len = if length < needed { length } else { needed };
127            self.tail |= u8to64_le(data, 0, fill_len) << (8 * self.ntail as u32);
128            if length < needed {
129                self.ntail += length as u8;
130                return;
131            }
132            Self::process_block(&mut self.state, self.tail);
133            self.ntail = 0;
134            self.tail = 0;
135            data_offset = needed;
136        }
137
138        let remaining = length - data_offset;
139        let left = remaining & 0x7;
140        let end = data_offset + remaining - left;
141
142        while data_offset < end {
143            let mi = unsafe { from_le_const_u64(data, data_offset) };
144            Self::process_block(&mut self.state, mi);
145            data_offset += 8;
146        }
147
148        self.tail = u8to64_le(data, data_offset, left);
149        self.ntail = left as u8;
150    }
151
152    /// Calculates the final hash value.
153    ///
154    /// This function does not update the internal state, and thus
155    /// [`SipHash`] can still be used afterwards.
156    #[must_use]
157    pub const fn result_const(&self) -> u64 {
158        let mut state = self.state;
159        let b: u64 = ((self.last_byte as u64) << 56) | self.tail;
160
161        state[3] ^= b;
162        sip_rounds::<C>(&mut state);
163        state[0] ^= b;
164
165        state[1] ^= 0xff; // v2 ^= 0xff (v2 is at index 1 in [v0, v2, v1, v3])
166        sip_rounds::<D>(&mut state);
167        state[0] ^ state[1] ^ state[2] ^ state[3]
168    }
169}
170
171impl<const C: u32, const D: u32> HashFunction for GeneralSipHash<C, D> {
172    type Output = [u8; 8];
173
174    #[inline(always)]
175    fn update(&mut self, data: impl AsRef<[u8]>) {
176        self.update_const(data.as_ref());
177    }
178
179    #[inline(always)]
180    fn write_result(&self, output: &mut Self::Output) {
181        *output = self.result_const().to_le_bytes();
182    }
183}
184
185#[inline]
186const fn sip_rounds<const ROUNDS: u32>(state: &mut [u64; 4]) {
187    // State layout: [v0, v2, v1, v3] at indices [0, 1, 2, 3].
188    let mut index = 0;
189    while index < ROUNDS {
190        state[0] = state[0].wrapping_add(state[2]);
191        state[2] = state[2].rotate_left(13);
192        state[2] ^= state[0];
193        state[0] = state[0].rotate_left(32);
194        state[1] = state[1].wrapping_add(state[3]);
195        state[3] = state[3].rotate_left(16);
196        state[3] ^= state[1];
197        state[0] = state[0].wrapping_add(state[3]);
198        state[3] = state[3].rotate_left(21);
199        state[3] ^= state[0];
200        state[1] = state[1].wrapping_add(state[2]);
201        state[2] = state[2].rotate_left(17);
202        state[2] ^= state[1];
203        state[1] = state[1].rotate_left(32);
204        index += 1;
205    }
206}
207
208impl<const C: u32, const D: u32> Hasher for GeneralSipHash<C, D> {
209    #[inline(always)]
210    fn finish(&self) -> u64 {
211        self.result_const()
212    }
213
214    #[inline(always)]
215    fn write(&mut self, bytes: &[u8]) {
216        self.update_const(bytes);
217    }
218}
219
220/// Represents a builder for [`GeneralSipHash`].
221#[reprc]
222#[must_use]
223pub struct GeneralSipHashBuilder<const C: u32, const D: u32> {
224    key0: u64,
225    key1: u64,
226}
227
228impl<const C: u32, const D: u32> GeneralSipHashBuilder<C, D> {
229    /// Creates a new [`GeneralSipHashBuilder`] instance with custom keys.
230    #[inline(always)]
231    pub const fn with_keys(key0: u64, key1: u64) -> Self {
232        Self { key0, key1 }
233    }
234
235    /// Creates a new [`GeneralSipHashBuilder`] instance from a given array key.
236    #[inline]
237    pub const fn with_array_key(key: &[u8; 2 * size_of::<u64>()]) -> Self {
238        unsafe {
239            let k0 = from_le_const_u64(key, 0);
240            let k1 = from_le_const_u64(key, size_of::<u64>());
241            Self::with_keys(k0, k1)
242        }
243    }
244
245    /// Creates a new [`GeneralSipHashBuilder`] instance from a given slice.
246    ///
247    /// # Panics
248    ///
249    /// If the key is not exactly 16 bytes long, this function will panic.
250    #[inline]
251    pub const fn with_slice_key(key: &[u8]) -> Self {
252        assert!(
253            key.len() == 2 * size_of::<u64>(),
254            "The key must be exactly 16 bytes long."
255        );
256
257        unsafe {
258            let k0 = from_le_const_u64(key, 0);
259            let k1 = from_le_const_u64(key, size_of::<u64>());
260            Self::with_keys(k0, k1)
261        }
262    }
263
264    /// Creates a new [`GeneralSipHash`] instance from the builder.
265    #[inline(always)]
266    pub const fn create_hasher(&self) -> GeneralSipHash<C, D> {
267        GeneralSipHash::for_keys(self.key0, self.key1)
268    }
269}
270
271impl<const C: u32, const D: u32> BuildHasher for GeneralSipHashBuilder<C, D> {
272    type Hasher = GeneralSipHash<C, D>;
273
274    #[inline(always)]
275    fn build_hasher(&self) -> Self::Hasher {
276        self.create_hasher()
277    }
278}
279
280impl<const C: u32, const D: u32> Clone for GeneralSipHashBuilder<C, D> {
281    #[inline(always)]
282    fn clone(&self) -> Self {
283        Self {
284            key0: self.key0,
285            key1: self.key1,
286        }
287    }
288}
289
290/// The alias for [`GeneralSipHash<2, 4>`], which is an optimal choice of constants.
291pub type SipHash = GeneralSipHash<2, 4>;
292
293/// The alias for [`GeneralSipHashBuilder<2, 4>`], which is an optimal choice of constants.
294pub type SipHashBuilder = GeneralSipHashBuilder<2, 4>;