osom_lib_hashes/sha2/sha2_256/platform/
sha2_256_x86.rs

1#![allow(clippy::wildcard_imports, clippy::cast_possible_wrap, clippy::needless_return)]
2
3use osom_lib_arrays::fixed_array::ConstBufferer;
4use osom_lib_reprc::macros::reprc;
5
6use crate::sha2::sha2_256::portable::SHA2_256_Portable;
7use crate::traits::HashFunction;
8
9use super::sha2_256_template::{SHA2_256_Template, SHA2_256_Updater};
10
11#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
12use crate::sha2::sha2_256::sha2_256_shared::K;
13
14#[cfg(target_arch = "x86")]
15use core::arch::x86::*;
16#[cfg(target_arch = "x86_64")]
17use core::arch::x86_64::*;
18
/// Computes the next four SHA-256 message-schedule words.
///
/// `v0..v3` hold the sixteen most recent schedule words `W[t-16] ..= W[t-1]`,
/// four per vector with the lowest index in lane 0. The returned vector holds
/// `W[t] ..= W[t+3]` in the same lane order.
///
/// The function carries the same `#[target_feature]` set as the compression
/// routine that calls it. Without the attribute the SHA/SSSE3 intrinsics can
/// only be inlined if this helper itself is first inlined into a
/// feature-enabled caller; with it, they are inlined here directly.
///
/// # Safety
///
/// The executing CPU must support the `sha`, `sse2`, `sse3`, `ssse3` and
/// `sse4.1` features.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline]
#[target_feature(enable = "sha,sse2,sse3,ssse3,sse4.1")]
// On toolchains where these intrinsics are safe inside a matching
// `#[target_feature]` function the inner block is redundant; it is kept so the
// code also builds on toolchains where the intrinsics are still `unsafe`.
#[allow(unused_unsafe)]
unsafe fn schedule(v0: __m128i, v1: __m128i, v2: __m128i, v3: __m128i) -> __m128i {
    // SAFETY: the caller guarantees the required CPU features are available.
    unsafe {
        // Lane i: W[t-16+i] + sigma0(W[t-15+i]).
        let t1 = _mm_sha256msg1_epu32(v0, v1);
        // Lane i: W[t-7+i], taken from the v3:v2 pair shifted right by one word.
        let t2 = _mm_alignr_epi8(v3, v2, 4);
        let t3 = _mm_add_epi32(t1, t2);
        // Adds sigma1(W[t-2+i]), completing the four new words.
        _mm_sha256msg2_epu32(t3, v3)
    }
}
28
/// Runs four consecutive SHA-256 rounds on the packed working state.
///
/// * `$abef`, `$cdgh` - mutable `__m128i` variables holding the working
///   variables packed as `A,B,E,F` and `C,D,G,H` (highest lane first); both are
///   updated in place.
/// * `$words` - vector with the four message-schedule words for these rounds.
/// * `$group` - index of the four-round group; constants `K[4 * $group ..][..4]`
///   are added to the words.
///
/// Must be expanded in a context where the SHA and SSE2 intrinsics may be called.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
macro_rules! rounds4 {
    ($abef:ident, $cdgh:ident, $words:expr, $group:expr) => {{
        // Position of the first round constant used by this group.
        let base: usize = 4 * ($group);
        // `_mm_set_epi32` takes the highest lane first, so lane 0 ends up with `K[base]`.
        let round_constants = _mm_set_epi32(
            K[base + 3] as i32,
            K[base + 2] as i32,
            K[base + 1] as i32,
            K[base] as i32,
        );
        let wk = _mm_add_epi32($words, round_constants);
        // Rounds 1-2 consume the two low lanes of `wk`.
        $cdgh = _mm_sha256rnds2_epu32($cdgh, $abef, wk);
        // Bring the two high lanes down for rounds 3-4.
        let wk_upper = _mm_shuffle_epi32(wk, 0x0E);
        $abef = _mm_sha256rnds2_epu32($abef, $cdgh, wk_upper);
    }};
}
40
/// Derives the next four schedule words and immediately runs four rounds with them.
///
/// The four input vectors are passed to `schedule`; its result is assigned to
/// `$next`, which is then handed to `rounds4!` together with the working state
/// and the round-group index `$group`.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
macro_rules! schedule_rounds4 {
    (
        $abef:ident, $cdgh:ident,
        $prev0:expr, $prev1:expr, $prev2:expr, $prev3:expr, $next:expr,
        $group:expr
    ) => {{
        // `$next` must be an assignable place; it receives the new words.
        $next = schedule($prev0, $prev1, $prev2, $prev3);
        rounds4!($abef, $cdgh, $next, $group);
    }};
}
52
53#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
54#[target_feature(enable = "sha,sse2,sse3,ssse3,sse4.1")]
55unsafe fn sha2_256_update_state_x86(state: &mut [u32; 8], bufferer: &mut ConstBufferer<'_, 64, u8>) {
56    unsafe {
57        let mask = _mm_set_epi64x(0x0C0D_0E0F_0809_0A0Bu64 as i64, 0x0405_0607_0001_0203u64 as i64);
58
59        let state_ptr: *const __m128i = state.as_ptr().cast();
60        let dcba = _mm_loadu_si128(state_ptr.add(0));
61        let hgfe = _mm_loadu_si128(state_ptr.add(1));
62
63        let cdab = _mm_shuffle_epi32(dcba, 0xB1);
64        let efgh = _mm_shuffle_epi32(hgfe, 0x1B);
65        let mut abef = _mm_alignr_epi8(cdab, efgh, 8);
66        let mut cdgh = _mm_blend_epi16(efgh, cdab, 0xF0);
67
68        while let Some(block) = bufferer.next() {
69            let abef_save = abef;
70            let cdgh_save = cdgh;
71
72            let block_ptr: *const __m128i = block.as_ptr().cast();
73            let mut w0 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.add(0)), mask);
74            let mut w1 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.add(1)), mask);
75            let mut w2 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.add(2)), mask);
76            let mut w3 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.add(3)), mask);
77            let mut w4;
78
79            rounds4!(abef, cdgh, w0, 0);
80            rounds4!(abef, cdgh, w1, 1);
81            rounds4!(abef, cdgh, w2, 2);
82            rounds4!(abef, cdgh, w3, 3);
83            schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 4);
84            schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 5);
85            schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 6);
86            schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 7);
87            schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 8);
88            schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 9);
89            schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 10);
90            schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 11);
91            schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 12);
92            schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 13);
93            schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 14);
94            schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 15);
95
96            abef = _mm_add_epi32(abef, abef_save);
97            cdgh = _mm_add_epi32(cdgh, cdgh_save);
98        }
99
100        let feba = _mm_shuffle_epi32(abef, 0x1B);
101        let dchg = _mm_shuffle_epi32(cdgh, 0xB1);
102        let dcba = _mm_blend_epi16(feba, dchg, 0xF0);
103        let hgef = _mm_alignr_epi8(dchg, feba, 8);
104
105        let state_ptr_mut: *mut __m128i = state.as_mut_ptr().cast();
106        _mm_storeu_si128(state_ptr_mut.add(0), dcba);
107        _mm_storeu_si128(state_ptr_mut.add(1), hgef);
108    }
109}
110
111struct SHA2_256_x86_Updater;
112
113impl SHA2_256_Updater for SHA2_256_x86_Updater {
114    #[inline(always)]
115    fn update_state(state: &mut [u32; 8], bufferer: &mut ConstBufferer<'_, 64, u8>) {
116        cfg_select! {
117            any(target_arch = "x86", target_arch = "x86_64") => {
118                unsafe { sha2_256_update_state_x86(state, bufferer) };
119            },
120            _ => {
121                let _ = state;
122                let _ = bufferer;
123                panic!("SHA2_256_x86 requires x86 or x86_64 target.");
124            },
125        }
126    }
127}
128
129/// An `x86` and `x86_64` optimized implementation of the `SHA2_256` algorithm.
130///
131/// This implementation is only available on `x86` and `x86_64` targets.
132/// The code will panic if the target is invalid.
133///
134/// # Safety
135///
136/// This implementation does not verify that the target supports the required instructions.
137/// In particular `sha,sse4.1` features have to be supported. Otherwise the code will
138/// likely crash at runtime.
139#[reprc]
140#[repr(transparent)]
141#[must_use]
142pub struct SHA2_256_x86 {
143    inner: SHA2_256_Template<SHA2_256_x86_Updater>,
144}
145
146impl SHA2_256_x86 {
147    /// Creates a new [`SHA2_256_x86`] instance.
148    ///
149    /// # Panics
150    ///
151    /// If `target_arch` is neither `x86` nor `x86_64`.
152    #[inline(always)]
153    pub const fn new() -> Self {
154        cfg_select! {
155            any(target_arch = "x86", target_arch = "x86_64") => {
156                return Self {
157                    inner: SHA2_256_Template::new(),
158                };
159            },
160            _ => {
161                panic!("SHA2_256_x86 requires x86 or x86_64 target.");
162            },
163        }
164    }
165
166    /// Writes a block of data to the underlying state.
167    #[inline(always)]
168    pub fn update(&mut self, data: impl AsRef<[u8]>) {
169        self.inner.update(data);
170    }
171
172    /// Calculates the final hash value.
173    #[inline(always)]
174    pub fn result(&self, output: &mut [u8; 32]) {
175        self.inner.result(output);
176    }
177}
178
179impl Default for SHA2_256_x86 {
180    fn default() -> Self {
181        Self::new()
182    }
183}
184
185impl HashFunction for SHA2_256_x86 {
186    type Output = [u8; 32];
187
188    #[inline(always)]
189    fn update(&mut self, data: impl AsRef<[u8]>) {
190        self.update(data);
191    }
192
193    #[inline(always)]
194    fn write_result(&self, output: &mut Self::Output) {
195        self.result(output);
196    }
197}
198
199impl From<SHA2_256_Portable> for SHA2_256_x86 {
200    #[inline(always)]
201    fn from(portable: SHA2_256_Portable) -> Self {
202        Self { inner: portable.into() }
203    }
204}
205
206impl From<SHA2_256_x86> for SHA2_256_Portable {
207    #[inline(always)]
208    fn from(x86: SHA2_256_x86) -> Self {
209        x86.inner.into()
210    }
211}
osom_lib_hashes/sha2/sha2_256/platform/sha2_256_x86.rs

osom_lib_hashes/sha2/sha2_256/platform/
sha2_256_x86.rs