1#![allow(clippy::wildcard_imports, clippy::cast_possible_wrap, clippy::needless_return)]
2
3use osom_lib_arrays::fixed_array::ConstBufferer;
4use osom_lib_reprc::macros::reprc;
5
6use crate::sha2::sha2_256::portable::SHA2_256_Portable;
7use crate::traits::HashFunction;
8
9use super::sha2_256_template::{SHA2_256_Template, SHA2_256_Updater};
10
11#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
12use crate::sha2::sha2_256::sha2_256_shared::K;
13
14#[cfg(target_arch = "x86")]
15use core::arch::x86::*;
16#[cfg(target_arch = "x86_64")]
17use core::arch::x86_64::*;
18
/// Computes the next four SHA-256 message-schedule words.
///
/// `v0..v3` hold the sixteen most recent schedule words, four per vector, with the
/// oldest word in the lowest lane of `v0`. The returned vector holds the following
/// four words in the same lane order.
///
/// # Safety
///
/// The executing CPU must support the `sha`, `sse2` and `ssse3` extensions.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline]
#[target_feature(enable = "sha,sse2,ssse3")]
unsafe fn schedule(v0: __m128i, v1: __m128i, v2: __m128i, v3: __m128i) -> __m128i {
    // SAFETY: the caller guarantees the required CPU features (see `# Safety`);
    // the intrinsics below only operate on register values.
    unsafe {
        // W[t-16] + sigma0(W[t-15]) for the four lanes.
        let t1 = _mm_sha256msg1_epu32(v0, v1);
        // W[t-7] for the four lanes: upper three words of `v2` followed by the lowest of `v3`.
        let t2 = _mm_alignr_epi8(v3, v2, 4);
        let t3 = _mm_add_epi32(t1, t2);
        // Adds sigma1(W[t-2]) and completes the four new words.
        _mm_sha256msg2_epu32(t3, v3)
    }
}
28
/// Runs four consecutive SHA-256 rounds on the packed working state.
///
/// * `$abef` / `$cdgh` - mutable `__m128i` bindings holding the state words
///   `a, b, e, f` and `c, d, g, h` (first-named word in the highest lane).
/// * `$rest` - the four message-schedule words for these rounds, lowest round in lane 0.
/// * `$i` - index of the four-round group; round constants `K[4*$i .. 4*$i + 4]` are used.
///
/// Must be expanded inside an `unsafe` context whose CPU supports the SHA extensions.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
macro_rules! rounds4 {
    ($abef:ident, $cdgh:ident, $rest:expr, $i:expr) => {{
        let first: usize = 4 * ($i);
        // Lane 0 receives the constant of the earliest round in the group.
        let round_keys = _mm_setr_epi32(
            K[first] as i32,
            K[first + 1] as i32,
            K[first + 2] as i32,
            K[first + 3] as i32,
        );
        let keyed_words = _mm_add_epi32($rest, round_keys);
        // Two rounds consume lanes 0 and 1; the result is the new a,b,e,f,
        // while the previous a,b,e,f now plays the role of c,d,g,h.
        $cdgh = _mm_sha256rnds2_epu32($cdgh, $abef, keyed_words);
        // 0x0E moves lanes 2 and 3 down so the next two rounds can consume them.
        $abef = _mm_sha256rnds2_epu32($abef, $cdgh, _mm_shuffle_epi32(keyed_words, 0x0E));
    }};
}
40
/// Derives the next four message-schedule words and immediately runs four rounds with them.
///
/// * `$abef` / `$cdgh` - packed working-state bindings, forwarded to `rounds4!`.
/// * `$oldest, $older, $newer, $newest` - the four most recent schedule vectors, in order.
/// * `$next` - assignable place that receives the freshly computed schedule vector.
/// * `$group` - index of the four-round group, forwarded to `rounds4!`.
///
/// Must be expanded inside an `unsafe` context, because it calls the unsafe `schedule`.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
macro_rules! schedule_rounds4 {
    (
        $abef:ident, $cdgh:ident,
        $oldest:expr, $older:expr, $newer:expr, $newest:expr, $next:expr,
        $group:expr
    ) => {{
        $next = schedule($oldest, $older, $newer, $newest);
        rounds4!($abef, $cdgh, $next, $group);
    }};
}
52
53#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
54#[target_feature(enable = "sha,sse2,sse3,ssse3,sse4.1")]
55unsafe fn sha2_256_update_state_x86(state: &mut [u32; 8], bufferer: &mut ConstBufferer<'_, 64, u8>) {
56 unsafe {
57 let mask = _mm_set_epi64x(0x0C0D_0E0F_0809_0A0Bu64 as i64, 0x0405_0607_0001_0203u64 as i64);
58
59 let state_ptr: *const __m128i = state.as_ptr().cast();
60 let dcba = _mm_loadu_si128(state_ptr.add(0));
61 let hgfe = _mm_loadu_si128(state_ptr.add(1));
62
63 let cdab = _mm_shuffle_epi32(dcba, 0xB1);
64 let efgh = _mm_shuffle_epi32(hgfe, 0x1B);
65 let mut abef = _mm_alignr_epi8(cdab, efgh, 8);
66 let mut cdgh = _mm_blend_epi16(efgh, cdab, 0xF0);
67
68 while let Some(block) = bufferer.next() {
69 let abef_save = abef;
70 let cdgh_save = cdgh;
71
72 let block_ptr: *const __m128i = block.as_ptr().cast();
73 let mut w0 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.add(0)), mask);
74 let mut w1 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.add(1)), mask);
75 let mut w2 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.add(2)), mask);
76 let mut w3 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.add(3)), mask);
77 let mut w4;
78
79 rounds4!(abef, cdgh, w0, 0);
80 rounds4!(abef, cdgh, w1, 1);
81 rounds4!(abef, cdgh, w2, 2);
82 rounds4!(abef, cdgh, w3, 3);
83 schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 4);
84 schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 5);
85 schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 6);
86 schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 7);
87 schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 8);
88 schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 9);
89 schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 10);
90 schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 11);
91 schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 12);
92 schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 13);
93 schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 14);
94 schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 15);
95
96 abef = _mm_add_epi32(abef, abef_save);
97 cdgh = _mm_add_epi32(cdgh, cdgh_save);
98 }
99
100 let feba = _mm_shuffle_epi32(abef, 0x1B);
101 let dchg = _mm_shuffle_epi32(cdgh, 0xB1);
102 let dcba = _mm_blend_epi16(feba, dchg, 0xF0);
103 let hgef = _mm_alignr_epi8(dchg, feba, 8);
104
105 let state_ptr_mut: *mut __m128i = state.as_mut_ptr().cast();
106 _mm_storeu_si128(state_ptr_mut.add(0), dcba);
107 _mm_storeu_si128(state_ptr_mut.add(1), hgef);
108 }
109}
110
111struct SHA2_256_x86_Updater;
112
113impl SHA2_256_Updater for SHA2_256_x86_Updater {
114 #[inline(always)]
115 fn update_state(state: &mut [u32; 8], bufferer: &mut ConstBufferer<'_, 64, u8>) {
116 cfg_select! {
117 any(target_arch = "x86", target_arch = "x86_64") => {
118 unsafe { sha2_256_update_state_x86(state, bufferer) };
119 },
120 _ => {
121 let _ = state;
122 let _ = bufferer;
123 panic!("SHA2_256_x86 requires x86 or x86_64 target.");
124 },
125 }
126 }
127}
128
129#[reprc]
140#[repr(transparent)]
141#[must_use]
142pub struct SHA2_256_x86 {
143 inner: SHA2_256_Template<SHA2_256_x86_Updater>,
144}
145
146impl SHA2_256_x86 {
147 #[inline(always)]
153 pub const fn new() -> Self {
154 cfg_select! {
155 any(target_arch = "x86", target_arch = "x86_64") => {
156 return Self {
157 inner: SHA2_256_Template::new(),
158 };
159 },
160 _ => {
161 panic!("SHA2_256_x86 requires x86 or x86_64 target.");
162 },
163 }
164 }
165
166 #[inline(always)]
168 pub fn update(&mut self, data: impl AsRef<[u8]>) {
169 self.inner.update(data);
170 }
171
172 #[inline(always)]
174 pub fn result(&self, output: &mut [u8; 32]) {
175 self.inner.result(output);
176 }
177}
178
179impl Default for SHA2_256_x86 {
180 fn default() -> Self {
181 Self::new()
182 }
183}
184
185impl HashFunction for SHA2_256_x86 {
186 type Output = [u8; 32];
187
188 #[inline(always)]
189 fn update(&mut self, data: impl AsRef<[u8]>) {
190 self.update(data);
191 }
192
193 #[inline(always)]
194 fn write_result(&self, output: &mut Self::Output) {
195 self.result(output);
196 }
197}
198
199impl From<SHA2_256_Portable> for SHA2_256_x86 {
200 #[inline(always)]
201 fn from(portable: SHA2_256_Portable) -> Self {
202 Self { inner: portable.into() }
203 }
204}
205
206impl From<SHA2_256_x86> for SHA2_256_Portable {
207 #[inline(always)]
208 fn from(x86: SHA2_256_x86) -> Self {
209 x86.inner.into()
210 }
211}