Skip to main content

osom_lib_hashes/sha2/sha2_256/platform/
sha2_256_aarch64.rs

1#![allow(clippy::wildcard_imports, clippy::cast_possible_wrap, clippy::needless_return)]
2
3use osom_lib_arrays::fixed_array::ConstBufferer;
4use osom_lib_reprc::macros::reprc;
5
6use crate::sha2::sha2_256::portable::SHA2_256_Portable;
7use crate::traits::HashFunction;
8
9use super::sha2_256_template::{SHA2_256_Template, SHA2_256_Updater};
10
11#[cfg(target_arch = "aarch64")]
12use core::arch::aarch64::*;
13
14#[cfg(target_arch = "aarch64")]
15use crate::sha2::sha2_256::sha2_256_shared::K;
16
/// Compresses every 64-byte block yielded by `bufferer` into `state` using the
/// ARMv8 SHA-2 instructions.
///
/// `state` holds the eight 32-bit hash words. It is loaded into two vectors
/// (`abcd`, `efgh`) once before the first block and stored back once after the
/// last block, so an empty `bufferer` leaves `state` unchanged.
///
/// # Safety
///
/// The caller must guarantee that the CPU executing this function supports the
/// `sha2` target feature.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha2")]
unsafe fn sha2_256_update_state_aarch64(state: &mut [u32; 8], bufferer: &mut ConstBufferer<'_, 64, u8>) {
    // SAFETY: every pointer handed to a 128-bit load/store below is taken from a
    // bounds-checked subslice of exactly four `u32`s (or sixteen `u8`s), so each
    // access stays inside the borrowed range. Availability of the `sha2`
    // intrinsics is the caller's obligation (see `# Safety`).
    unsafe {
        let mut abcd = vld1q_u32(state[0..4].as_ptr());
        let mut efgh = vld1q_u32(state[4..8].as_ptr());

        // Iterate through the message blocks.
        while let Some(block) = bufferer.next() {
            // Keep original state values.
            let abcd_orig = abcd;
            let efgh_orig = efgh;

            // Load the message block into vectors, assuming little endianness:
            // the bytes of every 32-bit lane are reversed after the load.
            let mut s0 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[0..16].as_ptr())));
            let mut s1 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[16..32].as_ptr())));
            let mut s2 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[32..48].as_ptr())));
            let mut s3 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[48..64].as_ptr())));

            // Round constants are loaded through four-element subslices of `K`
            // rather than `&K[i]`, so the pointer is valid for the full 16-byte read.

            // Rounds 0 to 3
            let mut tmp = vaddq_u32(s0, vld1q_u32(K[0..4].as_ptr()));
            let mut abcd_prev = abcd;
            abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
            efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);

            // Rounds 4 to 7
            tmp = vaddq_u32(s1, vld1q_u32(K[4..8].as_ptr()));
            abcd_prev = abcd;
            abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
            efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);

            // Rounds 8 to 11
            tmp = vaddq_u32(s2, vld1q_u32(K[8..12].as_ptr()));
            abcd_prev = abcd;
            abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
            efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);

            // Rounds 12 to 15
            tmp = vaddq_u32(s3, vld1q_u32(K[12..16].as_ptr()));
            abcd_prev = abcd;
            abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
            efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);

            // Rounds 16 to 63: each pass extends the message schedule in place
            // (s0..s3) and runs sixteen rounds.
            for t in (16..64).step_by(16) {
                // Rounds t to t + 3
                s0 = vsha256su1q_u32(vsha256su0q_u32(s0, s1), s2, s3);
                tmp = vaddq_u32(s0, vld1q_u32(K[t..t + 4].as_ptr()));
                abcd_prev = abcd;
                abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
                efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);

                // Rounds t + 4 to t + 7
                s1 = vsha256su1q_u32(vsha256su0q_u32(s1, s2), s3, s0);
                tmp = vaddq_u32(s1, vld1q_u32(K[t + 4..t + 8].as_ptr()));
                abcd_prev = abcd;
                abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
                efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);

                // Rounds t + 8 to t + 11
                s2 = vsha256su1q_u32(vsha256su0q_u32(s2, s3), s0, s1);
                tmp = vaddq_u32(s2, vld1q_u32(K[t + 8..t + 12].as_ptr()));
                abcd_prev = abcd;
                abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
                efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);

                // Rounds t + 12 to t + 15
                s3 = vsha256su1q_u32(vsha256su0q_u32(s3, s0), s1, s2);
                tmp = vaddq_u32(s3, vld1q_u32(K[t + 12..t + 16].as_ptr()));
                abcd_prev = abcd;
                abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
                efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
            }

            // Add the block-specific state to the original state.
            abcd = vaddq_u32(abcd, abcd_orig);
            efgh = vaddq_u32(efgh, efgh_orig);
        }

        // Store vectors into state.
        vst1q_u32(state[0..4].as_mut_ptr(), abcd);
        vst1q_u32(state[4..8].as_mut_ptr(), efgh);
    }
}
100
101struct SHA2_256_aarch64_Updater;
102
103impl SHA2_256_Updater for SHA2_256_aarch64_Updater {
104    #[inline(always)]
105    fn update_state(state: &mut [u32; 8], bufferer: &mut ConstBufferer<'_, 64, u8>) {
106        cfg_select! {
107            target_arch = "aarch64" => {
108                unsafe { sha2_256_update_state_aarch64(state, bufferer) };
109            },
110            _ => {
111                let _ = state;
112                let _ = bufferer;
113                panic!("SHA2_256_aarch64 requires aarch64 target.");
114            },
115        }
116    }
117}
118
119/// An `aarch64` optimized implementation of the `SHA2_256` algorithm.
120///
121/// This implementation is only available on `aarch64` targets.
122/// The code will panic if the target is invalid.
123///
124/// # Safety
125///
126/// This implementation does not verify that the target supports the required instructions.
127/// In particular `sha2` feature has to be supported. Otherwise the code will
128/// likely crash at runtime.
129#[reprc]
130#[repr(transparent)]
131#[must_use]
132pub struct SHA2_256_aarch64 {
133    inner: SHA2_256_Template<SHA2_256_aarch64_Updater>,
134}
135
136impl SHA2_256_aarch64 {
137    /// Creates a new [`SHA2_256_aarch64`] instance.
138    ///
139    /// # Panics
140    ///
141    /// If `target_arch` is not `aarch64`.
142    #[inline(always)]
143    pub const fn new() -> Self {
144        cfg_select! {
145            target_arch="aarch64" => {
146                return Self {
147                    inner: SHA2_256_Template::new(),
148                };
149            },
150            _ => {
151                panic!("SHA2_256_aarch64 requires aarch64 target.");
152            },
153        }
154    }
155
156    /// Writes a block of data to the underlying state.
157    #[inline(always)]
158    pub fn update(&mut self, data: impl AsRef<[u8]>) {
159        self.inner.update(data);
160    }
161
162    /// Calculates the final hash value.
163    #[inline(always)]
164    pub fn result(&self, output: &mut [u8; 32]) {
165        self.inner.result(output);
166    }
167}
168
169impl Default for SHA2_256_aarch64 {
170    fn default() -> Self {
171        Self::new()
172    }
173}
174
175impl HashFunction for SHA2_256_aarch64 {
176    type Output = [u8; 32];
177
178    #[inline(always)]
179    fn update(&mut self, data: impl AsRef<[u8]>) {
180        self.update(data);
181    }
182
183    #[inline(always)]
184    fn write_result(&self, output: &mut Self::Output) {
185        self.result(output);
186    }
187}
188
189impl From<SHA2_256_Portable> for SHA2_256_aarch64 {
190    #[inline(always)]
191    fn from(portable: SHA2_256_Portable) -> Self {
192        Self { inner: portable.into() }
193    }
194}
195
196impl From<SHA2_256_aarch64> for SHA2_256_Portable {
197    #[inline(always)]
198    fn from(x86: SHA2_256_aarch64) -> Self {
199        x86.inner.into()
200    }
201}