1use crate::{Block, Key};
5use universal_hash::{consts::U16, NewUniversalHash, Output, UniversalHash};
6
7#[cfg(target_arch = "x86")]
8use core::arch::x86::*;
9#[cfg(target_arch = "x86_64")]
10use core::arch::x86_64::*;
11
/// Universal-hash state kept in two 128-bit SIMD values.
///
/// Constructed through `NewUniversalHash::new` and driven through the
/// `UniversalHash` methods implemented in this file.
#[derive(Clone)]
pub struct Polyval {
    /// Key block loaded once in `new`; one operand of every multiplication in `mul`.
    h: __m128i,
    /// Running accumulator: zeroed by `new` and `reset`, rewritten by `mul`,
    /// and handed back to the caller by `finalize`.
    y: __m128i,
}
18
19impl NewUniversalHash for Polyval {
20 type KeySize = U16;
21
22 fn new(h: &Key) -> Self {
24 unsafe {
25 #[allow(clippy::cast_ptr_alignment)]
27 Self {
28 h: _mm_loadu_si128(h.as_ptr() as *const __m128i),
29 y: _mm_setzero_si128(),
30 }
31 }
32 }
33}
34
35impl UniversalHash for Polyval {
36 type BlockSize = U16;
37
38 #[inline]
39 fn update(&mut self, x: &Block) {
40 unsafe {
41 self.mul(x);
42 }
43 }
44
45 fn reset(&mut self) {
47 unsafe {
48 self.y = _mm_setzero_si128();
49 }
50 }
51
52 fn finalize(self) -> Output<Self> {
54 unsafe { core::mem::transmute(self.y) }
55 }
56}
57
impl Polyval {
    /// XORs block `x` into the accumulator, multiplies the result by the key
    /// `h` with carry-less multiplication, reduces the 256-bit product back to
    /// 128 bits, and stores it as the new accumulator.
    ///
    /// # Safety
    ///
    /// The caller must ensure the running CPU supports the `pclmulqdq` and
    /// `sse4.1` features that this function is compiled with.
    /// NOTE(review): the load below reads 16 bytes from `x`; this assumes
    /// `Block` is 16 bytes (consistent with `BlockSize = U16`) — confirm.
    #[inline]
    #[target_feature(enable = "pclmulqdq")]
    #[target_feature(enable = "sse4.1")]
    unsafe fn mul(&mut self, x: &Block) {
        let h = self.h;

        // `_mm_loadu_si128` is the unaligned load, so the alignment-raising
        // pointer cast is harmless here.
        #[allow(clippy::cast_ptr_alignment)]
        let x = _mm_loadu_si128(x.as_ptr() as *const __m128i);
        // Fold the incoming block into the running state before multiplying.
        let y = _mm_xor_si128(self.y, x);

        // Split both operands into 64-bit halves. Shuffle control 0x0E copies
        // the upper 64 bits of its input into the lower 64 bits of the result,
        // so `h1`/`y1` expose the high half in the low lane and `h2`/`y2` hold
        // `low ^ high` in the low lane.
        let h0 = h;
        let h1 = _mm_shuffle_epi32(h, 0x0E);
        let h2 = _mm_xor_si128(h0, h1);
        let y0 = y;

        // Karatsuba-style 128x128 carry-less multiply using three 64x64
        // products: low*low (selector 0x00), high*high (selector 0x11), and
        // (low^high)*(low^high); XORing the first two into the third leaves
        // the middle term.
        let y1 = _mm_shuffle_epi32(y, 0x0E);
        let y2 = _mm_xor_si128(y0, y1);
        let t0 = _mm_clmulepi64_si128(y0, h0, 0x00);
        let t1 = _mm_clmulepi64_si128(y, h, 0x11);
        let t2 = _mm_clmulepi64_si128(y2, h2, 0x00);
        let t2 = _mm_xor_si128(t2, _mm_xor_si128(t0, t1));
        // Arrange the 256-bit product as four 64-bit words, least significant
        // first, in the LOW lane of `v0..v3`. The upper lanes carry leftover
        // values that are never read: every later step is lane-wise and the
        // final `_mm_unpacklo_epi64` keeps only the low lanes.
        let v0 = t0;
        let v1 = _mm_xor_si128(_mm_shuffle_epi32(t0, 0x0E), t2);
        let v2 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E));
        let v3 = _mm_shuffle_epi32(t1, 0x0E);

        // Reduction, step 1: fold the lowest word `v0` into `v2` (itself plus
        // right shifts by 1, 2, 7) and into `v1` (the bits those right shifts
        // drop, recovered with left shifts by 63, 62, 57).
        // NOTE(review): these shift amounts presumably encode the POLYVAL
        // field polynomial x^128 + x^127 + x^126 + x^121 + 1 — confirm against
        // the specification.
        let v2 = xor5(
            v2,
            v0,
            _mm_srli_epi64(v0, 1),
            _mm_srli_epi64(v0, 2),
            _mm_srli_epi64(v0, 7),
        );

        let v1 = xor4(
            v1,
            _mm_slli_epi64(v0, 63),
            _mm_slli_epi64(v0, 62),
            _mm_slli_epi64(v0, 57),
        );

        // Reduction, step 2: the same fold one word higher, using the updated
        // `v1` as the source and `v3`/`v2` as the targets.
        let v3 = xor5(
            v3,
            v1,
            _mm_srli_epi64(v1, 1),
            _mm_srli_epi64(v1, 2),
            _mm_srli_epi64(v1, 7),
        );

        let v2 = xor4(
            v2,
            _mm_slli_epi64(v1, 63),
            _mm_slli_epi64(v1, 62),
            _mm_slli_epi64(v1, 57),
        );

        // Pack the two surviving words: low lane of `v2` becomes bits 0..63,
        // low lane of `v3` becomes bits 64..127 of the new accumulator.
        self.y = _mm_unpacklo_epi64(v2, v3);
    }
}
121
#[cfg(feature = "zeroize")]
impl Drop for Polyval {
    /// Overwrites both the key and the accumulator via `Zeroize` when the
    /// state is dropped.
    fn drop(&mut self) {
        use zeroize::Zeroize;

        // Borrow both fields mutably at once so each can be wiped in turn.
        let Self { h, y } = self;
        h.zeroize();
        y.zeroize();
    }
}
130
/// Returns the bitwise XOR of four 128-bit vectors.
///
/// # Safety
///
/// Calls the `_mm_xor_si128` intrinsic, so the CPU must support the
/// instruction set that intrinsic requires.
#[inline(always)]
unsafe fn xor4(e1: __m128i, e2: __m128i, e3: __m128i, e4: __m128i) -> __m128i {
    let first_pair = _mm_xor_si128(e1, e2);
    let second_pair = _mm_xor_si128(e3, e4);
    _mm_xor_si128(first_pair, second_pair)
}
135
/// Returns the bitwise XOR of five 128-bit vectors.
///
/// # Safety
///
/// Calls the `_mm_xor_si128` intrinsic, so the CPU must support the
/// instruction set that intrinsic requires.
#[inline(always)]
unsafe fn xor5(e1: __m128i, e2: __m128i, e3: __m128i, e4: __m128i, e5: __m128i) -> __m128i {
    let middle_pair = _mm_xor_si128(e2, e3);
    let last_pair = _mm_xor_si128(e4, e5);
    let tail = _mm_xor_si128(middle_pair, last_pair);
    _mm_xor_si128(e1, tail)
}