/*-
 * Copyright (c) 2014-2021 The FreeBSD Foundation
 * Copyright (c) 2018 iXsystems, Inc
 * All rights reserved.
 *
 * Portions of this software were developed by John-Mark Gurney
 * under the sponsorship of the FreeBSD Foundation and
 * Rubicon Communications, LLC (Netgate).
 *
 * Portions of this software were developed by Ararat River
 * Consulting, LLC under sponsorship of the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *
 * $FreeBSD$
 *
 * This file implements AES-CCM+CBC-MAC, as described
 * at https://tools.ietf.org/html/rfc3610, using Intel's
 * AES-NI instructions.
 *
 */

#include <sys/types.h>
#include <sys/endian.h>
#include <sys/param.h>

#include <sys/systm.h>
#include <crypto/aesni/aesni.h>
#include <crypto/aesni/aesni_os.h>
#include <crypto/aesni/aesencdec.h>
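/*
 * Convenience wrapper around aesni_enc() from aesencdec.h: the key
 * argument is reinterpreted as the expanded __m128i round-key
 * schedule, and the AES round count is passed as nr - 1, per that
 * helper's calling convention.
 */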
#define AESNI_ENC(d, k, nr)	aesni_enc(nr-1, (const __m128i*)k, d)

#include <wmmintrin.h>
#include <emmintrin.h>
#include <smmintrin.h>

/*
 * Encrypt a single 128-bit block after
 * doing an xor.  This is also used to
 * decrypt (yay symmetric encryption).
 */
static inline __m128i
xor_and_encrypt(__m128i a, __m128i b, const unsigned char *k, int nr)
{
	__m128i retval = _mm_xor_si128(a, b);

	retval = AESNI_ENC(retval, k, nr);
	return (retval);
}

/*
 * Put value at the end of block, starting at offset.
 * (This goes backwards, putting bytes in *until* it
 * reaches offset.)
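 *
 * For example, when called with offset == L + 1 (L == 3 for a
 * 12-byte nonce), this writes the 3-byte big-endian encoding of
 * value into bytes 13..15 of the block.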
 */
static void
append_int(size_t value, __m128i *block, size_t offset)
{
	int indx = sizeof(*block) - 1;
	uint8_t *bp = (uint8_t*)block;

	while (indx > (sizeof(*block) - offset)) {
		bp[indx] = value & 0xff;
		indx--;
		value >>= 8;
	}
}

/*
 * Start the CBC-MAC process.  This handles the auth data.
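 *
 * Per RFC 3610, the MAC is a CBC-MAC over B_0 (flags byte, nonce,
 * and encoded message length) followed by the length-prefixed AAD
 * blocks; the payload blocks are folded in later by the callers.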
 */
static __m128i
cbc_mac_start(const unsigned char *auth_data, size_t auth_len,
    const unsigned char *nonce, size_t nonce_len,
    const unsigned char *key, int nr,
    size_t data_len, size_t tag_len)
{
	__m128i cbc_block, staging_block;
	uint8_t *byte_ptr;
	/* This defines where the message length goes */
	int L = sizeof(__m128i) - 1 - nonce_len;

	/*
	 * Set up B0 here.  This has the flags byte,
	 * followed by the nonce, followed by the
	 * length of the message.
	 */
	cbc_block = _mm_setzero_si128();
	byte_ptr = (uint8_t*)&cbc_block;
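	/*
	 * Flags byte of B_0 (RFC 3610 section 2.2): bit 6 is the Adata
	 * flag (set when AAD is present), bits 5..3 hold
	 * M' = (tag_len - 2) / 2, and bits 2..0 hold L' = L - 1.
	 */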
	byte_ptr[0] = ((auth_len > 0) ? 1 : 0) * 64 |
	    (((tag_len - 2) / 2) * 8) |
	    (L - 1);
	bcopy(nonce, byte_ptr + 1, nonce_len);
	append_int(data_len, &cbc_block, L+1);
	cbc_block = AESNI_ENC(cbc_block, key, nr);

	if (auth_len != 0) {
		/*
		 * We need to start by appending the length descriptor.
		 */
		uint32_t auth_amt;
		size_t copy_amt;
		const uint8_t *auth_ptr = auth_data;

		staging_block = _mm_setzero_si128();

		/*
		 * The current OCF calling convention means that
		 * there can never be more than 4g of authentication
		 * data, so we don't handle the 0xffff case.
		 */
		KASSERT(auth_len < (1ULL << 32),
		    ("%s: auth_len (%zu) larger than 4GB",
		    __FUNCTION__, auth_len));

		if (auth_len < ((1 << 16) - (1 << 8))) {
			/*
			 * If the auth data length is less than
			 * 0xff00, we don't need to encode a length
			 * specifier, just the length of the auth
			 * data.
			 */
			be16enc(&staging_block, auth_len);
			auth_amt = 2;
		} else if (auth_len < (1ULL << 32)) {
			/*
			 * Two bytes for the length prefix, and then
			 * four bytes for the length.  This makes a total
			 * of 6 bytes to describe the auth data length.
			 */
			be16enc(&staging_block, 0xfffe);
			be32enc((char*)&staging_block + 2, auth_len);
			auth_amt = 6;
		} else
			panic("%s: auth len too large", __FUNCTION__);

		/*
		 * Now copy the auth data into blocks.  The first block
		 * already holds auth_amt bytes of length encoding, so
		 * account for that; the last block is zero padded.
		 */
		copy_amt = MIN(auth_len,
		    sizeof(staging_block) - auth_amt);
		byte_ptr = (uint8_t*)&staging_block;
		bcopy(auth_ptr, &byte_ptr[auth_amt], copy_amt);
		auth_ptr += copy_amt;

		cbc_block = xor_and_encrypt(cbc_block, staging_block, key, nr);

		while (auth_ptr < auth_data + auth_len) {
			copy_amt = MIN((auth_data + auth_len) - auth_ptr,
			    sizeof(staging_block));
			if (copy_amt < sizeof(staging_block))
				bzero(&staging_block, sizeof(staging_block));
			bcopy(auth_ptr, &staging_block, copy_amt);
			cbc_block = xor_and_encrypt(cbc_block, staging_block,
			    key, nr);
			auth_ptr += copy_amt;
		}
	}
	return (cbc_block);
}

/*
 * Implement AES CCM+CBC-MAC encryption and authentication.
 *
 * Note that since abytes is limited to a 32-bit value here, the AAD
 * is limited to 4 gigabytes or less.
 */
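/*
 * Usage sketch (a hypothetical caller, not code from this file): the
 * key argument is assumed to be the expanded AES-NI round-key
 * schedule (e.g. as filled in by aesni_set_enckey()), not the raw
 * user key, and nr the matching round count:
 *
 *	uint8_t tag[16];
 *
 *	AES_CCM_encrypt(plaintext, ciphertext, aad, nonce, tag, ptlen,
 *	    aadlen, 12, sizeof(tag), (const unsigned char *)schedule,
 *	    rounds);
 */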
void
AES_CCM_encrypt(const unsigned char *in, unsigned char *out,
    const unsigned char *addt, const unsigned char *nonce,
    unsigned char *tag, uint32_t nbytes, uint32_t abytes, int nlen,
    int tag_length, const unsigned char *key, int nr)
{
	int L;
	int counter = 1;	/* S0 has 0, S1 has 1 */
	size_t copy_amt, total = 0;
	uint8_t *byte_ptr;
	__m128i s0, rolling_mac, s_x, staging_block;

	/* NIST 800-38c section A.1 says n is [7, 13]. */
	if (nlen < 7 || nlen > 13)
		panic("%s: bad nonce length %d", __FUNCTION__, nlen);

	/*
	 * We need to know how many bytes to use to describe
	 * the length of the data.  Normally, nlen should be
	 * 12, which leaves us 3 bytes to do that -- 16mbytes of
	 * data to encrypt.  But it can be longer or shorter;
	 * this impacts the length of the message.
	 */
	L = sizeof(__m128i) - 1 - nlen;

	/*
	 * Clear out the blocks
	 */
	s0 = _mm_setzero_si128();

	rolling_mac = cbc_mac_start(addt, abytes, nonce, nlen,
	    key, nr, nbytes, tag_length);

	/* s0 has flags, nonce, and then 0 */
	byte_ptr = (uint8_t*)&s0;
	byte_ptr[0] = L - 1;	/* but the flags byte only has L' */
	bcopy(nonce, &byte_ptr[1], nlen);
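	/*
	 * In RFC 3610 terms, s0 is the A_0 counter block: its
	 * encryption S_0 is XORed with the CBC-MAC below to form the
	 * tag.  s_x (a copy of s0) becomes A_1, A_2, ... as the
	 * counter is appended into its low-order L bytes on each
	 * pass through the loop.
	 */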

	/*
	 * Now cycle through the payload, one 16-byte block at a time:
	 * fold the (zero-padded) plaintext block into the CBC-MAC, then
	 * XOR it with the encrypted counter block to produce ciphertext.
	 */
	bcopy(&s0, &s_x, sizeof(s0));

	while (total < nbytes) {
		/*
		 * Copy the plain-text data into staging_block.
		 * This may need to be zero-padded.
		 */
		copy_amt = MIN(nbytes - total, sizeof(staging_block));
		bcopy(in+total, &staging_block, copy_amt);
		if (copy_amt < sizeof(staging_block)) {
			byte_ptr = (uint8_t*)&staging_block;
			bzero(&byte_ptr[copy_amt],
			    sizeof(staging_block) - copy_amt);
		}
		rolling_mac = xor_and_encrypt(rolling_mac, staging_block,
		    key, nr);
		/* Put the counter into the s_x block */
		append_int(counter++, &s_x, L+1);
		/* Encrypt that */
		__m128i X = AESNI_ENC(s_x, key, nr);
		/* XOR the plain-text with the encrypted counter block */
		staging_block = _mm_xor_si128(staging_block, X);
		/* And copy it out */
		bcopy(&staging_block, out+total, copy_amt);
		total += copy_amt;
	}
	/*
	 * Allegedly done with it!  Except for the tag.
	 */
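	/*
	 * The tag is the first tag_length bytes of the CBC-MAC XORed
	 * with S_0 = E(key, A_0) (RFC 3610 section 2.3).
	 */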
	s0 = AESNI_ENC(s0, key, nr);
	staging_block = _mm_xor_si128(s0, rolling_mac);
	bcopy(&staging_block, tag, tag_length);
	explicit_bzero(&s0, sizeof(s0));
	explicit_bzero(&staging_block, sizeof(staging_block));
	explicit_bzero(&s_x, sizeof(s_x));
	explicit_bzero(&rolling_mac, sizeof(rolling_mac));
}

/*
 * Implement AES CCM+CBC-MAC decryption and authentication.
 * Returns 0 on failure, 1 on success.
 *
 * The primary difference here is that each encrypted block
 * needs to be hashed&encrypted after it is decrypted (since
 * the CBC-MAC is based on the plain text).  This means that
 * we do the decryption twice -- first to verify the tag,
 * and second to decrypt and copy it out.
 *
 * To avoid annoying code copying, we implement the main
 * loop as a separate function.
 *
 * Call with out as NULL to not store the decrypted results;
 * call with macp as NULL to not run the authentication.
 * Calling with neither as NULL does the decryption and
 * authentication as a single pass (which is not allowed
 * per the specification, really).
 *
 * If macp is non-NULL, it points to the post-AAD computed
 * checksum.
 */
static void
decrypt_loop(const unsigned char *in, unsigned char *out, size_t nbytes,
    __m128i s0, size_t nonce_length, __m128i *macp,
    const unsigned char *key, int nr)
{
	size_t total = 0;
	__m128i s_x = s0, mac_block;
	int counter = 1;
	const size_t L = sizeof(__m128i) - 1 - nonce_length;
	__m128i pad_block, staging_block;

	/*
	 * The starting mac (post AAD, if any).
	 */
	if (macp != NULL)
		mac_block = *macp;

	while (total < nbytes) {
		size_t copy_amt = MIN(nbytes - total, sizeof(staging_block));

		if (copy_amt < sizeof(staging_block)) {
			staging_block = _mm_setzero_si128();
		}
		bcopy(in+total, &staging_block, copy_amt);

		/*
		 * staging_block has the current block of input data,
		 * zero-padded if necessary.  This is used in computing
		 * both the decrypted data, and the authentication tag.
		 */
		append_int(counter++, &s_x, L+1);
		/*
		 * The tag is computed based on the decrypted data.
		 */
		pad_block = AESNI_ENC(s_x, key, nr);
		if (copy_amt < sizeof(staging_block)) {
			/*
			 * Zero the tail of pad_block so the bytes past
			 * copy_amt in staging_block stay zero after the
			 * XOR below: the CBC-MAC must be computed over
			 * the zero-padded plaintext.  (staging_block
			 * was set to 0's above.)
			 */
			uint8_t *end_of_buffer = (uint8_t*)&pad_block;
			bzero(end_of_buffer + copy_amt,
			    sizeof(pad_block) - copy_amt);
		}
		staging_block = _mm_xor_si128(staging_block, pad_block);

		if (out)
			bcopy(&staging_block, out+total, copy_amt);

		if (macp)
			mac_block = xor_and_encrypt(mac_block, staging_block,
			    key, nr);
		total += copy_amt;
	}

	if (macp)
		*macp = mac_block;

	explicit_bzero(&pad_block, sizeof(pad_block));
	explicit_bzero(&staging_block, sizeof(staging_block));
	explicit_bzero(&mac_block, sizeof(mac_block));
}

/*
 * The exposed decryption routine.  This is practically a
 * copy of the encryption routine, except that the order
 * in which the tag is created is changed.
 * XXX combine the two functions at some point!
 */
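/*
 * Usage sketch (hypothetical caller, same key-schedule assumptions as
 * the AES_CCM_encrypt() sketch above); a zero return means the tag
 * did not verify and nothing was written to the output buffer:
 *
 *	if (AES_CCM_decrypt(ciphertext, plaintext, aad, nonce, tag,
 *	    ctlen, aadlen, 12, 16, (const unsigned char *)schedule,
 *	    rounds) == 0)
 *		error = EBADMSG;
 */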
int
AES_CCM_decrypt(const unsigned char *in, unsigned char *out,
    const unsigned char *addt, const unsigned char *nonce,
    const unsigned char *tag, uint32_t nbytes, uint32_t abytes, int nlen,
    int tag_length, const unsigned char *key, int nr)
{
	int L;
	__m128i s0, rolling_mac, staging_block;
	uint8_t *byte_ptr;

	/* NIST 800-38c section A.1 says n is [7, 13], as in encrypt. */
	if (nlen < 7 || nlen > 13)
		panic("%s: bad nonce length %d", __FUNCTION__, nlen);

	/*
	 * We need to know how many bytes to use to describe
	 * the length of the data.  Normally, nlen should be
	 * 12, which leaves us 3 bytes to do that -- 16mbytes of
	 * data to encrypt.  But it can be longer or shorter.
	 */
	L = sizeof(__m128i) - 1 - nlen;

	/*
	 * Clear out the blocks
	 */
	s0 = _mm_setzero_si128();

	rolling_mac = cbc_mac_start(addt, abytes, nonce, nlen,
	    key, nr, nbytes, tag_length);
	/* s0 has flags, nonce, and then 0 */
	byte_ptr = (uint8_t*)&s0;
	byte_ptr[0] = L-1;	/* but the flags byte only has L' */
	bcopy(nonce, &byte_ptr[1], nlen);

	/*
	 * First pass over the payload: run it through decrypt_loop()
	 * with out == NULL so the CBC-MAC is computed over the
	 * decrypted data without storing any plaintext yet.
	 */
	decrypt_loop(in, NULL, nbytes, s0, nlen, &rolling_mac, key, nr);

	/*
	 * Recompute the tag and compare it (in constant time) with the
	 * one supplied; on mismatch, bail out before writing anything.
	 */
	staging_block = _mm_xor_si128(AESNI_ENC(s0, key, nr), rolling_mac);
	if (timingsafe_bcmp(&staging_block, tag, tag_length) != 0) {
		return (0);
	}

	/*
	 * Push out the decryption results this time.
	 */
	decrypt_loop(in, out, nbytes, s0, nlen, NULL, key, nr);
	return (1);
}