1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2017 Chelsio Communications, Inc.
5 * Copyright (c) 2017 Conrad Meyer <cem@FreeBSD.org>
6 * All rights reserved.
7 * Largely borrowed from ccr(4), Written by: John Baldwin <jhb@FreeBSD.org>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33
34 #include "opt_ddb.h"
35
36 #include <sys/param.h>
37 #include <sys/bus.h>
38 #include <sys/lock.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mutex.h>
42 #include <sys/module.h>
43 #include <sys/rman.h>
44 #include <sys/sglist.h>
45 #include <sys/sysctl.h>
46
47 #ifdef DDB
48 #include <ddb/ddb.h>
49 #endif
50
51 #include <dev/pci/pcireg.h>
52 #include <dev/pci/pcivar.h>
53
54 #include <machine/bus.h>
55 #include <machine/resource.h>
56 #include <machine/vmparam.h>
57
58 #include <opencrypto/cryptodev.h>
59 #include <opencrypto/xform.h>
60
61 #include <vm/vm.h>
62 #include <vm/pmap.h>
63
64 #include "cryptodev_if.h"
65
66 #include "ccp.h"
67 #include "ccp_hardware.h"
68 #include "ccp_lsb.h"
69
70 CTASSERT(sizeof(struct ccp_desc) == 32);
71
72 static struct ccp_xts_unitsize_map_entry {
73 enum ccp_xts_unitsize cxu_id;
74 unsigned cxu_size;
75 } ccp_xts_unitsize_map[] = {
76 { CCP_XTS_AES_UNIT_SIZE_16, 16 },
77 { CCP_XTS_AES_UNIT_SIZE_512, 512 },
78 { CCP_XTS_AES_UNIT_SIZE_1024, 1024 },
79 { CCP_XTS_AES_UNIT_SIZE_2048, 2048 },
80 { CCP_XTS_AES_UNIT_SIZE_4096, 4096 },
81 };
82
83 SYSCTL_NODE(_hw, OID_AUTO, ccp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
84 "ccp node");
85
86 unsigned g_ccp_ring_order = 11;
87 SYSCTL_UINT(_hw_ccp, OID_AUTO, ring_order, CTLFLAG_RDTUN, &g_ccp_ring_order,
88 0, "Set CCP ring order. (1 << this) == ring size. Min: 6, Max: 16");
89
90 /*
91 * Zero buffer, sufficient for padding LSB entries; it does not span a page
92 * boundary.
93 */
94 static const char g_zeroes[32] __aligned(32);
95
96 static inline uint32_t
97 ccp_read_4(struct ccp_softc *sc, uint32_t offset)
98 {
99 return (bus_space_read_4(sc->pci_bus_tag, sc->pci_bus_handle, offset));
100 }
101
102 static inline void
103 ccp_write_4(struct ccp_softc *sc, uint32_t offset, uint32_t value)
104 {
105 bus_space_write_4(sc->pci_bus_tag, sc->pci_bus_handle, offset, value);
106 }
107
108 static inline uint32_t
109 ccp_read_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset)
110 {
111 /*
112 * Each queue gets its own 4kB register space. Queue 0 is at 0x1000.
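 * Queue 1's registers thus start at 0x2000, queue 2's at 0x3000, and so on.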
113 */
114 return (ccp_read_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset));
115 }
116
117 static inline void
118 ccp_write_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset,
119 uint32_t value)
120 {
121 ccp_write_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset, value);
122 }
123
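/*
 * Publish the software tail index to this queue's TAIL_LO register so the
 * hardware sees newly enqueued descriptors.
 */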
124 void
125 ccp_queue_write_tail(struct ccp_queue *qp)
126 {
127 ccp_write_queue_4(qp->cq_softc, qp->cq_qindex, CMD_Q_TAIL_LO_BASE,
128 ((uint32_t)qp->desc_ring_bus_addr) + (Q_DESC_SIZE * qp->cq_tail));
129 }
130
131 /*
132 * Given a queue and a reserved LSB entry index, compute the LSB *entry id* of
133 * that entry for the queue's private LSB region.
134 */
135 static inline uint8_t
136 ccp_queue_lsb_entry(struct ccp_queue *qp, unsigned lsb_entry)
137 {
138 return ((qp->private_lsb * LSB_REGION_LENGTH + lsb_entry));
139 }
140
141 /*
142 * Given a queue and a reserved LSB entry index, compute the LSB *address* of
143 * that entry for the queue's private LSB region.
144 */
145 static inline uint32_t
146 ccp_queue_lsb_address(struct ccp_queue *qp, unsigned lsb_entry)
147 {
148 return (ccp_queue_lsb_entry(qp, lsb_entry) * LSB_ENTRY_SIZE);
149 }
150
151 /*
152 * Some terminology:
153 *
154 * LSB - Local Storage Block
155 * =========================
156 *
157 * 8 segments/regions, each containing 16 entries.
158 *
159 * Each entry contains 256 bits (32 bytes).
160 *
161 * Segments are virtually addressed in commands, but accesses cannot cross
162 * segment boundaries. Virtual map uses an identity mapping by default
163 * (virtual segment N corresponds to physical segment N).
164 *
165 * Access to a physical region can be restricted to any subset of all five
166 * queues.
167 *
168 * "Pass-through" mode
169 * ===================
170 *
171 * Pass-through is a generic DMA engine, much like ioat(4). Some nice
172 * features:
173 *
174 * - Supports byte-swapping for endian conversion (32- or 256-bit words)
175 * - AND, OR, XOR with fixed 256-bit mask
176 * - CRC32 of data (may be used in tandem with bswap, but not bit operations)
177 * - Read/write of LSB
178 * - Memset
179 *
180 * If bit manipulation mode is enabled, input must be a multiple of 256 bits
181 * (32 bytes).
182 *
183 * If byte-swapping is enabled, input must be a multiple of the word size.
184 *
185 * Zlib mode -- only usable from one queue at a time, single job at a time.
186 * ========================================================================
187 *
188 * Only usable from private host, aka PSP? Not host processor?
189 *
190 * RNG.
191 * ====
192 *
193 * Raw bits are conditioned with AES and fed through CTR_DRBG. Output goes in
194 * a ring buffer readable by software.
195 *
196 * NIST SP 800-90B Repetition Count and Adaptive Proportion health checks are
197 * implemented on the raw input stream and may be enabled to verify min-entropy
198 * of 0.5 bits per bit.
199 */
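/*
 * Worked example of the LSB addressing above (a sketch, never compiled;
 * assumes LSB_REGION_LENGTH == 16 and LSB_ENTRY_SIZE == 32, per the layout
 * described): a queue whose private region is segment 2 maps its reserved
 * entry 3 to LSB entry id 2 * 16 + 3 == 35, i.e. LSB byte address
 * 35 * 32 == 0x460, which is what ccp_queue_lsb_entry() and
 * ccp_queue_lsb_address() return.
 */
#if 0
static void
ccp_lsb_addressing_example(struct ccp_queue *qp)
{
	/* Hypothetical queue whose private region is LSB segment 2. */
	KASSERT(qp->private_lsb == 2, ("example assumes segment 2"));
	KASSERT(ccp_queue_lsb_entry(qp, 3) == 35,
	    ("entry id == private_lsb * LSB_REGION_LENGTH + entry"));
	KASSERT(ccp_queue_lsb_address(qp, 3) == 0x460,
	    ("byte address == entry id * LSB_ENTRY_SIZE"));
}
#endif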
200
201 static void
202 ccp_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
203 {
204 bus_addr_t *baddr;
205
206 KASSERT(error == 0, ("%s: error:%d", __func__, error));
207 baddr = arg;
208 *baddr = segs->ds_addr;
209 }
210
211 static int
212 ccp_hw_attach_queue(device_t dev, uint64_t lsbmask, unsigned queue)
213 {
214 struct ccp_softc *sc;
215 struct ccp_queue *qp;
216 void *desc;
217 size_t ringsz, num_descriptors;
218 int error;
219
220 desc = NULL;
221 sc = device_get_softc(dev);
222 qp = &sc->queues[queue];
223
224 /*
225 * Don't bother allocating a ring for queues the host isn't allowed to
226 * drive.
227 */
228 if ((sc->valid_queues & (1 << queue)) == 0)
229 return (0);
230
231 ccp_queue_decode_lsb_regions(sc, lsbmask, queue);
232
233 /* Ignore queues that do not have any LSB access. */
234 if (qp->lsb_mask == 0) {
235 device_printf(dev, "Ignoring queue %u with no LSB access\n",
236 queue);
237 sc->valid_queues &= ~(1 << queue);
238 return (0);
239 }
240
241 num_descriptors = 1 << sc->ring_size_order;
242 ringsz = sizeof(struct ccp_desc) * num_descriptors;
243
244 /*
245 * "Queue_Size" is order - 1.
246 *
247 * Queue must be aligned to 5+Queue_Size+1 == 5 + order bits.
248 */
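/*
 * For example, the default g_ccp_ring_order of 11 gives 2048 descriptors of
 * 32 bytes each, a 64kB ring, which must then be aligned to
 * 1 << (5 + 11) == 64kB.
 */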
249 error = bus_dma_tag_create(bus_get_dma_tag(dev),
250 1 << (5 + sc->ring_size_order),
251 #if defined(__i386__) && !defined(PAE)
252 0, BUS_SPACE_MAXADDR,
253 #else
254 (bus_addr_t)1 << 32, BUS_SPACE_MAXADDR_48BIT,
255 #endif
256 BUS_SPACE_MAXADDR, NULL, NULL, ringsz, 1,
257 ringsz, 0, NULL, NULL, &qp->ring_desc_tag);
258 if (error != 0)
259 goto out;
260
261 error = bus_dmamem_alloc(qp->ring_desc_tag, &desc,
262 BUS_DMA_ZERO | BUS_DMA_WAITOK, &qp->ring_desc_map);
263 if (error != 0)
264 goto out;
265
266 error = bus_dmamap_load(qp->ring_desc_tag, qp->ring_desc_map, desc,
267 ringsz, ccp_dmamap_cb, &qp->desc_ring_bus_addr, BUS_DMA_WAITOK);
268 if (error != 0)
269 goto out;
270
271 qp->desc_ring = desc;
272 qp->completions_ring = malloc(num_descriptors *
273 sizeof(*qp->completions_ring), M_CCP, M_ZERO | M_WAITOK);
274
275 /* Zero control register; among other things, clears the RUN flag. */
276 qp->qcontrol = 0;
277 ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);
278 ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 0);
279
280 /* Clear any leftover interrupt status flags */
281 ccp_write_queue_4(sc, queue, CMD_Q_INTERRUPT_STATUS_BASE,
282 ALL_INTERRUPTS);
283
284 qp->qcontrol |= (sc->ring_size_order - 1) << CMD_Q_SIZE_SHIFT;
285
286 ccp_write_queue_4(sc, queue, CMD_Q_TAIL_LO_BASE,
287 (uint32_t)qp->desc_ring_bus_addr);
288 ccp_write_queue_4(sc, queue, CMD_Q_HEAD_LO_BASE,
289 (uint32_t)qp->desc_ring_bus_addr);
290
291 /*
292 * Enable completion interrupts, as well as error or administrative
293 * halt interrupts. We don't use administrative halts, but they
294 * shouldn't trip unless we do, so it ought to be harmless.
295 */
296 ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE,
297 INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED);
298
299 qp->qcontrol |= (qp->desc_ring_bus_addr >> 32) << CMD_Q_PTR_HI_SHIFT;
300 qp->qcontrol |= CMD_Q_RUN;
301 ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);
302
303 out:
304 if (error != 0) {
305 if (qp->desc_ring != NULL)
306 bus_dmamap_unload(qp->ring_desc_tag,
307 qp->ring_desc_map);
308 if (desc != NULL)
309 bus_dmamem_free(qp->ring_desc_tag, desc,
310 qp->ring_desc_map);
311 if (qp->ring_desc_tag != NULL)
312 bus_dma_tag_destroy(qp->ring_desc_tag);
313 }
314 return (error);
315 }
316
317 static void
318 ccp_hw_detach_queue(device_t dev, unsigned queue)
319 {
320 struct ccp_softc *sc;
321 struct ccp_queue *qp;
322
323 sc = device_get_softc(dev);
324 qp = &sc->queues[queue];
325
326 /*
327 * Don't bother allocating a ring for queues the host isn't allowed to
328 * drive.
329 */
330 if ((sc->valid_queues & (1 << queue)) == 0)
331 return;
332
333 free(qp->completions_ring, M_CCP);
334 bus_dmamap_unload(qp->ring_desc_tag, qp->ring_desc_map);
335 bus_dmamem_free(qp->ring_desc_tag, qp->desc_ring, qp->ring_desc_map);
336 bus_dma_tag_destroy(qp->ring_desc_tag);
337 }
338
339 static int
340 ccp_map_pci_bar(device_t dev)
341 {
342 struct ccp_softc *sc;
343
344 sc = device_get_softc(dev);
345
346 sc->pci_resource_id = PCIR_BAR(2);
347 sc->pci_resource = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
348 &sc->pci_resource_id, RF_ACTIVE);
349 if (sc->pci_resource == NULL) {
350 device_printf(dev, "unable to allocate pci resource\n");
351 return (ENODEV);
352 }
353
354 sc->pci_resource_id_msix = PCIR_BAR(5);
355 sc->pci_resource_msix = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
356 &sc->pci_resource_id_msix, RF_ACTIVE);
357 if (sc->pci_resource_msix == NULL) {
358 device_printf(dev, "unable to allocate pci resource msix\n");
359 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id,
360 sc->pci_resource);
361 return (ENODEV);
362 }
363
364 sc->pci_bus_tag = rman_get_bustag(sc->pci_resource);
365 sc->pci_bus_handle = rman_get_bushandle(sc->pci_resource);
366 return (0);
367 }
368
369 static void
370 ccp_unmap_pci_bar(device_t dev)
371 {
372 struct ccp_softc *sc;
373
374 sc = device_get_softc(dev);
375
376 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id_msix,
377 sc->pci_resource_msix);
378 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id,
379 sc->pci_resource);
380 }
381
382 const static struct ccp_error_code {
383 uint8_t ce_code;
384 const char *ce_name;
385 int ce_errno;
386 const char *ce_desc;
387 } ccp_error_codes[] = {
388 { 0x01, "ILLEGAL_ENGINE", EIO, "Requested engine was invalid" },
389 { 0x03, "ILLEGAL_FUNCTION_TYPE", EIO,
390 "A non-supported function type was specified" },
391 { 0x04, "ILLEGAL_FUNCTION_MODE", EIO,
392 "A non-supported function mode was specified" },
393 { 0x05, "ILLEGAL_FUNCTION_ENCRYPT", EIO,
394 "A CMAC type was specified when ENCRYPT was not specified" },
395 { 0x06, "ILLEGAL_FUNCTION_SIZE", EIO,
396 "A non-supported function size was specified.\n"
397 "AES-CFB: Size was not 127 or 7;\n"
398 "3DES-CFB: Size was not 7;\n"
399 "RSA: See supported size table (7.4.2);\n"
400 "ECC: Size was greater than 576 bits." },
401 { 0x07, "Zlib_MISSING_INIT_EOM", EIO,
402 "Zlib command does not have INIT and EOM set" },
403 { 0x08, "ILLEGAL_FUNCTION_RSVD", EIO,
404 "Reserved bits in a function specification were not 0" },
405 { 0x09, "ILLEGAL_BUFFER_LENGTH", EIO,
406 "The buffer length specified was not correct for the selected engine"
407 },
408 { 0x0A, "VLSB_FAULT", EIO, "Illegal VLSB segment mapping:\n"
409 "Undefined VLSB segment mapping or\n"
410 "mapping to unsupported LSB segment id" },
411 { 0x0B, "ILLEGAL_MEM_ADDR", EFAULT,
412 "The specified source/destination buffer access was illegal:\n"
413 "Data buffer located in a LSB location disallowed by the LSB protection masks; or\n"
414 "Data buffer not completely contained within a single segment; or\n"
415 "Pointer with Fixed=1 is not 32-bit aligned; or\n"
416 "Pointer with Fixed=1 attempted to reference non-AXI1 (local) memory."
417 },
418 { 0x0C, "ILLEGAL_MEM_SEL", EIO,
419 "A src_mem, dst_mem, or key_mem field was illegal:\n"
420 "A field was set to a reserved value; or\n"
421 "A public command attempted to reference AXI1 (local) or GART memory; or\n"
422 "A Zlib command attmpted to use the LSB." },
423 { 0x0D, "ILLEGAL_CONTEXT_ADDR", EIO,
424 "The specified context location was illegal:\n"
425 "Context located in a LSB location disallowed by the LSB protection masks; or\n"
426 "Context not completely contained within a single segment." },
427 { 0x0E, "ILLEGAL_KEY_ADDR", EIO,
428 "The specified key location was illegal:\n"
429 "Key located in a LSB location disallowed by the LSB protection masks; or\n"
430 "Key not completely contained within a single segment." },
431 { 0x12, "CMD_TIMEOUT", EIO, "A command timeout violation occurred" },
432 /* XXX Could fill out these descriptions too */
433 { 0x13, "IDMA0_AXI_SLVERR", EIO, "" },
434 { 0x14, "IDMA0_AXI_DECERR", EIO, "" },
435 { 0x16, "IDMA1_AXI_SLVERR", EIO, "" },
436 { 0x17, "IDMA1_AXI_DECERR", EIO, "" },
437 { 0x19, "ZLIBVHB_AXI_SLVERR", EIO, "" },
438 { 0x1A, "ZLIBVHB_AXI_DECERR", EIO, "" },
439 { 0x1C, "ZLIB_UNEXPECTED_EOM", EIO, "" },
440 { 0x1D, "ZLIB_EXTRA_DATA", EIO, "" },
441 { 0x1E, "ZLIB_BTYPE", EIO, "" },
442 { 0x20, "ZLIB_UNDEFINED_DISTANCE_SYMBOL", EIO, "" },
443 { 0x21, "ZLIB_CODE_LENGTH_SYMBOL", EIO, "" },
444 { 0x22, "ZLIB_VHB_ILLEGAL_FETCH", EIO, "" },
445 { 0x23, "ZLIB_UNCOMPRESSED_LEN", EIO, "" },
446 { 0x24, "ZLIB_LIMIT_REACHED", EIO, "" },
447 { 0x25, "ZLIB_CHECKSUM_MISMATCH", EIO, "" },
448 { 0x26, "ODMA0_AXI_SLVERR", EIO, "" },
449 { 0x27, "ODMA0_AXI_DECERR", EIO, "" },
450 { 0x29, "ODMA1_AXI_SLVERR", EIO, "" },
451 { 0x2A, "ODMA1_AXI_DECERR", EIO, "" },
452 { 0x2B, "LSB_PARITY_ERR", EIO,
453 "A read from the LSB encountered a parity error" },
454 };
455
456 static void
457 ccp_intr_handle_error(struct ccp_queue *qp, const struct ccp_desc *desc)
458 {
459 struct ccp_completion_ctx *cctx;
460 const struct ccp_error_code *ec;
461 struct ccp_softc *sc;
462 uint32_t status, error, esource, faultblock;
463 unsigned q, idx;
464 int errno;
465
466 sc = qp->cq_softc;
467 q = qp->cq_qindex;
468
469 status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);
470
471 error = status & STATUS_ERROR_MASK;
472
473 /* Decode error status */
474 ec = NULL;
475 for (idx = 0; idx < nitems(ccp_error_codes); idx++)
476 if (ccp_error_codes[idx].ce_code == error) {
477 ec = &ccp_error_codes[idx];
478 break;
479 }
480
481 esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
482 STATUS_ERRORSOURCE_MASK;
483 faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
484 STATUS_VLSB_FAULTBLOCK_MASK;
485 device_printf(sc->dev, "Error: %s (%u) Source: %u Faulting LSB block: %u\n",
486 (ec != NULL) ? ec->ce_name : "(reserved)", error, esource,
487 faultblock);
488 if (ec != NULL)
489 device_printf(sc->dev, "Error description: %s\n", ec->ce_desc);
490
491 /* TODO Could format the desc nicely here */
492 idx = desc - qp->desc_ring;
493 DPRINTF(sc->dev, "Bad descriptor index: %u contents: %32D\n", idx,
494 (const void *)desc, " ");
495
496 /*
497 * TODO Per § 14.4 "Error Handling," DMA_Status, DMA_Read/Write_Status,
498 * Zlib Decompress status may be interesting.
499 */
500
501 while (true) {
502 /* Keep unused descriptors zero for next use. */
503 memset(&qp->desc_ring[idx], 0, sizeof(qp->desc_ring[idx]));
504
505 cctx = &qp->completions_ring[idx];
506
507 /*
508 * Restart procedure described in § 14.2.5. Could be used by HoC if we
509 * used that.
510 *
511 * Advance HEAD_LO past bad descriptor + any remaining in
512 * transaction manually, then restart queue.
513 */
514 idx = (idx + 1) % (1 << sc->ring_size_order);
515
516 /* Callback function signals end of transaction */
517 if (cctx->callback_fn != NULL) {
518 if (ec == NULL)
519 errno = EIO;
520 else
521 errno = ec->ce_errno;
522 /* TODO More specific error code */
523 cctx->callback_fn(qp, cctx->session, cctx->callback_arg, errno);
524 cctx->callback_fn = NULL;
525 break;
526 }
527 }
528
529 qp->cq_head = idx;
530 qp->cq_waiting = false;
531 wakeup(&qp->cq_tail);
532 DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head);
533 ccp_write_queue_4(sc, q, CMD_Q_HEAD_LO_BASE,
534 (uint32_t)qp->desc_ring_bus_addr + (idx * Q_DESC_SIZE));
535 ccp_write_queue_4(sc, q, CMD_Q_CONTROL_BASE, qp->qcontrol);
536 DPRINTF(sc->dev, "%s: Restarted queue\n", __func__);
537 }
538
539 static void
540 ccp_intr_run_completions(struct ccp_queue *qp, uint32_t ints)
541 {
542 struct ccp_completion_ctx *cctx;
543 struct ccp_softc *sc;
544 const struct ccp_desc *desc;
545 uint32_t headlo, idx;
546 unsigned q, completed;
547
548 sc = qp->cq_softc;
549 q = qp->cq_qindex;
550
551 mtx_lock(&qp->cq_lock);
552
553 /*
554 * Hardware HEAD_LO points to the first incomplete descriptor. Process
555 * any submitted and completed descriptors, up to but not including
556 * HEAD_LO.
557 */
558 headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
559 idx = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;
560
561 DPRINTF(sc->dev, "%s: hw head:%u sw head:%u\n", __func__, idx,
562 qp->cq_head);
563 completed = 0;
564 while (qp->cq_head != idx) {
565 DPRINTF(sc->dev, "%s: completing:%u\n", __func__, qp->cq_head);
566
567 cctx = &qp->completions_ring[qp->cq_head];
568 if (cctx->callback_fn != NULL) {
569 cctx->callback_fn(qp, cctx->session,
570 cctx->callback_arg, 0);
571 cctx->callback_fn = NULL;
572 }
573
574 /* Keep unused descriptors zero for next use. */
575 memset(&qp->desc_ring[qp->cq_head], 0,
576 sizeof(qp->desc_ring[qp->cq_head]));
577
578 qp->cq_head = (qp->cq_head + 1) % (1 << sc->ring_size_order);
579 completed++;
580 }
581 if (completed > 0) {
582 qp->cq_waiting = false;
583 wakeup(&qp->cq_tail);
584 }
585
586 DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head);
587
588 /*
589 * Desc points to the first incomplete descriptor, at the time we read
590 * HEAD_LO. If there was an error flagged in interrupt status, the HW
591 * will not proceed past the erroneous descriptor by itself.
592 */
593 desc = &qp->desc_ring[idx];
594 if ((ints & INT_ERROR) != 0)
595 ccp_intr_handle_error(qp, desc);
596
597 mtx_unlock(&qp->cq_lock);
598 }
599
600 static void
601 ccp_intr_handler(void *arg)
602 {
603 struct ccp_softc *sc = arg;
604 size_t i;
605 uint32_t ints;
606
607 DPRINTF(sc->dev, "%s: interrupt\n", __func__);
608
609 /*
610 * We get one global interrupt per PCI device, shared over all of
611 * its queues. Scan each valid queue on interrupt for flags indicating
612 * activity.
613 */
614 for (i = 0; i < nitems(sc->queues); i++) {
615 if ((sc->valid_queues & (1 << i)) == 0)
616 continue;
617
618 ints = ccp_read_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE);
619 if (ints == 0)
620 continue;
621
622 #if 0
623 DPRINTF(sc->dev, "%s: %x interrupts on queue %zu\n", __func__,
624 (unsigned)ints, i);
625 #endif
626 /* Write back 1s to clear interrupt status bits. */
627 ccp_write_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE, ints);
628
629 /*
630 * If there was an error, we still need to run completions on
631 * any descriptors prior to the error. The completions handler
632 * invoked below will also handle the error descriptor.
633 */
634 if ((ints & (INT_COMPLETION | INT_ERROR)) != 0)
635 ccp_intr_run_completions(&sc->queues[i], ints);
636
637 if ((ints & INT_QUEUE_STOPPED) != 0)
638 device_printf(sc->dev, "%s: queue %zu stopped\n",
639 __func__, i);
640 }
641
642 /* Re-enable interrupts after processing */
643 for (i = 0; i < nitems(sc->queues); i++) {
644 if ((sc->valid_queues & (1 << i)) == 0)
645 continue;
646 ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE,
647 INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED);
648 }
649 }
650
651 static int
652 ccp_intr_filter(void *arg)
653 {
654 struct ccp_softc *sc = arg;
655 size_t i;
656
657 /* TODO: Split individual queues into separate taskqueues? */
658 for (i = 0; i < nitems(sc->queues); i++) {
659 if ((sc->valid_queues & (1 << i)) == 0)
660 continue;
661
662 /* Mask interrupt until task completes */
663 ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, 0);
664 }
665
666 return (FILTER_SCHEDULE_THREAD);
667 }
668
669 static int
670 ccp_setup_interrupts(struct ccp_softc *sc)
671 {
672 uint32_t nvec;
673 int rid, error, n, ridcopy;
674
675 n = pci_msix_count(sc->dev);
676 if (n < 1) {
677 device_printf(sc->dev, "%s: msix_count: %d\n", __func__, n);
678 return (ENXIO);
679 }
680
681 nvec = n;
682 error = pci_alloc_msix(sc->dev, &nvec);
683 if (error != 0) {
684 device_printf(sc->dev, "%s: alloc_msix error: %d\n", __func__,
685 error);
686 return (error);
687 }
688 if (nvec < 1) {
689 device_printf(sc->dev, "%s: alloc_msix: 0 vectors\n",
690 __func__);
691 return (ENXIO);
692 }
693 if (nvec > nitems(sc->intr_res)) {
694 device_printf(sc->dev, "%s: too many vectors: %u\n", __func__,
695 nvec);
696 nvec = nitems(sc->intr_res);
697 }
698
699 for (rid = 1; rid < 1 + nvec; rid++) {
700 ridcopy = rid;
701 sc->intr_res[rid - 1] = bus_alloc_resource_any(sc->dev,
702 SYS_RES_IRQ, &ridcopy, RF_ACTIVE);
703 if (sc->intr_res[rid - 1] == NULL) {
704 device_printf(sc->dev, "%s: Failed to alloc IRQ resource\n",
705 __func__);
706 return (ENXIO);
707 }
708
709 sc->intr_tag[rid - 1] = NULL;
710 error = bus_setup_intr(sc->dev, sc->intr_res[rid - 1],
711 INTR_MPSAFE | INTR_TYPE_MISC, ccp_intr_filter,
712 ccp_intr_handler, sc, &sc->intr_tag[rid - 1]);
713 if (error != 0)
714 device_printf(sc->dev, "%s: setup_intr: %d\n",
715 __func__, error);
716 }
717 sc->intr_count = nvec;
718
719 return (error);
720 }
721
722 static void
723 ccp_release_interrupts(struct ccp_softc *sc)
724 {
725 unsigned i;
726
727 for (i = 0; i < sc->intr_count; i++) {
728 if (sc->intr_tag[i] != NULL)
729 bus_teardown_intr(sc->dev, sc->intr_res[i],
730 sc->intr_tag[i]);
731 if (sc->intr_res[i] != NULL)
732 bus_release_resource(sc->dev, SYS_RES_IRQ,
733 rman_get_rid(sc->intr_res[i]), sc->intr_res[i]);
734 }
735
736 pci_release_msi(sc->dev);
737 }
738
739 int
740 ccp_hw_attach(device_t dev)
741 {
742 struct ccp_softc *sc;
743 uint64_t lsbmask;
744 uint32_t version, lsbmasklo, lsbmaskhi;
745 unsigned queue_idx, j;
746 int error;
747 bool bars_mapped, interrupts_setup;
748
749 queue_idx = 0;
750 bars_mapped = interrupts_setup = false;
751 sc = device_get_softc(dev);
752
753 error = ccp_map_pci_bar(dev);
754 if (error != 0) {
755 device_printf(dev, "%s: couldn't map BAR(s)\n", __func__);
756 goto out;
757 }
758 bars_mapped = true;
759
760 error = pci_enable_busmaster(dev);
761 if (error != 0) {
762 device_printf(dev, "%s: couldn't enable busmaster\n",
763 __func__);
764 goto out;
765 }
766
767 sc->ring_size_order = g_ccp_ring_order;
768 if (sc->ring_size_order < 6 || sc->ring_size_order > 16) {
769 device_printf(dev, "bogus hw.ccp.ring_order\n");
770 error = EINVAL;
771 goto out;
772 }
773 sc->valid_queues = ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET);
774
775 version = ccp_read_4(sc, VERSION_REG);
776 if ((version & VERSION_NUM_MASK) < 5) {
777 device_printf(dev,
778 "driver supports version 5 and later hardware\n");
779 error = ENXIO;
780 goto out;
781 }
782
783 error = ccp_setup_interrupts(sc);
784 if (error != 0)
785 goto out;
786 interrupts_setup = true;
787
788 sc->hw_version = version & VERSION_NUM_MASK;
789 sc->num_queues = (version >> VERSION_NUMVQM_SHIFT) &
790 VERSION_NUMVQM_MASK;
791 sc->num_lsb_entries = (version >> VERSION_LSBSIZE_SHIFT) &
792 VERSION_LSBSIZE_MASK;
793 sc->hw_features = version & VERSION_CAP_MASK;
794
795 /*
796 * Copy private LSB mask to public registers to enable access to LSB
797 * from all queues allowed by BIOS.
798 */
799 lsbmasklo = ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET);
800 lsbmaskhi = ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET);
801 ccp_write_4(sc, LSB_PUBLIC_MASK_LO_OFFSET, lsbmasklo);
802 ccp_write_4(sc, LSB_PUBLIC_MASK_HI_OFFSET, lsbmaskhi);
803
804 lsbmask = ((uint64_t)lsbmaskhi << 30) | lsbmasklo;
805
806 for (; queue_idx < nitems(sc->queues); queue_idx++) {
807 error = ccp_hw_attach_queue(dev, lsbmask, queue_idx);
808 if (error != 0) {
809 device_printf(dev, "%s: couldn't attach queue %u\n",
810 __func__, queue_idx);
811 goto out;
812 }
813 }
814 ccp_assign_lsb_regions(sc, lsbmask);
815
816 out:
817 if (error != 0) {
818 if (interrupts_setup)
819 ccp_release_interrupts(sc);
820 for (j = 0; j < queue_idx; j++)
821 ccp_hw_detach_queue(dev, j);
822 if (sc->ring_size_order != 0)
823 pci_disable_busmaster(dev);
824 if (bars_mapped)
825 ccp_unmap_pci_bar(dev);
826 }
827 return (error);
828 }
829
830 void
831 ccp_hw_detach(device_t dev)
832 {
833 struct ccp_softc *sc;
834 unsigned i;
835
836 sc = device_get_softc(dev);
837
838 for (i = 0; i < nitems(sc->queues); i++)
839 ccp_hw_detach_queue(dev, i);
840
841 ccp_release_interrupts(sc);
842 pci_disable_busmaster(dev);
843 ccp_unmap_pci_bar(dev);
844 }
845
846 static int __must_check
847 ccp_passthrough(struct ccp_queue *qp, bus_addr_t dst,
848 enum ccp_memtype dst_type, bus_addr_t src, enum ccp_memtype src_type,
849 bus_size_t len, enum ccp_passthru_byteswap swapmode,
850 enum ccp_passthru_bitwise bitmode, bool interrupt,
851 const struct ccp_completion_ctx *cctx)
852 {
853 struct ccp_desc *desc;
854
855 if (ccp_queue_get_ring_space(qp) == 0)
856 return (EAGAIN);
857
858 desc = &qp->desc_ring[qp->cq_tail];
859
860 memset(desc, 0, sizeof(*desc));
861 desc->engine = CCP_ENGINE_PASSTHRU;
862
863 desc->pt.ioc = interrupt;
864 desc->pt.byteswap = swapmode;
865 desc->pt.bitwise = bitmode;
866 desc->length = len;
867
868 desc->src_lo = (uint32_t)src;
869 desc->src_hi = src >> 32;
870 desc->src_mem = src_type;
871
872 desc->dst_lo = (uint32_t)dst;
873 desc->dst_hi = dst >> 32;
874 desc->dst_mem = dst_type;
875
876 if (bitmode != CCP_PASSTHRU_BITWISE_NOOP)
877 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_KEY);
878
879 if (cctx != NULL)
880 memcpy(&qp->completions_ring[qp->cq_tail], cctx, sizeof(*cctx));
881
882 qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order);
883 return (0);
884 }
885
886 static int __must_check
887 ccp_passthrough_sgl(struct ccp_queue *qp, bus_addr_t lsb_addr, bool tolsb,
888 struct sglist *sgl, bus_size_t len, bool interrupt,
889 const struct ccp_completion_ctx *cctx)
890 {
891 struct sglist_seg *seg;
892 size_t i, remain, nb;
893 int error;
894
895 remain = len;
896 for (i = 0; i < sgl->sg_nseg && remain != 0; i++) {
897 seg = &sgl->sg_segs[i];
898 /* crp lengths are int, so 32-bit min() is ok. */
899 nb = min(remain, seg->ss_len);
900
901 if (tolsb)
902 error = ccp_passthrough(qp, lsb_addr, CCP_MEMTYPE_SB,
903 seg->ss_paddr, CCP_MEMTYPE_SYSTEM, nb,
904 CCP_PASSTHRU_BYTESWAP_NOOP,
905 CCP_PASSTHRU_BITWISE_NOOP,
906 (nb == remain) && interrupt, cctx);
907 else
908 error = ccp_passthrough(qp, seg->ss_paddr,
909 CCP_MEMTYPE_SYSTEM, lsb_addr, CCP_MEMTYPE_SB, nb,
910 CCP_PASSTHRU_BYTESWAP_NOOP,
911 CCP_PASSTHRU_BITWISE_NOOP,
912 (nb == remain) && interrupt, cctx);
913 if (error != 0)
914 return (error);
915
916 remain -= nb;
917 }
918 return (0);
919 }
920
921 /*
922 * Note that these vectors are in reverse of the usual order.
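 * (For instance, SHA-256's first standard initial word, 0x6a09e667, appears
 * last in .SHA256 below.)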
923 */
924 const struct SHA_vectors {
925 uint32_t SHA1[8];
926 uint32_t SHA224[8];
927 uint32_t SHA256[8];
928 uint64_t SHA384[8];
929 uint64_t SHA512[8];
930 } SHA_H __aligned(PAGE_SIZE) = {
931 .SHA1 = {
932 0xc3d2e1f0ul,
933 0x10325476ul,
934 0x98badcfeul,
935 0xefcdab89ul,
936 0x67452301ul,
937 0,
938 0,
939 0,
940 },
941 .SHA224 = {
942 0xbefa4fa4ul,
943 0x64f98fa7ul,
944 0x68581511ul,
945 0xffc00b31ul,
946 0xf70e5939ul,
947 0x3070dd17ul,
948 0x367cd507ul,
949 0xc1059ed8ul,
950 },
951 .SHA256 = {
952 0x5be0cd19ul,
953 0x1f83d9abul,
954 0x9b05688cul,
955 0x510e527ful,
956 0xa54ff53aul,
957 0x3c6ef372ul,
958 0xbb67ae85ul,
959 0x6a09e667ul,
960 },
961 .SHA384 = {
962 0x47b5481dbefa4fa4ull,
963 0xdb0c2e0d64f98fa7ull,
964 0x8eb44a8768581511ull,
965 0x67332667ffc00b31ull,
966 0x152fecd8f70e5939ull,
967 0x9159015a3070dd17ull,
968 0x629a292a367cd507ull,
969 0xcbbb9d5dc1059ed8ull,
970 },
971 .SHA512 = {
972 0x5be0cd19137e2179ull,
973 0x1f83d9abfb41bd6bull,
974 0x9b05688c2b3e6c1full,
975 0x510e527fade682d1ull,
976 0xa54ff53a5f1d36f1ull,
977 0x3c6ef372fe94f82bull,
978 0xbb67ae8584caa73bull,
979 0x6a09e667f3bcc908ull,
980 },
981 };
982 /*
983 * Ensure vectors do not cross a page boundary.
984 *
985 * Disabled due to a new Clang error: "expression is not an integral constant
986 * expression." GCC (cross toolchain) seems to handle this assertion with
987 * _Static_assert just fine.
988 */
989 #if 0
990 CTASSERT(PAGE_SIZE - ((uintptr_t)&SHA_H % PAGE_SIZE) >= sizeof(SHA_H));
991 #endif
992
993 const struct SHA_Defn {
994 enum sha_version version;
995 const void *H_vectors;
996 size_t H_size;
997 const struct auth_hash *axf;
998 enum ccp_sha_type engine_type;
999 } SHA_definitions[] = {
1000 {
1001 .version = SHA1,
1002 .H_vectors = SHA_H.SHA1,
1003 .H_size = sizeof(SHA_H.SHA1),
1004 .axf = &auth_hash_hmac_sha1,
1005 .engine_type = CCP_SHA_TYPE_1,
1006 },
1007 #if 0
1008 {
1009 .version = SHA2_224,
1010 .H_vectors = SHA_H.SHA224,
1011 .H_size = sizeof(SHA_H.SHA224),
1012 .axf = &auth_hash_hmac_sha2_224,
1013 .engine_type = CCP_SHA_TYPE_224,
1014 },
1015 #endif
1016 {
1017 .version = SHA2_256,
1018 .H_vectors = SHA_H.SHA256,
1019 .H_size = sizeof(SHA_H.SHA256),
1020 .axf = &auth_hash_hmac_sha2_256,
1021 .engine_type = CCP_SHA_TYPE_256,
1022 },
1023 {
1024 .version = SHA2_384,
1025 .H_vectors = SHA_H.SHA384,
1026 .H_size = sizeof(SHA_H.SHA384),
1027 .axf = &auth_hash_hmac_sha2_384,
1028 .engine_type = CCP_SHA_TYPE_384,
1029 },
1030 {
1031 .version = SHA2_512,
1032 .H_vectors = SHA_H.SHA512,
1033 .H_size = sizeof(SHA_H.SHA512),
1034 .axf = &auth_hash_hmac_sha2_512,
1035 .engine_type = CCP_SHA_TYPE_512,
1036 },
1037 };
1038
1039 static int __must_check
1040 ccp_sha_single_desc(struct ccp_queue *qp, const struct SHA_Defn *defn,
1041 vm_paddr_t addr, size_t len, bool start, bool end, uint64_t msgbits)
1042 {
1043 struct ccp_desc *desc;
1044
1045 if (ccp_queue_get_ring_space(qp) == 0)
1046 return (EAGAIN);
1047
1048 desc = &qp->desc_ring[qp->cq_tail];
1049
1050 memset(desc, 0, sizeof(*desc));
1051 desc->engine = CCP_ENGINE_SHA;
1052 desc->som = start;
1053 desc->eom = end;
1054
1055 desc->sha.type = defn->engine_type;
1056 desc->length = len;
1057
1058 if (end) {
1059 desc->sha_len_lo = (uint32_t)msgbits;
1060 desc->sha_len_hi = msgbits >> 32;
1061 }
1062
1063 desc->src_lo = (uint32_t)addr;
1064 desc->src_hi = addr >> 32;
1065 desc->src_mem = CCP_MEMTYPE_SYSTEM;
1066
1067 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_SHA);
1068
1069 qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order);
1070 return (0);
1071 }
1072
1073 static int __must_check
1074 ccp_sha(struct ccp_queue *qp, enum sha_version version, struct sglist *sgl_src,
1075 struct sglist *sgl_dst, const struct ccp_completion_ctx *cctx)
1076 {
1077 const struct SHA_Defn *defn;
1078 struct sglist_seg *seg;
1079 size_t i, msgsize, remaining, nb;
1080 uint32_t lsbaddr;
1081 int error;
1082
1083 for (i = 0; i < nitems(SHA_definitions); i++)
1084 if (SHA_definitions[i].version == version)
1085 break;
1086 if (i == nitems(SHA_definitions))
1087 return (EINVAL);
1088 defn = &SHA_definitions[i];
1089
1090 /* XXX validate input ??? */
1091
1092 /* Load initial SHA state into LSB */
1093 /* XXX ensure H_vectors don't span page boundaries */
1094 error = ccp_passthrough(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_SHA),
1095 CCP_MEMTYPE_SB, pmap_kextract((vm_offset_t)defn->H_vectors),
1096 CCP_MEMTYPE_SYSTEM, roundup2(defn->H_size, LSB_ENTRY_SIZE),
1097 CCP_PASSTHRU_BYTESWAP_NOOP, CCP_PASSTHRU_BITWISE_NOOP, false,
1098 NULL);
1099 if (error != 0)
1100 return (error);
1101
1102 /* Execute series of SHA updates on correctly sized buffers */
1103 msgsize = 0;
1104 for (i = 0; i < sgl_src->sg_nseg; i++) {
1105 seg = &sgl_src->sg_segs[i];
1106 msgsize += seg->ss_len;
1107 error = ccp_sha_single_desc(qp, defn, seg->ss_paddr,
1108 seg->ss_len, i == 0, i == sgl_src->sg_nseg - 1,
1109 msgsize << 3);
1110 if (error != 0)
1111 return (error);
1112 }
1113
1114 /* Copy result out to sgl_dst */
1115 remaining = roundup2(defn->H_size, LSB_ENTRY_SIZE);
1116 lsbaddr = ccp_queue_lsb_address(qp, LSB_ENTRY_SHA);
1117 for (i = 0; i < sgl_dst->sg_nseg; i++) {
1118 seg = &sgl_dst->sg_segs[i];
1119 /* crp lengths are int, so 32-bit min() is ok. */
1120 nb = min(remaining, seg->ss_len);
1121
1122 error = ccp_passthrough(qp, seg->ss_paddr, CCP_MEMTYPE_SYSTEM,
1123 lsbaddr, CCP_MEMTYPE_SB, nb, CCP_PASSTHRU_BYTESWAP_NOOP,
1124 CCP_PASSTHRU_BITWISE_NOOP,
1125 (cctx != NULL) ? (nb == remaining) : false,
1126 (nb == remaining) ? cctx : NULL);
1127 if (error != 0)
1128 return (error);
1129
1130 remaining -= nb;
1131 lsbaddr += nb;
1132 if (remaining == 0)
1133 break;
1134 }
1135
1136 return (0);
1137 }
1138
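/*
 * Reverse the byte order of a 256-bit (32-byte) buffer in place, treating it
 * as four 64-bit words.
 */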
1139 static void
1140 byteswap256(uint64_t *buffer)
1141 {
1142 uint64_t t;
1143
1144 t = bswap64(buffer[3]);
1145 buffer[3] = bswap64(buffer[0]);
1146 buffer[0] = t;
1147
1148 t = bswap64(buffer[2]);
1149 buffer[2] = bswap64(buffer[1]);
1150 buffer[1] = t;
1151 }
1152
1153 /*
1154 * Translate CCP internal LSB hash format into a standard hash output.
1155 *
1156 * Manipulates input buffer with byteswap256 operation.
1157 */
1158 static void
1159 ccp_sha_copy_result(char *output, char *buffer, enum sha_version version)
1160 {
1161 const struct SHA_Defn *defn;
1162 size_t i;
1163
1164 for (i = 0; i < nitems(SHA_definitions); i++)
1165 if (SHA_definitions[i].version == version)
1166 break;
1167 if (i == nitems(SHA_definitions))
1168 panic("bogus sha version auth_mode %u\n", (unsigned)version);
1169
1170 defn = &SHA_definitions[i];
1171
1172 /* Swap 256bit manually -- DMA engine can, but with limitations */
1173 byteswap256((void *)buffer);
1174 if (defn->axf->hashsize > LSB_ENTRY_SIZE)
1175 byteswap256((void *)(buffer + LSB_ENTRY_SIZE));
1176
1177 switch (defn->version) {
1178 case SHA1:
1179 memcpy(output, buffer + 12, defn->axf->hashsize);
1180 break;
1181 #if 0
1182 case SHA2_224:
1183 memcpy(output, buffer + XXX, defn->axf->hashsize);
1184 break;
1185 #endif
1186 case SHA2_256:
1187 memcpy(output, buffer, defn->axf->hashsize);
1188 break;
1189 case SHA2_384:
1190 memcpy(output,
1191 buffer + LSB_ENTRY_SIZE * 3 - defn->axf->hashsize,
1192 defn->axf->hashsize - LSB_ENTRY_SIZE);
1193 memcpy(output + defn->axf->hashsize - LSB_ENTRY_SIZE, buffer,
1194 LSB_ENTRY_SIZE);
1195 break;
1196 case SHA2_512:
1197 memcpy(output, buffer + LSB_ENTRY_SIZE, LSB_ENTRY_SIZE);
1198 memcpy(output + LSB_ENTRY_SIZE, buffer, LSB_ENTRY_SIZE);
1199 break;
1200 }
1201 }
1202
1203 static void
1204 ccp_do_hmac_done(struct ccp_queue *qp, struct ccp_session *s,
1205 struct cryptop *crp, int error)
1206 {
1207 char ihash[SHA2_512_HASH_LEN /* max hash len */];
1208 union authctx auth_ctx;
1209 const struct auth_hash *axf;
1210
1211 axf = s->hmac.auth_hash;
1212
1213 s->pending--;
1214
1215 if (error != 0) {
1216 crp->crp_etype = error;
1217 goto out;
1218 }
1219
1220 /* Do remaining outer hash over small inner hash in software */
1221 axf->Init(&auth_ctx);
1222 axf->Update(&auth_ctx, s->hmac.opad, axf->blocksize);
1223 ccp_sha_copy_result(ihash, s->hmac.res, s->hmac.auth_mode);
1224 #if 0
1225 INSECURE_DEBUG(dev, "%s sha intermediate=%64D\n", __func__,
1226 (u_char *)ihash, " ");
1227 #endif
1228 axf->Update(&auth_ctx, ihash, axf->hashsize);
1229 axf->Final(s->hmac.res, &auth_ctx);
1230
1231 if (crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) {
1232 crypto_copydata(crp, crp->crp_digest_start, s->hmac.hash_len,
1233 ihash);
1234 if (timingsafe_bcmp(s->hmac.res, ihash, s->hmac.hash_len) != 0)
1235 crp->crp_etype = EBADMSG;
1236 } else
1237 crypto_copyback(crp, crp->crp_digest_start, s->hmac.hash_len,
1238 s->hmac.res);
1239
1240 /* Avoid leaking key material */
1241 explicit_bzero(&auth_ctx, sizeof(auth_ctx));
1242 explicit_bzero(s->hmac.res, sizeof(s->hmac.res));
1243
1244 out:
1245 crypto_done(crp);
1246 }
1247
1248 static void
1249 ccp_hmac_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1250 int error)
1251 {
1252 struct cryptop *crp;
1253
1254 crp = vcrp;
1255 ccp_do_hmac_done(qp, s, crp, error);
1256 }
1257
1258 static int __must_check
1259 ccp_do_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1260 const struct ccp_completion_ctx *cctx)
1261 {
1262 device_t dev;
1263 const struct auth_hash *axf;
1264 int error;
1265
1266 dev = qp->cq_softc->dev;
1267 axf = s->hmac.auth_hash;
1268
1269 /*
1270 * Populate the SGL describing inside hash contents. We want to hash
1271 * the ipad (key XOR fixed bit pattern) concatenated with the user
1272 * data.
1273 */
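/*
 * HMAC(K, m) = H((K ^ opad) || H((K ^ ipad) || m)): the engine computes the
 * inner hash here, and the short outer hash is finished in software by
 * ccp_do_hmac_done().
 */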
1274 sglist_reset(qp->cq_sg_ulptx);
1275 error = sglist_append(qp->cq_sg_ulptx, s->hmac.ipad, axf->blocksize);
1276 if (error != 0)
1277 return (error);
1278 if (crp->crp_aad_length != 0) {
1279 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1280 crp->crp_aad_start, crp->crp_aad_length);
1281 if (error != 0)
1282 return (error);
1283 }
1284 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1285 crp->crp_payload_start, crp->crp_payload_length);
1286 if (error != 0) {
1287 DPRINTF(dev, "%s: sglist too short\n", __func__);
1288 return (error);
1289 }
1290 /* Populate SGL for output -- use hmac.res buffer. */
1291 sglist_reset(qp->cq_sg_dst);
1292 error = sglist_append(qp->cq_sg_dst, s->hmac.res,
1293 roundup2(axf->hashsize, LSB_ENTRY_SIZE));
1294 if (error != 0)
1295 return (error);
1296
1297 error = ccp_sha(qp, s->hmac.auth_mode, qp->cq_sg_ulptx, qp->cq_sg_dst,
1298 cctx);
1299 if (error != 0) {
1300 DPRINTF(dev, "%s: ccp_sha error\n", __func__);
1301 return (error);
1302 }
1303 return (0);
1304 }
1305
1306 int __must_check
1307 ccp_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1308 {
1309 struct ccp_completion_ctx ctx;
1310
1311 ctx.callback_fn = ccp_hmac_done;
1312 ctx.callback_arg = crp;
1313 ctx.session = s;
1314
1315 return (ccp_do_hmac(qp, s, crp, &ctx));
1316 }
1317
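/*
 * Reverse the bytes of a buffer in place; used to convert key and IV material
 * into the ordering the hardware expects.
 */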
1318 static void
1319 ccp_byteswap(char *data, size_t len)
1320 {
1321 size_t i;
1322 char t;
1323
1324 len--;
1325 for (i = 0; i < len; i++, len--) {
1326 t = data[i];
1327 data[i] = data[len];
1328 data[len] = t;
1329 }
1330 }
1331
1332 static void
1333 ccp_blkcipher_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1334 int error)
1335 {
1336 struct cryptop *crp;
1337
1338 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
1339
1340 crp = vcrp;
1341
1342 s->pending--;
1343
1344 if (error != 0)
1345 crp->crp_etype = error;
1346
1347 DPRINTF(qp->cq_softc->dev, "%s: qp=%p crp=%p\n", __func__, qp, crp);
1348 crypto_done(crp);
1349 }
1350
1351 static void
1352 ccp_collect_iv(struct cryptop *crp, const struct crypto_session_params *csp,
1353 char *iv)
1354 {
1355
1356 crypto_read_iv(crp, iv);
1357
1358 /*
1359 * Append an explicit counter of 1 for GCM.
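 * For a 96-bit IV this forms GCM's initial counter block, IV || 0^31 || 1.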
1360 */
1361 if (csp->csp_cipher_alg == CRYPTO_AES_NIST_GCM_16)
1362 *(uint32_t *)&iv[12] = htobe32(1);
1363
1364 if (csp->csp_cipher_alg == CRYPTO_AES_XTS &&
1365 csp->csp_ivlen < AES_BLOCK_LEN)
1366 memset(&iv[csp->csp_ivlen], 0, AES_BLOCK_LEN - csp->csp_ivlen);
1367
1368 /* Reverse order of IV material for HW */
1369 INSECURE_DEBUG(NULL, "%s: IV: %16D len: %u\n", __func__, iv, " ",
1370 csp->csp_ivlen);
1371
1372 /*
1373 * For unknown reasons, XTS mode expects the IV in the reverse byte
1374 * order to every other AES mode.
1375 */
1376 if (csp->csp_cipher_alg != CRYPTO_AES_XTS)
1377 ccp_byteswap(iv, AES_BLOCK_LEN);
1378 }
1379
1380 static int __must_check
1381 ccp_do_pst_to_lsb(struct ccp_queue *qp, uint32_t lsbaddr, const void *src,
1382 size_t len)
1383 {
1384 int error;
1385
1386 sglist_reset(qp->cq_sg_ulptx);
1387 error = sglist_append(qp->cq_sg_ulptx, __DECONST(void *, src), len);
1388 if (error != 0)
1389 return (error);
1390
1391 error = ccp_passthrough_sgl(qp, lsbaddr, true, qp->cq_sg_ulptx, len,
1392 false, NULL);
1393 return (error);
1394 }
1395
1396 static int __must_check
1397 ccp_do_xts(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1398 enum ccp_cipher_dir dir, const struct ccp_completion_ctx *cctx)
1399 {
1400 struct ccp_desc *desc;
1401 device_t dev;
1402 unsigned i;
1403 enum ccp_xts_unitsize usize;
1404
1405 /* IV and Key data are already loaded */
1406
1407 dev = qp->cq_softc->dev;
1408
1409 for (i = 0; i < nitems(ccp_xts_unitsize_map); i++)
1410 if (ccp_xts_unitsize_map[i].cxu_size ==
1411 crp->crp_payload_length) {
1412 usize = ccp_xts_unitsize_map[i].cxu_id;
1413 break;
1414 }
1415 if (i >= nitems(ccp_xts_unitsize_map))
1416 return (EINVAL);
1417
1418 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1419 struct sglist_seg *seg;
1420
1421 seg = &qp->cq_sg_ulptx->sg_segs[i];
1422
1423 desc = &qp->desc_ring[qp->cq_tail];
1424 desc->engine = CCP_ENGINE_XTS_AES;
1425 desc->som = (i == 0);
1426 desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1);
1427 desc->ioc = (desc->eom && cctx != NULL);
1428 DPRINTF(dev, "%s: XTS %u: som:%d eom:%d ioc:%d dir:%d\n",
1429 __func__, qp->cq_tail, (int)desc->som, (int)desc->eom,
1430 (int)desc->ioc, (int)dir);
1431
1432 if (desc->ioc)
1433 memcpy(&qp->completions_ring[qp->cq_tail], cctx,
1434 sizeof(*cctx));
1435
1436 desc->aes_xts.encrypt = dir;
1437 desc->aes_xts.type = s->blkcipher.cipher_type;
1438 desc->aes_xts.size = usize;
1439
1440 DPRINTF(dev, "XXX %s: XTS %u: type:%u size:%u\n", __func__,
1441 qp->cq_tail, (unsigned)desc->aes_xts.type,
1442 (unsigned)desc->aes_xts.size);
1443
1444 desc->length = seg->ss_len;
1445 desc->src_lo = (uint32_t)seg->ss_paddr;
1446 desc->src_hi = (seg->ss_paddr >> 32);
1447 desc->src_mem = CCP_MEMTYPE_SYSTEM;
1448
1449 /* Crypt in-place */
1450 desc->dst_lo = desc->src_lo;
1451 desc->dst_hi = desc->src_hi;
1452 desc->dst_mem = desc->src_mem;
1453
1454 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1455 desc->key_hi = 0;
1456 desc->key_mem = CCP_MEMTYPE_SB;
1457
1458 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1459
1460 qp->cq_tail = (qp->cq_tail + 1) %
1461 (1 << qp->cq_softc->ring_size_order);
1462 }
1463 return (0);
1464 }
1465
1466 static int __must_check
1467 ccp_do_blkcipher(struct ccp_queue *qp, struct ccp_session *s,
1468 struct cryptop *crp, const struct ccp_completion_ctx *cctx)
1469 {
1470 const struct crypto_session_params *csp;
1471 struct ccp_desc *desc;
1472 char *keydata;
1473 device_t dev;
1474 enum ccp_cipher_dir dir;
1475 int error, iv_len;
1476 size_t keydata_len;
1477 unsigned i, j;
1478
1479 dev = qp->cq_softc->dev;
1480
1481 if (s->blkcipher.key_len == 0 || crp->crp_payload_length == 0) {
1482 DPRINTF(dev, "%s: empty\n", __func__);
1483 return (EINVAL);
1484 }
1485 if ((crp->crp_payload_length % AES_BLOCK_LEN) != 0) {
1486 DPRINTF(dev, "%s: len modulo: %d\n", __func__,
1487 crp->crp_payload_length);
1488 return (EINVAL);
1489 }
1490
1491 /*
1492 * Individual segments must be multiples of AES block size for the HW
1493 * to process them. Non-compliant inputs aren't bogus, just not doable
1494 * on this hardware.
1495 */
1496 for (i = 0; i < qp->cq_sg_crp->sg_nseg; i++)
1497 if ((qp->cq_sg_crp->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) {
1498 DPRINTF(dev, "%s: seg modulo: %zu\n", __func__,
1499 qp->cq_sg_crp->sg_segs[i].ss_len);
1500 return (EINVAL);
1501 }
1502
1503 /* Gather IV/nonce data */
1504 csp = crypto_get_params(crp->crp_session);
1505 ccp_collect_iv(crp, csp, s->blkcipher.iv);
1506 iv_len = csp->csp_ivlen;
1507 if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1508 iv_len = AES_BLOCK_LEN;
1509
1510 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1511 dir = CCP_CIPHER_DIR_ENCRYPT;
1512 else
1513 dir = CCP_CIPHER_DIR_DECRYPT;
1514
1515 /* Set up passthrough op(s) to copy IV into LSB */
1516 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1517 s->blkcipher.iv, iv_len);
1518 if (error != 0)
1519 return (error);
1520
1521 /*
1522 * Initialize keydata and keydata_len for GCC. The default case of the
1523 * following switch is impossible to reach, but GCC doesn't know that.
1524 */
1525 keydata_len = 0;
1526 keydata = NULL;
1527
1528 switch (csp->csp_cipher_alg) {
1529 case CRYPTO_AES_XTS:
1530 for (j = 0; j < nitems(ccp_xts_unitsize_map); j++)
1531 if (ccp_xts_unitsize_map[j].cxu_size ==
1532 crp->crp_payload_length)
1533 break;
1534 /* Input buffer must be a supported UnitSize */
1535 if (j >= nitems(ccp_xts_unitsize_map)) {
1536 device_printf(dev, "%s: rejected block size: %u\n",
1537 __func__, crp->crp_payload_length);
1538 return (EOPNOTSUPP);
1539 }
1540 /* FALLTHROUGH */
1541 case CRYPTO_AES_CBC:
1542 case CRYPTO_AES_ICM:
1543 keydata = s->blkcipher.enckey;
1544 keydata_len = s->blkcipher.key_len;
1545 break;
1546 }
1547
1548 INSECURE_DEBUG(dev, "%s: KEY(%zu): %16D\n", __func__, keydata_len,
1549 keydata, " ");
1550 if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1551 INSECURE_DEBUG(dev, "%s: KEY(XTS): %64D\n", __func__, keydata, " ");
1552
1553 /* Reverse order of key material for HW */
1554 ccp_byteswap(keydata, keydata_len);
1555
1556 /* Store key material into LSB to avoid page boundaries */
1557 if (csp->csp_cipher_alg == CRYPTO_AES_XTS) {
1558 /*
1559 * XTS mode uses 2 256-bit vectors for the primary key and the
1560 * tweak key. For 128-bit keys, the vectors are zero-padded.
1561 *
1562 * After byteswapping the combined OCF-provided K1:K2 vector
1563 * above, we need to reverse the order again so the hardware
1564 * gets the swapped keys in the order K1':K2'.
1565 */
1566 error = ccp_do_pst_to_lsb(qp,
1567 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1), keydata,
1568 keydata_len / 2);
1569 if (error != 0)
1570 return (error);
1571 error = ccp_do_pst_to_lsb(qp,
1572 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY),
1573 keydata + (keydata_len / 2), keydata_len / 2);
1574
1575 /* Zero-pad 128 bit keys */
1576 if (keydata_len == 32) {
1577 if (error != 0)
1578 return (error);
1579 error = ccp_do_pst_to_lsb(qp,
1580 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY) +
1581 keydata_len / 2, g_zeroes, keydata_len / 2);
1582 if (error != 0)
1583 return (error);
1584 error = ccp_do_pst_to_lsb(qp,
1585 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1) +
1586 keydata_len / 2, g_zeroes, keydata_len / 2);
1587 }
1588 } else
1589 error = ccp_do_pst_to_lsb(qp,
1590 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), keydata,
1591 keydata_len);
1592 if (error != 0)
1593 return (error);
1594
1595 /*
1596 * Point SGLs at the subset of cryptop buffer contents representing the
1597 * data.
1598 */
1599 sglist_reset(qp->cq_sg_ulptx);
1600 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1601 crp->crp_payload_start, crp->crp_payload_length);
1602 if (error != 0)
1603 return (error);
1604
1605 INSECURE_DEBUG(dev, "%s: Contents: %16D\n", __func__,
1606 (void *)PHYS_TO_DMAP(qp->cq_sg_ulptx->sg_segs[0].ss_paddr), " ");
1607
1608 DPRINTF(dev, "%s: starting AES ops @ %u\n", __func__, qp->cq_tail);
1609
1610 if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg)
1611 return (EAGAIN);
1612
1613 if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1614 return (ccp_do_xts(qp, s, crp, dir, cctx));
1615
1616 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1617 struct sglist_seg *seg;
1618
1619 seg = &qp->cq_sg_ulptx->sg_segs[i];
1620
1621 desc = &qp->desc_ring[qp->cq_tail];
1622 desc->engine = CCP_ENGINE_AES;
1623 desc->som = (i == 0);
1624 desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1);
1625 desc->ioc = (desc->eom && cctx != NULL);
1626 DPRINTF(dev, "%s: AES %u: som:%d eom:%d ioc:%d dir:%d\n",
1627 __func__, qp->cq_tail, (int)desc->som, (int)desc->eom,
1628 (int)desc->ioc, (int)dir);
1629
1630 if (desc->ioc)
1631 memcpy(&qp->completions_ring[qp->cq_tail], cctx,
1632 sizeof(*cctx));
1633
1634 desc->aes.encrypt = dir;
1635 desc->aes.mode = s->blkcipher.cipher_mode;
1636 desc->aes.type = s->blkcipher.cipher_type;
1637 if (csp->csp_cipher_alg == CRYPTO_AES_ICM)
1638 /*
1639 * Size of CTR value in bits, - 1. ICM mode uses all
1640 * 128 bits as counter.
1641 */
1642 desc->aes.size = 127;
1643
1644 DPRINTF(dev, "%s: AES %u: mode:%u type:%u size:%u\n", __func__,
1645 qp->cq_tail, (unsigned)desc->aes.mode,
1646 (unsigned)desc->aes.type, (unsigned)desc->aes.size);
1647
1648 desc->length = seg->ss_len;
1649 desc->src_lo = (uint32_t)seg->ss_paddr;
1650 desc->src_hi = (seg->ss_paddr >> 32);
1651 desc->src_mem = CCP_MEMTYPE_SYSTEM;
1652
1653 /* Crypt in-place */
1654 desc->dst_lo = desc->src_lo;
1655 desc->dst_hi = desc->src_hi;
1656 desc->dst_mem = desc->src_mem;
1657
1658 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1659 desc->key_hi = 0;
1660 desc->key_mem = CCP_MEMTYPE_SB;
1661
1662 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1663
1664 qp->cq_tail = (qp->cq_tail + 1) %
1665 (1 << qp->cq_softc->ring_size_order);
1666 }
1667 return (0);
1668 }
1669
1670 int __must_check
1671 ccp_blkcipher(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1672 {
1673 struct ccp_completion_ctx ctx;
1674
1675 ctx.callback_fn = ccp_blkcipher_done;
1676 ctx.session = s;
1677 ctx.callback_arg = crp;
1678
1679 return (ccp_do_blkcipher(qp, s, crp, &ctx));
1680 }
1681
1682 static void
1683 ccp_authenc_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1684 int error)
1685 {
1686 struct cryptop *crp;
1687
1688 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
1689
1690 crp = vcrp;
1691
1692 ccp_do_hmac_done(qp, s, crp, error);
1693 }
1694
1695 int __must_check
1696 ccp_authenc(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1697 {
1698 struct ccp_completion_ctx ctx;
1699 int error;
1700
1701 ctx.callback_fn = ccp_authenc_done;
1702 ctx.session = s;
1703 ctx.callback_arg = crp;
1704
1705 /* Perform first operation */
1706 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1707 error = ccp_do_blkcipher(qp, s, crp, NULL);
1708 else
1709 error = ccp_do_hmac(qp, s, crp, NULL);
1710 if (error != 0)
1711 return (error);
1712
1713 /* Perform second operation */
1714 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1715 error = ccp_do_hmac(qp, s, crp, &ctx);
1716 else
1717 error = ccp_do_blkcipher(qp, s, crp, &ctx);
1718 return (error);
1719 }
1720
1721 static int __must_check
1722 ccp_do_ghash_aad(struct ccp_queue *qp, struct ccp_session *s)
1723 {
1724 struct ccp_desc *desc;
1725 struct sglist_seg *seg;
1726 unsigned i;
1727
1728 if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg)
1729 return (EAGAIN);
1730
1731 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1732 seg = &qp->cq_sg_ulptx->sg_segs[i];
1733
1734 desc = &qp->desc_ring[qp->cq_tail];
1735
1736 desc->engine = CCP_ENGINE_AES;
1737 desc->aes.mode = CCP_AES_MODE_GHASH;
1738 desc->aes.type = s->blkcipher.cipher_type;
1739 desc->aes.encrypt = CCP_AES_MODE_GHASH_AAD;
1740
1741 desc->som = (i == 0);
1742 desc->length = seg->ss_len;
1743
1744 desc->src_lo = (uint32_t)seg->ss_paddr;
1745 desc->src_hi = (seg->ss_paddr >> 32);
1746 desc->src_mem = CCP_MEMTYPE_SYSTEM;
1747
1748 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1749
1750 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1751 desc->key_mem = CCP_MEMTYPE_SB;
1752
1753 qp->cq_tail = (qp->cq_tail + 1) %
1754 (1 << qp->cq_softc->ring_size_order);
1755 }
1756 return (0);
1757 }
1758
1759 static int __must_check
1760 ccp_do_gctr(struct ccp_queue *qp, struct ccp_session *s,
1761 enum ccp_cipher_dir dir, struct sglist_seg *seg, bool som, bool eom)
1762 {
1763 struct ccp_desc *desc;
1764
1765 if (ccp_queue_get_ring_space(qp) == 0)
1766 return (EAGAIN);
1767
1768 desc = &qp->desc_ring[qp->cq_tail];
1769
1770 desc->engine = CCP_ENGINE_AES;
1771 desc->aes.mode = CCP_AES_MODE_GCTR;
1772 desc->aes.type = s->blkcipher.cipher_type;
1773 desc->aes.encrypt = dir;
1774 desc->aes.size = 8 * (seg->ss_len % GMAC_BLOCK_LEN) - 1;
1775
1776 desc->som = som;
1777 desc->eom = eom;
1778
1779 /* Trailing bytes will be masked off by aes.size above. */
1780 desc->length = roundup2(seg->ss_len, GMAC_BLOCK_LEN);
1781
1782 desc->dst_lo = desc->src_lo = (uint32_t)seg->ss_paddr;
1783 desc->dst_hi = desc->src_hi = seg->ss_paddr >> 32;
1784 desc->dst_mem = desc->src_mem = CCP_MEMTYPE_SYSTEM;
1785
1786 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1787
1788 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1789 desc->key_mem = CCP_MEMTYPE_SB;
1790
1791 qp->cq_tail = (qp->cq_tail + 1) %
1792 (1 << qp->cq_softc->ring_size_order);
1793 return (0);
1794 }
1795
1796 static int __must_check
1797 ccp_do_ghash_final(struct ccp_queue *qp, struct ccp_session *s)
1798 {
1799 struct ccp_desc *desc;
1800
1801 if (ccp_queue_get_ring_space(qp) == 0)
1802 return (EAGAIN);
1803
1804 desc = &qp->desc_ring[qp->cq_tail];
1805
1806 desc->engine = CCP_ENGINE_AES;
1807 desc->aes.mode = CCP_AES_MODE_GHASH;
1808 desc->aes.type = s->blkcipher.cipher_type;
1809 desc->aes.encrypt = CCP_AES_MODE_GHASH_FINAL;
1810
1811 desc->length = GMAC_BLOCK_LEN;
1812
1813 desc->src_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN);
1814 desc->src_mem = CCP_MEMTYPE_SB;
1815
1816 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1817
1818 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1819 desc->key_mem = CCP_MEMTYPE_SB;
1820
1821 desc->dst_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH);
1822 desc->dst_mem = CCP_MEMTYPE_SB;
1823
1824 qp->cq_tail = (qp->cq_tail + 1) %
1825 (1 << qp->cq_softc->ring_size_order);
1826 return (0);
1827 }
1828
1829 static void
1830 ccp_gcm_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1831 int error)
1832 {
1833 char tag[GMAC_DIGEST_LEN];
1834 struct cryptop *crp;
1835
1836 crp = vcrp;
1837
1838 s->pending--;
1839
1840 if (error != 0) {
1841 crp->crp_etype = error;
1842 goto out;
1843 }
1844
1845 /* Encrypt is done. Decrypt needs to verify tag. */
1846 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1847 goto out;
1848
1849 /* Copy in message tag. */
1850 crypto_copydata(crp, crp->crp_digest_start, s->gmac.hash_len, tag);
1851
1852 /* Verify tag against computed GMAC */
1853 if (timingsafe_bcmp(tag, s->gmac.final_block, s->gmac.hash_len) != 0)
1854 crp->crp_etype = EBADMSG;
1855
1856 out:
1857 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
1858 explicit_bzero(&s->gmac.final_block, sizeof(s->gmac.final_block));
1859 crypto_done(crp);
1860 }
1861
1862 int __must_check
1863 ccp_gcm(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1864 {
1865 const struct crypto_session_params *csp;
1866 struct ccp_completion_ctx ctx;
1867 enum ccp_cipher_dir dir;
1868 device_t dev;
1869 unsigned i;
1870 int error;
1871
1872 if (s->blkcipher.key_len == 0)
1873 return (EINVAL);
1874
1875 dev = qp->cq_softc->dev;
1876
1877 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1878 dir = CCP_CIPHER_DIR_ENCRYPT;
1879 else
1880 dir = CCP_CIPHER_DIR_DECRYPT;
1881
1882 /* Zero initial GHASH portion of context */
1883 memset(s->blkcipher.iv, 0, sizeof(s->blkcipher.iv));
1884
1885 /* Gather IV data */
1886 csp = crypto_get_params(crp->crp_session);
1887 ccp_collect_iv(crp, csp, s->blkcipher.iv);
1888
1889 /* Reverse order of key material for HW */
1890 ccp_byteswap(s->blkcipher.enckey, s->blkcipher.key_len);
1891
1892 /* Prepare input buffer of concatenated lengths for final GHASH */
1893 be64enc(s->gmac.final_block, (uint64_t)crp->crp_aad_length * 8);
1894 be64enc(&s->gmac.final_block[8], (uint64_t)crp->crp_payload_length * 8);
1895
1896 /* Send IV + initial zero GHASH, key data, and lengths buffer to LSB */
1897 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1898 s->blkcipher.iv, 32);
1899 if (error != 0)
1900 return (error);
1901 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_KEY),
1902 s->blkcipher.enckey, s->blkcipher.key_len);
1903 if (error != 0)
1904 return (error);
1905 error = ccp_do_pst_to_lsb(qp,
1906 ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN), s->gmac.final_block,
1907 GMAC_BLOCK_LEN);
1908 if (error != 0)
1909 return (error);
1910
1911 /* First step - compute GHASH over AAD */
1912 if (crp->crp_aad_length != 0) {
1913 sglist_reset(qp->cq_sg_ulptx);
1914 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1915 crp->crp_aad_start, crp->crp_aad_length);
1916 if (error != 0)
1917 return (error);
1918
1919 		/* The engine cannot process AAD that is not a block multiple. */
1920 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++)
1921 if ((qp->cq_sg_ulptx->sg_segs[i].ss_len %
1922 GMAC_BLOCK_LEN) != 0) {
1923 DPRINTF(dev, "%s: AD seg modulo: %zu\n",
1924 __func__,
1925 qp->cq_sg_ulptx->sg_segs[i].ss_len);
1926 return (EINVAL);
1927 }
1928
1929 error = ccp_do_ghash_aad(qp, s);
1930 if (error != 0)
1931 return (error);
1932 }
1933
1934 /* Feed data piece by piece into GCTR */
1935 sglist_reset(qp->cq_sg_ulptx);
1936 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1937 crp->crp_payload_start, crp->crp_payload_length);
1938 if (error != 0)
1939 return (error);
1940
1941 	/*
1942 	 * Every segment but the last must be a multiple of the AES block
1943 	 * size for the hardware to process it.  Non-compliant inputs aren't
1944 	 * bogus, just not doable on this hardware.
1945 	 *
1946 	 * XXX: The hardware will produce a valid tag for a shorter final
1947 	 * segment, but it still writes out a block-sized plaintext or
1948 	 * ciphertext chunk.  For a typical CRP this tramples trailing data,
1949 	 * including the provided message tag.  So, reject such inputs for now.
1950 	 */
1951 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++)
1952 if ((qp->cq_sg_ulptx->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) {
1953 DPRINTF(dev, "%s: seg modulo: %zu\n", __func__,
1954 qp->cq_sg_ulptx->sg_segs[i].ss_len);
1955 return (EINVAL);
1956 }
1957
1958 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1959 struct sglist_seg *seg;
1960
1961 seg = &qp->cq_sg_ulptx->sg_segs[i];
1962 error = ccp_do_gctr(qp, s, dir, seg,
1963 (i == 0 && crp->crp_aad_length == 0),
1964 i == (qp->cq_sg_ulptx->sg_nseg - 1));
1965 if (error != 0)
1966 return (error);
1967 }
1968
1969 	/* Send just the initial IV (not the GHASH state) to the LSB again. */
1970 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1971 s->blkcipher.iv, AES_BLOCK_LEN);
1972 if (error != 0)
1973 return (error);
1974
1975 ctx.callback_fn = ccp_gcm_done;
1976 ctx.session = s;
1977 ctx.callback_arg = crp;
1978
1979 /* Compute final hash and copy result back */
1980 error = ccp_do_ghash_final(qp, s);
1981 if (error != 0)
1982 return (error);
1983
1984 /* When encrypting, copy computed tag out to caller buffer. */
1985 sglist_reset(qp->cq_sg_ulptx);
1986 if (dir == CCP_CIPHER_DIR_ENCRYPT)
1987 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1988 crp->crp_digest_start, s->gmac.hash_len);
1989 else
1990 /*
1991 * For decrypting, copy the computed tag out to our session
1992 * buffer to verify in our callback.
1993 */
1994 error = sglist_append(qp->cq_sg_ulptx, s->gmac.final_block,
1995 s->gmac.hash_len);
1996 if (error != 0)
1997 return (error);
1998 error = ccp_passthrough_sgl(qp,
1999 ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH), false, qp->cq_sg_ulptx,
2000 s->gmac.hash_len, true, &ctx);
2001 return (error);
2002 }
2003
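/*
 * Read TRNG output for the random(4) entropy source, one 32-bit word at a
 * time.  A zero word from TRNG_OUT_OFFSET means no entropy is available yet;
 * retry a bounded number of times and return 0 bytes if the TRNG stays empty.
 */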
2004 #define MAX_TRNG_RETRIES 10
2005 u_int
2006 random_ccp_read(void *v, u_int c)
2007 {
2008 uint32_t *buf;
2009 u_int i, j;
2010
2011 	KASSERT(c % sizeof(*buf) == 0, ("%u not a multiple of uint32_t", c));
2012
2013 buf = v;
2014 for (i = c; i > 0; i -= sizeof(*buf)) {
2015 for (j = 0; j < MAX_TRNG_RETRIES; j++) {
2016 *buf = ccp_read_4(g_ccp_softc, TRNG_OUT_OFFSET);
2017 if (*buf != 0)
2018 break;
2019 }
2020 if (j == MAX_TRNG_RETRIES)
2021 return (0);
2022 buf++;
2023 }
2024 return (c);
2026 }
2027
2028 #ifdef DDB
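/* Dump the global (non-queue) CCP configuration registers from DDB. */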
2029 void
2030 db_ccp_show_hw(struct ccp_softc *sc)
2031 {
2032
2033 db_printf(" queue mask: 0x%x\n",
2034 ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET));
2035 db_printf(" queue prio: 0x%x\n",
2036 ccp_read_4(sc, CMD_QUEUE_PRIO_OFFSET));
2037 db_printf(" reqid: 0x%x\n", ccp_read_4(sc, CMD_REQID_CONFIG_OFFSET));
2038 db_printf(" trng output: 0x%x\n", ccp_read_4(sc, TRNG_OUT_OFFSET));
2039 db_printf(" cmd timeout: 0x%x\n",
2040 ccp_read_4(sc, CMD_CMD_TIMEOUT_OFFSET));
2041 db_printf(" lsb public mask lo: 0x%x\n",
2042 ccp_read_4(sc, LSB_PUBLIC_MASK_LO_OFFSET));
2043 db_printf(" lsb public mask hi: 0x%x\n",
2044 ccp_read_4(sc, LSB_PUBLIC_MASK_HI_OFFSET));
2045 db_printf(" lsb private mask lo: 0x%x\n",
2046 ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET));
2047 db_printf(" lsb private mask hi: 0x%x\n",
2048 ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET));
2049 db_printf(" version: 0x%x\n", ccp_read_4(sc, VERSION_REG));
2050 }
2051
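/*
 * Dump a single command queue's control and status registers.  If the status
 * word reports an error, decode the error code, source, and faulting LSB
 * block, and print the contents of the offending descriptor.
 */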
2052 void
2053 db_ccp_show_queue_hw(struct ccp_queue *qp)
2054 {
2055 const struct ccp_error_code *ec;
2056 struct ccp_softc *sc;
2057 uint32_t status, error, esource, faultblock, headlo, qcontrol;
2058 unsigned q, i;
2059
2060 sc = qp->cq_softc;
2061 q = qp->cq_qindex;
2062
2063 qcontrol = ccp_read_queue_4(sc, q, CMD_Q_CONTROL_BASE);
2064 db_printf(" qcontrol: 0x%x%s%s\n", qcontrol,
2065 (qcontrol & CMD_Q_RUN) ? " RUN" : "",
2066 (qcontrol & CMD_Q_HALTED) ? " HALTED" : "");
2067 db_printf(" tail_lo: 0x%x\n",
2068 ccp_read_queue_4(sc, q, CMD_Q_TAIL_LO_BASE));
2069 headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
2070 db_printf(" head_lo: 0x%x\n", headlo);
2071 db_printf(" int enable: 0x%x\n",
2072 ccp_read_queue_4(sc, q, CMD_Q_INT_ENABLE_BASE));
2073 db_printf(" interrupt status: 0x%x\n",
2074 ccp_read_queue_4(sc, q, CMD_Q_INTERRUPT_STATUS_BASE));
2075 status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);
2076 db_printf(" status: 0x%x\n", status);
2077 db_printf(" int stats: 0x%x\n",
2078 ccp_read_queue_4(sc, q, CMD_Q_INT_STATUS_BASE));
2079
2080 error = status & STATUS_ERROR_MASK;
2081 if (error == 0)
2082 return;
2083
2084 esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
2085 STATUS_ERRORSOURCE_MASK;
2086 faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
2087 STATUS_VLSB_FAULTBLOCK_MASK;
2088
2089 ec = NULL;
2090 for (i = 0; i < nitems(ccp_error_codes); i++)
2091 if (ccp_error_codes[i].ce_code == error)
2092 break;
2093 if (i < nitems(ccp_error_codes))
2094 ec = &ccp_error_codes[i];
2095
2096 db_printf(" Error: %s (%u) Source: %u Faulting LSB block: %u\n",
2097 (ec != NULL) ? ec->ce_name : "(reserved)", error, esource,
2098 faultblock);
2099 if (ec != NULL)
2100 db_printf(" Error description: %s\n", ec->ce_desc);
2101
2102 i = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;
2103 db_printf(" Bad descriptor idx: %u contents:\n %32D\n", i,
2104 (void *)&qp->desc_ring[i], " ");
2105 }
2106 #endif