/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/memdesc.h>
#include <sys/msan.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <dev/iommu/iommu.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/iommu.h>
#include <dev/iommu/busdma_iommu.h>

/*
 * busdma_iommu.c, the implementation of the busdma(9) interface using
 * IOMMU units from Intel VT-d.
 */

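/*
 * Check the hw.busdma.default and per-device hw.busdma.pci%d.%d.%d.%d
 * tunables to decide whether the given PCI device should bypass the
 * IOMMU and use bounce buffering instead.  The value "bounce" selects
 * bounce buffering; "iommu" (or the legacy "dmar" spelling) selects
 * IOMMU translation.
 */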
static bool
iommu_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
{
	char str[128], *env;
	int default_bounce;
	bool ret;
	static const char bounce_str[] = "bounce";
	static const char iommu_str[] = "iommu";
	static const char dmar_str[] = "dmar"; /* compatibility */

	default_bounce = 0;
	env = kern_getenv("hw.busdma.default");
	if (env != NULL) {
		if (strcmp(env, bounce_str) == 0)
			default_bounce = 1;
		else if (strcmp(env, iommu_str) == 0 ||
		    strcmp(env, dmar_str) == 0)
			default_bounce = 0;
		freeenv(env);
	}

	snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d",
	    domain, bus, slot, func);
	env = kern_getenv(str);
	if (env == NULL)
		return (default_bounce != 0);
	if (strcmp(env, bounce_str) == 0)
		ret = true;
	else if (strcmp(env, iommu_str) == 0 ||
	    strcmp(env, dmar_str) == 0)
		ret = false;
	else
		ret = default_bounce != 0;
	freeenv(env);
	return (ret);
}

/*
 * Given original device, find the requester ID that will be seen by
 * the IOMMU unit and used for page table lookup. PCI bridges may take
 * ownership of transactions from downstream devices, so it may not be
 * the same as the BSF of the target device. In those cases, all
 * devices downstream of the bridge must share a single mapping
 * domain, and must collectively be assigned to use either IOMMU or
 * bounce mapping.
 */
device_t
iommu_get_requester(device_t dev, uint16_t *rid)
{
	devclass_t pci_class;
	device_t l, pci, pcib, pcip, pcibp, requester;
	int cap_offset;
	uint16_t pcie_flags;
	bool bridge_is_pcie;

	pci_class = devclass_find("pci");
	l = requester = dev;

	*rid = pci_get_rid(dev);

	/*
	 * Walk the bridge hierarchy from the target device to the
	 * host port to find the translating bridge nearest the IOMMU
	 * unit.
	 */
	for (;;) {
		pci = device_get_parent(l);
		KASSERT(pci != NULL, ("iommu_get_requester(%s): NULL parent "
		    "for %s", device_get_name(dev), device_get_name(l)));
		KASSERT(device_get_devclass(pci) == pci_class,
		    ("iommu_get_requester(%s): non-pci parent %s for %s",
		    device_get_name(dev), device_get_name(pci),
		    device_get_name(l)));

		pcib = device_get_parent(pci);
		KASSERT(pcib != NULL, ("iommu_get_requester(%s): NULL bridge "
		    "for %s", device_get_name(dev), device_get_name(pci)));

		/*
		 * The parent of our "bridge" isn't another PCI bus,
		 * so pcib isn't a PCI->PCI bridge but rather a host
		 * port, and the requester ID won't be translated
		 * further.
		 */
		pcip = device_get_parent(pcib);
		if (device_get_devclass(pcip) != pci_class)
			break;
		pcibp = device_get_parent(pcip);

		if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
			/*
			 * Do not stop the loop even if the target
			 * device is PCIe, because it is possible (but
			 * unlikely) to have a PCI->PCIe bridge
			 * somewhere in the hierarchy.
			 */
			l = pcib;
		} else {
			/*
			 * The device is not PCIe, so it cannot be seen
			 * as a requester by the IOMMU unit. Check
			 * whether the bridge is PCIe.
			 */
			bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
			    &cap_offset) == 0;
			requester = pcib;

			/*
			 * Check for a buggy PCIe/PCI bridge that
			 * doesn't report the express capability. If
			 * the bridge above it is express but isn't a
			 * PCI bridge, then we know pcib is actually a
			 * PCIe/PCI bridge.
			 */
			if (!bridge_is_pcie && pci_find_cap(pcibp,
			    PCIY_EXPRESS, &cap_offset) == 0) {
				pcie_flags = pci_read_config(pcibp,
				    cap_offset + PCIER_FLAGS, 2);
				if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
				    PCIEM_TYPE_PCI_BRIDGE)
					bridge_is_pcie = true;
			}

			if (bridge_is_pcie) {
				/*
				 * The current device is not PCIe, but
				 * the bridge above it is. This is a
				 * PCIe->PCI bridge. Assume that the
				 * requester ID will be the secondary
				 * bus number with slot and function
				 * set to zero.
				 *
				 * XXX: Doesn't handle the case where
				 * the bridge is PCIe->PCI-X, and the
				 * bridge will only take ownership of
				 * requests in some cases. We should
				 * provide context entries with the
				 * same page tables for taken and
				 * non-taken transactions.
				 */
				*rid = PCI_RID(pci_get_bus(l), 0, 0);
				l = pcibp;
			} else {
				/*
				 * Neither the device nor the bridge
				 * above it are PCIe. This is a
				 * conventional PCI->PCI bridge, which
				 * will use the bridge's BSF as the
				 * requester ID.
				 */
				*rid = pci_get_rid(pcib);
				l = pcib;
			}
		}
	}
	return (requester);
}

struct iommu_ctx *
iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr)
{
	device_t requester;
	struct iommu_ctx *ctx;
	bool disabled;
	uint16_t rid;

	requester = iommu_get_requester(dev, &rid);

	/*
	 * If the user requested that the IOMMU be disabled for the
	 * device, we cannot disable the IOMMU unit itself, because
	 * other devices on the same unit may still require
	 * translation. Instead, provide the identity mapping for the
	 * device context.
	 */
	disabled = iommu_bus_dma_is_dev_disabled(pci_get_domain(requester),
	    pci_get_bus(requester), pci_get_slot(requester),
	    pci_get_function(requester));
	ctx = iommu_get_ctx(unit, requester, rid, disabled, rmrr);
	if (ctx == NULL)
		return (NULL);
	if (disabled) {
		/*
		 * Keep the first reference on context, release the
		 * later refs.
		 */
		IOMMU_LOCK(unit);
		if ((ctx->flags & IOMMU_CTX_DISABLED) == 0) {
			ctx->flags |= IOMMU_CTX_DISABLED;
			IOMMU_UNLOCK(unit);
		} else {
			iommu_free_ctx_locked(unit, ctx);
		}
		ctx = NULL;
	}
	return (ctx);
}

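/*
 * Return the IOMMU context to use for DMA from the given device, or
 * NULL if the device is outside the scope of any IOMMU unit or if DMA
 * translation is disabled for the unit that covers it.
 */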
struct iommu_ctx *
iommu_get_dev_ctx(device_t dev)
{
	struct iommu_unit *unit;

	unit = iommu_find(dev, bootverbose);
	/* Not in scope of any IOMMU? */
	if (unit == NULL)
		return (NULL);
	if (!unit->dma_enabled)
		return (NULL);

#if defined(__amd64__) || defined(__i386__)
	dmar_quirks_pre_use(unit);
	dmar_instantiate_rmrr_ctxs(unit);
#endif

	return (iommu_instantiate_ctx(unit, dev, false));
}

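/*
 * Return the IOMMU-backed DMA tag created for the child device's
 * context, or NULL if the child is not translated by an IOMMU, in
 * which case the caller is expected to fall back to its regular tag.
 */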
bus_dma_tag_t
iommu_get_dma_tag(device_t dev, device_t child)
{
	struct iommu_ctx *ctx;
	bus_dma_tag_t res;

	ctx = iommu_get_dev_ctx(child);
	if (ctx == NULL)
		return (NULL);

	res = (bus_dma_tag_t)ctx->tag;
	return (res);
}

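/*
 * Mark the PCI bus that the given device sits on as "bus-wide", so
 * that all requester IDs on that bus are handled by a single IOMMU
 * context.  The device must be located at slot 0, function 0;
 * otherwise the request is refused.
 */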
bool
bus_dma_iommu_set_buswide(device_t dev)
{
	struct iommu_unit *unit;
	device_t parent;
	u_int busno, slot, func;

	parent = device_get_parent(dev);
	if (device_get_devclass(parent) != devclass_find("pci"))
		return (false);
	unit = iommu_find(dev, bootverbose);
	if (unit == NULL)
		return (false);
	busno = pci_get_bus(dev);
	slot = pci_get_slot(dev);
	func = pci_get_function(dev);
	if (slot != 0 || func != 0) {
		if (bootverbose) {
			device_printf(dev,
			    "iommu%d pci%d:%d:%d requested buswide busdma\n",
			    unit->unit, busno, slot, func);
		}
		return (false);
	}
	iommu_set_buswide_ctx(unit, busno);
	return (true);
}

void
iommu_set_buswide_ctx(struct iommu_unit *unit, u_int busno)
{

	MPASS(busno <= PCI_BUSMAX);
	IOMMU_LOCK(unit);
	unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] |=
	    1 << (busno % (NBBY * sizeof(uint32_t)));
	IOMMU_UNLOCK(unit);
}

bool
iommu_is_buswide_ctx(struct iommu_unit *unit, u_int busno)
{

	MPASS(busno <= PCI_BUSMAX);
	return ((unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] &
	    (1U << (busno % (NBBY * sizeof(uint32_t))))) != 0);
}

static MALLOC_DEFINE(M_IOMMU_DMAMAP, "iommu_dmamap", "IOMMU DMA Map");

static void iommu_bus_schedule_dmamap(struct iommu_unit *unit,
    struct bus_dmamap_iommu *map);

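/*
 * busdma tag method: create a child of an IOMMU-backed tag.  The new
 * tag inherits the IOMMU context and the owner device from its parent
 * tag.
 */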
static int
iommu_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
    int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
    void *lockfuncarg, bus_dma_tag_t *dmat)
{
	struct bus_dma_tag_iommu *newtag, *oldtag;
	int error;

	*dmat = NULL;
	error = common_bus_dma_tag_create(parent != NULL ?
	    &((struct bus_dma_tag_iommu *)parent)->common : NULL, alignment,
	    boundary, lowaddr, highaddr, filter, filterarg, maxsize,
	    nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
	    sizeof(struct bus_dma_tag_iommu), (void **)&newtag);
	if (error != 0)
		goto out;

	oldtag = (struct bus_dma_tag_iommu *)parent;
	newtag->common.impl = &bus_dma_iommu_impl;
	newtag->ctx = oldtag->ctx;
	newtag->owner = oldtag->owner;

	*dmat = (bus_dma_tag_t)newtag;
out:
	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
	    error);
	return (error);
}

static int
iommu_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
{

	return (0);
}

static int
iommu_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
{
	struct bus_dma_tag_iommu *dmat, *parent;
	struct bus_dma_tag_iommu *dmat_copy __unused;
	int error;

	error = 0;
	dmat_copy = dmat = (struct bus_dma_tag_iommu *)dmat1;

	if (dmat != NULL) {
		if (dmat->map_count != 0) {
			error = EBUSY;
			goto out;
		}
		while (dmat != NULL) {
			parent = (struct bus_dma_tag_iommu *)dmat->common.parent;
			if (atomic_fetchadd_int(&dmat->common.ref_count, -1) ==
			    1) {
				if (dmat == dmat->ctx->tag)
					iommu_free_ctx(dmat->ctx);
				free(dmat->segments, M_IOMMU_DMAMAP);
				free(dmat, M_DEVBUF);
				dmat = parent;
			} else
				dmat = NULL;
		}
	}
out:
	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
	return (error);
}

static bool
iommu_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
{

	return (false);
}

static int
iommu_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = malloc_domainset(sizeof(*map), M_IOMMU_DMAMAP,
	    DOMAINSET_PREF(tag->common.domain), M_NOWAIT | M_ZERO);
	if (map == NULL) {
		*mapp = NULL;
		return (ENOMEM);
	}
	if (tag->segments == NULL) {
		tag->segments = malloc_domainset(sizeof(bus_dma_segment_t) *
		    tag->common.nsegments, M_IOMMU_DMAMAP,
		    DOMAINSET_PREF(tag->common.domain), M_NOWAIT);
		if (tag->segments == NULL) {
			free(map, M_IOMMU_DMAMAP);
			*mapp = NULL;
			return (ENOMEM);
		}
	}
	IOMMU_DMAMAP_INIT(map);
	TAILQ_INIT(&map->map_entries);
	map->tag = tag;
	map->locked = true;
	map->cansleep = false;
	tag->map_count++;
	*mapp = (bus_dmamap_t)map;

	return (0);
}

static int
iommu_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	if (map != NULL) {
		IOMMU_DMAMAP_LOCK(map);
		if (!TAILQ_EMPTY(&map->map_entries)) {
			IOMMU_DMAMAP_UNLOCK(map);
			return (EBUSY);
		}
		IOMMU_DMAMAP_DESTROY(map);
		free(map, M_IOMMU_DMAMAP);
	}
	tag->map_count--;
	return (0);
}

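/*
 * busdma method: allocate memory suitable for DMA.  Small allocations
 * with the default memory attribute are served by malloc(9); larger or
 * specially-attributed requests go through kmem_alloc_attr_domainset().
 * The choice is recorded in the map flags so that the free path can
 * release the memory the same way it was allocated.
 */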
static int
iommu_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
    bus_dmamap_t *mapp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	int error, mflags;
	vm_memattr_t attr;

	error = iommu_bus_dmamap_create(dmat, flags, mapp);
	if (error != 0)
		return (error);

	mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
	mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
	attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
	    VM_MEMATTR_DEFAULT;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)*mapp;

	if (tag->common.maxsize < PAGE_SIZE &&
	    tag->common.alignment <= tag->common.maxsize &&
	    attr == VM_MEMATTR_DEFAULT) {
		*vaddr = malloc_domainset(tag->common.maxsize, M_DEVBUF,
		    DOMAINSET_PREF(tag->common.domain), mflags);
		map->flags |= BUS_DMAMAP_IOMMU_MALLOC;
	} else {
		*vaddr = kmem_alloc_attr_domainset(
		    DOMAINSET_PREF(tag->common.domain), tag->common.maxsize,
		    mflags, 0ul, BUS_SPACE_MAXADDR, attr);
		map->flags |= BUS_DMAMAP_IOMMU_KMEM_ALLOC;
	}
	if (*vaddr == NULL) {
		iommu_bus_dmamap_destroy(dmat, *mapp);
		*mapp = NULL;
		return (ENOMEM);
	}
	return (0);
}

static void
iommu_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;

	if ((map->flags & BUS_DMAMAP_IOMMU_MALLOC) != 0) {
		free(vaddr, M_DEVBUF);
		map->flags &= ~BUS_DMAMAP_IOMMU_MALLOC;
	} else {
		KASSERT((map->flags & BUS_DMAMAP_IOMMU_KMEM_ALLOC) != 0,
		    ("iommu_bus_dmamem_free for non alloced map %p", map));
		kmem_free(vaddr, tag->common.maxsize);
		map->flags &= ~BUS_DMAMAP_IOMMU_KMEM_ALLOC;
	}

	iommu_bus_dmamap_destroy(dmat, map1);
}

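/*
 * Map the physical pages backing a buffer into the domain's I/O
 * address space, one busdma segment at a time.  Map entries created
 * for the buffer are appended to the caller-supplied list so that a
 * failed load can be rolled back.  Returns EFBIG if the buffer does
 * not fit into the remaining segments.
 */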
static int
iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp,
    struct iommu_map_entries_tailq *entries)
{
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	bus_size_t buflen1;
	int error, e_flags, idx, gas_flags, seg;

	KASSERT(offset < IOMMU_PAGE_SIZE, ("offset %d", offset));
	if (segs == NULL)
		segs = tag->segments;
	ctx = tag->ctx;
	domain = ctx->domain;
	e_flags = IOMMU_MAP_ENTRY_READ |
	    ((flags & BUS_DMA_NOWRITE) == 0 ? IOMMU_MAP_ENTRY_WRITE : 0);
	seg = *segp;
	error = 0;
	idx = 0;
	while (buflen > 0) {
		seg++;
		if (seg >= tag->common.nsegments) {
			error = EFBIG;
			break;
		}
		buflen1 = buflen > tag->common.maxsegsz ?
		    tag->common.maxsegsz : buflen;

		/*
		 * (Too) optimistically allow split if there is more
		 * than one segment left.
		 */
		gas_flags = map->cansleep ? IOMMU_MF_CANWAIT : 0;
		if (seg + 1 < tag->common.nsegments)
			gas_flags |= IOMMU_MF_CANSPLIT;

		error = iommu_gas_map(domain, &tag->common, buflen1,
		    offset, e_flags, gas_flags, ma + idx, &entry);
		if (error != 0)
			break;
		/* Update buflen1 in case buffer split. */
		if (buflen1 > entry->end - entry->start - offset)
			buflen1 = entry->end - entry->start - offset;

		KASSERT(vm_addr_align_ok(entry->start + offset,
		    tag->common.alignment),
		    ("alignment failed: ctx %p start 0x%jx offset %x "
		    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
		    (uintmax_t)tag->common.alignment));
		KASSERT(entry->end <= tag->common.lowaddr ||
		    entry->start >= tag->common.highaddr,
		    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
		    "lowaddr 0x%jx highaddr 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)tag->common.lowaddr,
		    (uintmax_t)tag->common.highaddr));
		KASSERT(vm_addr_bound_ok(entry->start + offset, buflen1,
		    tag->common.boundary),
		    ("boundary failed: ctx %p start 0x%jx end 0x%jx "
		    "boundary 0x%jx", ctx, (uintmax_t)entry->start,
		    (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
		KASSERT(buflen1 <= tag->common.maxsegsz,
		    ("segment too large: ctx %p start 0x%jx end 0x%jx "
		    "buflen1 0x%jx maxsegsz 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));

		KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
		    ("entry %p missing IOMMU_MAP_ENTRY_MAP", entry));
		TAILQ_INSERT_TAIL(entries, entry, dmamap_link);

		segs[seg].ds_addr = entry->start + offset;
		segs[seg].ds_len = buflen1;

		idx += OFF_TO_IDX(offset + buflen1);
		offset += buflen1;
		offset &= IOMMU_PAGE_MASK;
		buflen -= buflen1;
	}
	if (error == 0)
		*segp = seg;
	return (error);
}

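/*
 * Common load path: map the pages and, on success, attach the new map
 * entries to the dmamap.  On failure, any partially created entries
 * are handed to the domain's unload task, since busdma does not allow
 * reporting a partial load.  An ENOMEM in a non-sleepable context is
 * converted to EINPROGRESS when the caller allows waiting, and the
 * load is then retried from the delayed taskqueue.
 */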
static int
iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp)
{
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entries_tailq entries;
	int error;

	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->loads, 1);

	TAILQ_INIT(&entries);
	error = iommu_bus_dmamap_load_something1(tag, map, ma, offset,
	    buflen, flags, segs, segp, &entries);
	if (error == 0) {
		IOMMU_DMAMAP_LOCK(map);
		TAILQ_CONCAT(&map->map_entries, &entries, dmamap_link);
		IOMMU_DMAMAP_UNLOCK(map);
	} else if (!TAILQ_EMPTY(&entries)) {
		/*
		 * The busdma interface does not allow us to report
		 * partial buffer load, so unfortunately we have to
		 * revert all work done.
		 */
		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
		IOMMU_DOMAIN_UNLOCK(domain);
		taskqueue_enqueue(domain->iommu->delayed_taskqueue,
		    &domain->unload_task);
	}

	if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
	    !map->cansleep)
		error = EINPROGRESS;
	if (error == EINPROGRESS)
		iommu_bus_schedule_dmamap(domain->iommu, map);
	return (error);
}

static int
iommu_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	return (iommu_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
	    flags, segs, segp));
}

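/*
 * Load a physically contiguous buffer.  A temporary vm_page array is
 * built for the range; pages for which PHYS_TO_VM_PAGE() does not
 * return a usable page are represented by fake pages.
 */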
static int
iommu_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, mflags, offset;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	pstart = trunc_page(buf);
	pend = round_page(buf + buflen);
	offset = buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
	if (ma == NULL)
		return (ENOMEM);
	fma = NULL;
	for (i = 0; i < ma_cnt; i++) {
		paddr = pstart + ptoa(i);
		ma[i] = PHYS_TO_VM_PAGE(paddr);
		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
			/*
			 * If PHYS_TO_VM_PAGE() returned NULL or the
			 * vm_page was not initialized we'll use a
			 * fake page.
			 */
			if (fma == NULL) {
				fma = malloc(sizeof(struct vm_page) * ma_cnt,
				    M_DEVBUF, M_ZERO | mflags);
				if (fma == NULL) {
					free(ma, M_DEVBUF);
					return (ENOMEM);
				}
			}
			vm_page_initfake(&fma[i], pstart + ptoa(i),
			    VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	}
	error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(fma, M_DEVBUF);
	free(ma, M_DEVBUF);
	return (error);
}

static int
iommu_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
    bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, mflags, offset;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	pstart = trunc_page((vm_offset_t)buf);
	pend = round_page((vm_offset_t)buf + buflen);
	offset = (vm_offset_t)buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
	if (ma == NULL)
		return (ENOMEM);
	fma = NULL;
	for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
		if (pmap == kernel_pmap)
			paddr = pmap_kextract(pstart);
		else
			paddr = pmap_extract(pmap, pstart);
		ma[i] = PHYS_TO_VM_PAGE(paddr);
		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
			/*
			 * If PHYS_TO_VM_PAGE() returned NULL or the
			 * vm_page was not initialized we'll use a
			 * fake page.
			 */
			if (fma == NULL) {
				fma = malloc(sizeof(struct vm_page) * ma_cnt,
				    M_DEVBUF, M_ZERO | mflags);
				if (fma == NULL) {
					free(ma, M_DEVBUF);
					return (ENOMEM);
				}
			}
			vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	}
	error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(ma, M_DEVBUF);
	free(fma, M_DEVBUF);
	return (error);
}

static void
iommu_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
{
	struct bus_dmamap_iommu *map;

	if (map1 == NULL)
		return;
	map = (struct bus_dmamap_iommu *)map1;
	map->mem = *mem;
	map->tag = (struct bus_dma_tag_iommu *)dmat;
	map->callback = callback;
	map->callback_arg = callback_arg;
}

static bus_dma_segment_t *
iommu_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dma_segment_t *segs, int nsegs, int error)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;

	if (!map->locked) {
		KASSERT(map->cansleep,
		    ("map not locked and not sleepable context %p", map));

		/*
		 * We are called from the delayed context. Relock the
		 * driver.
		 */
		(tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
		map->locked = true;
	}

	if (segs == NULL)
		segs = tag->segments;
	return (segs);
}

/*
 * The limitations of the busdma KPI force the IOMMU code to perform
 * the actual unload, which consists of unmapping the map entries from
 * the page tables, from the delayed context on i386, since mapping a
 * page table page might require a sleep to be successful. The
 * unfortunate consequence is that DMA requests can still be served
 * for some time after the bus_dmamap_unload() call has returned.
 *
 * On amd64, we assume that sf allocation cannot fail.
 */
static void
iommu_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entries_tailq entries;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->unloads, 1);

	TAILQ_INIT(&entries);
	IOMMU_DMAMAP_LOCK(map);
	TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
	IOMMU_DMAMAP_UNLOCK(map);
#if defined(IOMMU_DOMAIN_UNLOAD_SLEEP)
	IOMMU_DOMAIN_LOCK(domain);
	TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
	IOMMU_DOMAIN_UNLOCK(domain);
	taskqueue_enqueue(domain->iommu->delayed_taskqueue,
	    &domain->unload_task);
#else
	THREAD_NO_SLEEPING();
	iommu_domain_unload(domain, &entries, false);
	THREAD_SLEEPING_OK();
	KASSERT(TAILQ_EMPTY(&entries), ("lazy iommu_ctx_unload %p", ctx));
#endif
}

static void
iommu_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dmasync_op_t op)
{
	struct bus_dmamap_iommu *map __unused;

	map = (struct bus_dmamap_iommu *)map1;
	kmsan_bus_dmamap_sync(&map->kmsan_mem, op);
}

#ifdef KMSAN
static void
iommu_bus_dmamap_load_kmsan(bus_dmamap_t map1, struct memdesc *mem)
{
	struct bus_dmamap_iommu *map;

	map = (struct bus_dmamap_iommu *)map1;
	if (map == NULL)
		return;
	memcpy(&map->kmsan_mem, mem, sizeof(struct memdesc));
}
#endif

struct bus_dma_impl bus_dma_iommu_impl = {
	.tag_create = iommu_bus_dma_tag_create,
	.tag_destroy = iommu_bus_dma_tag_destroy,
	.tag_set_domain = iommu_bus_dma_tag_set_domain,
	.id_mapped = iommu_bus_dma_id_mapped,
	.map_create = iommu_bus_dmamap_create,
	.map_destroy = iommu_bus_dmamap_destroy,
	.mem_alloc = iommu_bus_dmamem_alloc,
	.mem_free = iommu_bus_dmamem_free,
	.load_phys = iommu_bus_dmamap_load_phys,
	.load_buffer = iommu_bus_dmamap_load_buffer,
	.load_ma = iommu_bus_dmamap_load_ma,
	.map_waitok = iommu_bus_dmamap_waitok,
	.map_complete = iommu_bus_dmamap_complete,
	.map_unload = iommu_bus_dmamap_unload,
	.map_sync = iommu_bus_dmamap_sync,
#ifdef KMSAN
	.load_kmsan = iommu_bus_dmamap_load_kmsan,
#endif
};

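/*
 * Taskqueue handler that retries delayed map loads in a sleepable
 * context, with map->cansleep set so that allocations may wait.
 */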
static void
iommu_bus_task_dmamap(void *arg, int pending)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_unit *unit;

	unit = arg;
	IOMMU_LOCK(unit);
	while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
		TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
		IOMMU_UNLOCK(unit);
		tag = map->tag;
		map->cansleep = true;
		map->locked = false;
		bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
		    &map->mem, map->callback, map->callback_arg,
		    BUS_DMA_WAITOK);
		map->cansleep = false;
		if (map->locked) {
			(tag->common.lockfunc)(tag->common.lockfuncarg,
			    BUS_DMA_UNLOCK);
		} else
			map->locked = true;
		map->cansleep = false;
		IOMMU_LOCK(unit);
	}
	IOMMU_UNLOCK(unit);
}

static void
iommu_bus_schedule_dmamap(struct iommu_unit *unit, struct bus_dmamap_iommu *map)
{

	map->locked = false;
	IOMMU_LOCK(unit);
	TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
	IOMMU_UNLOCK(unit);
	taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
}

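/*
 * Per-unit busdma initialization: honor the hw.iommu.dma tunable (with
 * hw.dmar.dma accepted for compatibility) and set up the taskqueue
 * used for delayed map loads and unloads.
 */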
int
iommu_init_busdma(struct iommu_unit *unit)
{
	int error;

	unit->dma_enabled = 1;
	error = TUNABLE_INT_FETCH("hw.iommu.dma", &unit->dma_enabled);
	if (error == 0) /* compatibility */
		TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled);
	TAILQ_INIT(&unit->delayed_maps);
	TASK_INIT(&unit->dmamap_load_task, 0, iommu_bus_task_dmamap, unit);
	unit->delayed_taskqueue = taskqueue_create("iommu", M_WAITOK,
	    taskqueue_thread_enqueue, &unit->delayed_taskqueue);
	taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
	    "iommu%d busdma taskq", unit->unit);
	return (0);
}

void
iommu_fini_busdma(struct iommu_unit *unit)
{

	if (unit->delayed_taskqueue == NULL)
		return;

	taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
	taskqueue_free(unit->delayed_taskqueue);
	unit->delayed_taskqueue = NULL;
}

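/*
 * Establish an identity (1:1) mapping for the physical address range
 * [start, start + length) in the IOMMU context associated with the
 * given tag, so that the device can address the range using its
 * physical addresses.  The created entry is attached to the map and is
 * released by the normal bus_dmamap_unload() path.
 */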
int
bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t start, vm_size_t length, int flags)
{
	struct bus_dma_tag_common *tc;
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	vm_page_t *ma;
	vm_size_t i;
	int error;
	bool waitok;

	MPASS((start & PAGE_MASK) == 0);
	MPASS((length & PAGE_MASK) == 0);
	MPASS(length > 0);
	MPASS(start + length >= start);
	MPASS((flags & ~(BUS_DMA_NOWAIT | BUS_DMA_NOWRITE)) == 0);

	tc = (struct bus_dma_tag_common *)dmat;
	if (tc->impl != &bus_dma_iommu_impl)
		return (0);

	tag = (struct bus_dma_tag_iommu *)dmat;
	ctx = tag->ctx;
	domain = ctx->domain;
	map = (struct bus_dmamap_iommu *)map1;
	waitok = (flags & BUS_DMA_NOWAIT) == 0;

	entry = iommu_gas_alloc_entry(domain, waitok ? IOMMU_PGF_WAITOK : 0);
	if (entry == NULL)
		return (ENOMEM);
	entry->start = start;
	entry->end = start + length;
	ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL) {
		iommu_gas_free_entry(entry);
		return (ENOMEM);
	}
	for (i = 0; i < atop(length); i++) {
		ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
		    VM_MEMATTR_DEFAULT);
	}
	error = iommu_gas_map_region(domain, entry, IOMMU_MAP_ENTRY_READ |
	    ((flags & BUS_DMA_NOWRITE) ? 0 : IOMMU_MAP_ENTRY_WRITE) |
	    IOMMU_MAP_ENTRY_MAP, waitok ? IOMMU_MF_CANWAIT : 0, ma);
	if (error == 0) {
		IOMMU_DMAMAP_LOCK(map);
		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
		IOMMU_DMAMAP_UNLOCK(map);
	} else {
		iommu_gas_free_entry(entry);
	}
	for (i = 0; i < atop(length); i++)
		vm_page_putfake(ma[i]);
	free(ma, M_TEMP);
	return (error);
}

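/*
 * Taskqueue handler that drains the domain's queue of map entries
 * deferred for unloading and unmaps them.
 */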
static void
iommu_domain_unload_task(void *arg, int pending)
{
	struct iommu_domain *domain;
	struct iommu_map_entries_tailq entries;

	domain = arg;
	TAILQ_INIT(&entries);

	for (;;) {
		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_SWAP(&domain->unload_entries, &entries,
		    iommu_map_entry, dmamap_link);
		IOMMU_DOMAIN_UNLOCK(domain);
		if (TAILQ_EMPTY(&entries))
			break;
		iommu_domain_unload(domain, &entries, true);
	}
}

void
iommu_domain_init(struct iommu_unit *unit, struct iommu_domain *domain,
    const struct iommu_domain_map_ops *ops)
{

	domain->ops = ops;
	domain->iommu = unit;

	TASK_INIT(&domain->unload_task, 0, iommu_domain_unload_task, domain);
	RB_INIT(&domain->rb_root);
	TAILQ_INIT(&domain->unload_entries);
	mtx_init(&domain->lock, "iodom", NULL, MTX_DEF);
}

void
iommu_domain_fini(struct iommu_domain *domain)
{

	mtx_destroy(&domain->lock);
}