1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2013 The FreeBSD Foundation
5 *
6 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
7 * under sponsorship from the FreeBSD Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33
34 #define RB_AUGMENT_CHECK(entry) iommu_gas_augment_entry(entry)
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/malloc.h>
39 #include <sys/bus.h>
40 #include <sys/interrupt.h>
41 #include <sys/kernel.h>
42 #include <sys/ktr.h>
43 #include <sys/lock.h>
44 #include <sys/proc.h>
45 #include <sys/rwlock.h>
46 #include <sys/memdesc.h>
47 #include <sys/mutex.h>
48 #include <sys/sysctl.h>
49 #include <sys/rman.h>
50 #include <sys/taskqueue.h>
51 #include <sys/tree.h>
52 #include <sys/uio.h>
53 #include <sys/vmem.h>
54 #include <vm/vm.h>
55 #include <vm/vm_extern.h>
56 #include <vm/vm_kern.h>
57 #include <vm/vm_object.h>
58 #include <vm/vm_page.h>
59 #include <vm/vm_map.h>
60 #include <vm/uma.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/iommu/iommu.h>
64 #include <dev/iommu/iommu_gas.h>
65 #include <dev/iommu/iommu_msi.h>
66 #include <machine/atomic.h>
67 #include <machine/bus.h>
68 #include <machine/md_var.h>
69 #include <machine/iommu.h>
70 #include <dev/iommu/busdma_iommu.h>
71
72 /*
73 * Guest Address Space management.
74 */
75
/*
 * UMA zone from which iommu_gas_alloc_entry() allocates map entries and to
 * which iommu_gas_free_entry() returns them.  Created by intel_gas_init().
 */
static uma_zone_t iommu_map_entry_zone;

#ifdef INVARIANTS
/*
 * When non-zero, iommu_gas_map() and iommu_gas_free_space() run
 * iommu_gas_check_free() over the whole tree after modifying it.
 * Exported below as the check_free sysctl under hw.iommu.
 */
static int iommu_check_free;
#endif
81
82 static void
83 intel_gas_init(void)
84 {
85
86 iommu_map_entry_zone = uma_zcreate("IOMMU_MAP_ENTRY",
87 sizeof(struct iommu_map_entry), NULL, NULL,
88 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NODUMP);
89 }
90 SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL);
91
92 struct iommu_map_entry *
93 iommu_gas_alloc_entry(struct iommu_domain *domain, u_int flags)
94 {
95 struct iommu_map_entry *res;
96
97 KASSERT((flags & ~(IOMMU_PGF_WAITOK)) == 0,
98 ("unsupported flags %x", flags));
99
100 res = uma_zalloc(iommu_map_entry_zone, ((flags & IOMMU_PGF_WAITOK) !=
101 0 ? M_WAITOK : M_NOWAIT) | M_ZERO);
102 if (res != NULL && domain != NULL) {
103 res->domain = domain;
104 atomic_add_int(&domain->entries_cnt, 1);
105 }
106 return (res);
107 }
108
109 void
110 iommu_gas_free_entry(struct iommu_map_entry *entry)
111 {
112 struct iommu_domain *domain;
113
114 domain = entry->domain;
115 if (domain != NULL)
116 atomic_subtract_int(&domain->entries_cnt, 1);
117 uma_zfree(iommu_map_entry_zone, entry);
118 }
119
120 static int
121 iommu_gas_cmp_entries(struct iommu_map_entry *a, struct iommu_map_entry *b)
122 {
123
124 /* Last entry have zero size, so <= */
125 KASSERT(a->start <= a->end, ("inverted entry %p (%jx, %jx)",
126 a, (uintmax_t)a->start, (uintmax_t)a->end));
127 KASSERT(b->start <= b->end, ("inverted entry %p (%jx, %jx)",
128 b, (uintmax_t)b->start, (uintmax_t)b->end));
129 KASSERT(a->end <= b->start || b->end <= a->start ||
130 a->end == a->start || b->end == b->start,
131 ("overlapping entries %p (%jx, %jx) %p (%jx, %jx)",
132 a, (uintmax_t)a->start, (uintmax_t)a->end,
133 b, (uintmax_t)b->start, (uintmax_t)b->end));
134
135 if (a->end < b->end)
136 return (-1);
137 else if (b->end < a->end)
138 return (1);
139 return (0);
140 }
141
/*
 * Update augmentation data based on data from children.
 * Return true if and only if the update changes the augmentation data.
 *
 * The fields recomputed for 'entry' are:
 *   first     - the left child's 'first' if there is a left child, otherwise
 *               entry->start;
 *   last      - the right child's 'last' if there is a right child,
 *               otherwise entry->end;
 *   free_down - the largest of the children's free_down values and of the
 *               two gaps between this entry and the adjacent edge of each
 *               child subtree (left child's 'last' up to entry->start, and
 *               entry->end up to the right child's 'first').
 *
 * RB_AUGMENT_CHECK is defined to this function near the top of the file,
 * ahead of the <sys/tree.h> include.
 */
static bool
iommu_gas_augment_entry(struct iommu_map_entry *entry)
{
	struct iommu_map_entry *child;
	iommu_gaddr_t bound, delta, free_down;

	free_down = 0;
	/* Left side: 'bound' ends up as the new value of entry->first. */
	bound = entry->start;
	if ((child = RB_LEFT(entry, rb_entry)) != NULL) {
		free_down = MAX(child->free_down, bound - child->last);
		bound = child->first;
	}
	/* delta starts as (new first - old first) ... */
	delta = bound - entry->first;
	entry->first = bound;
	/* Right side: 'bound' ends up as the new value of entry->last. */
	bound = entry->end;
	if ((child = RB_RIGHT(entry, rb_entry)) != NULL) {
		free_down = MAX(free_down, child->free_down);
		free_down = MAX(free_down, child->first - bound);
		bound = child->last;
	}
	/*
	 * ... and then adds (old last - new last), so it equals the change in
	 * (last - first).  The arithmetic is unsigned; only zero versus
	 * non-zero matters.
	 */
	delta += entry->last - bound;
	/* Span unchanged: report whether free_down changed instead. */
	if (delta == 0)
		delta = entry->free_down - free_down;
	entry->last = bound;
	entry->free_down = free_down;

	/*
	 * Return true either if the value of last-first changed,
	 * or if free_down changed.
	 */
	return (delta != 0);
}

/*
 * Instantiate the red-black tree routines for iommu_gas_entries_tree, linked
 * through the rb_entry field and ordered by iommu_gas_cmp_entries().
 */
RB_GENERATE(iommu_gas_entries_tree, iommu_map_entry, rb_entry,
    iommu_gas_cmp_entries);
181
#ifdef INVARIANTS
/*
 * Debug-only consistency check.  For every entry in the domain's tree,
 * recompute free_down from the entry's immediate children and assert that
 * the stored value matches; also assert that each entry belongs to 'domain'.
 */
static void
iommu_gas_check_free(struct iommu_domain *domain)
{
	struct iommu_map_entry *entry, *left, *right;
	iommu_gaddr_t expected;

	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		KASSERT(domain == entry->domain,
		    ("mismatched free domain %p entry %p entry->domain %p",
		    domain, entry, entry->domain));

		left = RB_LEFT(entry, rb_entry);
		right = RB_RIGHT(entry, rb_entry);

		expected = 0;
		if (left != NULL) {
			/* Largest gap below, or the gap just before entry. */
			expected = MAX(left->free_down,
			    entry->start - left->last);
		}
		if (right != NULL) {
			/* Largest gap above, or the gap just after entry. */
			expected = MAX(expected, right->free_down);
			expected = MAX(expected, right->first - entry->end);
		}
		MPASS(entry->free_down == expected);
	}
}
#endif
208
209 static void
210 iommu_gas_rb_remove(struct iommu_domain *domain, struct iommu_map_entry *entry)
211 {
212 struct iommu_map_entry *nbr;
213
214 /* Removing entry may open a new free gap before domain->start_gap. */
215 if (entry->end <= domain->start_gap->end) {
216 if (RB_RIGHT(entry, rb_entry) != NULL)
217 nbr = iommu_gas_entries_tree_RB_NEXT(entry);
218 else if (RB_LEFT(entry, rb_entry) != NULL)
219 nbr = RB_LEFT(entry, rb_entry);
220 else
221 nbr = RB_PARENT(entry, rb_entry);
222 domain->start_gap = nbr;
223 }
224 RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry);
225 }
226
227 struct iommu_domain *
228 iommu_get_ctx_domain(struct iommu_ctx *ctx)
229 {
230
231 return (ctx->domain);
232 }
233
234 void
235 iommu_gas_init_domain(struct iommu_domain *domain)
236 {
237 struct iommu_map_entry *begin, *end;
238
239 begin = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
240 end = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
241
242 IOMMU_DOMAIN_LOCK(domain);
243 KASSERT(domain->entries_cnt == 2, ("dirty domain %p", domain));
244 KASSERT(RB_EMPTY(&domain->rb_root),
245 ("non-empty entries %p", domain));
246
247 /*
248 * The end entry must be inserted first because it has a zero-length gap
249 * between start and end. Initially, all augmentation data for a new
250 * entry is zero. Function iommu_gas_augment_entry will compute no
251 * change in the value of (start-end) and no change in the value of
252 * free_down, so it will return false to suggest that nothing changed in
253 * the entry. Thus, inserting the end entry second prevents
254 * augmentation information to be propogated to the begin entry at the
255 * tree root. So it is inserted first.
256 */
257 end->start = domain->end;
258 end->end = domain->end;
259 end->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
260 RB_INSERT(iommu_gas_entries_tree, &domain->rb_root, end);
261
262 begin->start = 0;
263 begin->end = IOMMU_PAGE_SIZE;
264 begin->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
265 RB_INSERT_PREV(iommu_gas_entries_tree, &domain->rb_root, end, begin);
266
267 domain->start_gap = end;
268 domain->first_place = begin;
269 domain->last_place = end;
270 domain->flags |= IOMMU_DOMAIN_GAS_INITED;
271 IOMMU_DOMAIN_UNLOCK(domain);
272 }
273
274 void
275 iommu_gas_fini_domain(struct iommu_domain *domain)
276 {
277 struct iommu_map_entry *entry;
278
279 IOMMU_DOMAIN_ASSERT_LOCKED(domain);
280 KASSERT(domain->entries_cnt == 2,
281 ("domain still in use %p", domain));
282
283 entry = RB_MIN(iommu_gas_entries_tree, &domain->rb_root);
284 KASSERT(entry->start == 0, ("start entry start %p", domain));
285 KASSERT(entry->end == IOMMU_PAGE_SIZE, ("start entry end %p", domain));
286 KASSERT(entry->flags ==
287 (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
288 ("start entry flags %p", domain));
289 iommu_gas_rb_remove(domain, entry);
290 iommu_gas_free_entry(entry);
291
292 entry = RB_MAX(iommu_gas_entries_tree, &domain->rb_root);
293 KASSERT(entry->start == domain->end, ("end entry start %p", domain));
294 KASSERT(entry->end == domain->end, ("end entry end %p", domain));
295 KASSERT(entry->flags ==
296 (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
297 ("end entry flags %p", domain));
298 iommu_gas_rb_remove(domain, entry);
299 iommu_gas_free_entry(entry);
300 }
301
/*
 * Request description for one address-space search.  Filled in by
 * iommu_gas_map() and read by iommu_gas_find_space() and
 * iommu_gas_match_one().
 */
struct iommu_gas_match_args {
	/* Number of bytes requested. */
	iommu_gaddr_t size;
	/* Byte offset added to a candidate start address ahead of 'size'. */
	int offset;
	/* Tag constraints; alignment, boundary, lowaddr, highaddr are read. */
	const struct bus_dma_tag_common *common;
	/* IOMMU_MF_* flags; IOMMU_MF_CANSPLIT is examined by the matcher. */
	u_int gas_flags;
	/* Preallocated entry that receives start, end and flags on a match. */
	struct iommu_map_entry *entry;
};
309
/*
 * The interval [beg, end) is a free interval between two iommu_map_entries.
 * Addresses can be allocated only in the range [lbound, ubound].  Try to
 * allocate space in the free interval, subject to the conditions expressed by
 * a, and return 'true' if and only if the allocation attempt succeeds.
 *
 * On success a->entry->start, ->end and ->flags (IOMMU_MAP_ENTRY_MAP) are
 * filled in; the entry is not inserted into the tree here.  On failure
 * a->entry is left untouched.
 */
static bool
iommu_gas_match_one(struct iommu_gas_match_args *a, iommu_gaddr_t beg,
    iommu_gaddr_t end, iommu_gaddr_t lbound, iommu_gaddr_t ubound)
{
	struct iommu_map_entry *entry;
	iommu_gaddr_t first, size, start;
	int offset;

	/*
	 * The prev->end is always aligned on the page size, which
	 * causes page alignment for the entry->start too.
	 *
	 * Create IOMMU_PAGE_SIZE gaps before, after new entry
	 * to ensure that out-of-bounds accesses fault.
	 */
	beg = MAX(beg + IOMMU_PAGE_SIZE, lbound);
	start = roundup2(beg, a->common->alignment);
	/* Rounding up wrapped around the address type. */
	if (start < beg)
		return (false);
	/* From here on 'end' is the last usable address, inclusive. */
	end = MIN(end - IOMMU_PAGE_SIZE - 1, ubound);
	offset = a->offset;
	size = a->size;
	if (start + offset + size - 1 > end)
		return (false);

	/* Check for and try to skip past boundary crossing. */
	if (!vm_addr_bound_ok(start + offset, size, a->common->boundary)) {
		/*
		 * The start + offset to start + offset + size region crosses
		 * the boundary.  Check if there is enough space after the next
		 * boundary after the beg.
		 */
		first = start;
		beg = roundup2(start + offset + 1, a->common->boundary);
		start = roundup2(beg, a->common->alignment);

		if (start + offset + size - 1 > end ||
		    !vm_addr_bound_ok(start + offset, size,
		    a->common->boundary)) {
			/*
			 * Not enough space to align at the requested boundary,
			 * or boundary is smaller than the size, but allowed to
			 * split.  We already checked that start + size does not
			 * overlap ubound.
			 *
			 * XXXKIB.  It is possible that beg is exactly at the
			 * start of the next entry, then we do not have gap.
			 * Ignore for now.
			 */
			if ((a->gas_flags & IOMMU_MF_CANSPLIT) == 0)
				return (false);
			/*
			 * Split: fall back to the original start and shrink
			 * the size so the region stops at the boundary 'beg'.
			 */
			size = beg - first - offset;
			start = first;
		}
	}
	entry = a->entry;
	entry->start = start;
	/* The entry always covers whole pages. */
	entry->end = start + roundup2(size + offset, IOMMU_PAGE_SIZE);
	entry->flags = IOMMU_MAP_ENTRY_MAP;
	return (true);
}
377
378 /* Find the next entry that might abut a big-enough range. */
379 static struct iommu_map_entry *
380 iommu_gas_next(struct iommu_map_entry *curr, iommu_gaddr_t min_free)
381 {
382 struct iommu_map_entry *next;
383
384 if ((next = RB_RIGHT(curr, rb_entry)) != NULL &&
385 next->free_down >= min_free) {
386 /* Find next entry in right subtree. */
387 do
388 curr = next;
389 while ((next = RB_LEFT(curr, rb_entry)) != NULL &&
390 next->free_down >= min_free);
391 } else {
392 /* Find next entry in a left-parent ancestor. */
393 while ((next = RB_PARENT(curr, rb_entry)) != NULL &&
394 curr == RB_RIGHT(next, rb_entry))
395 curr = next;
396 curr = next;
397 }
398 return (curr);
399 }
400
/*
 * Address-ordered first-fit search of 'domain' for free space satisfying the
 * conditions of 'a'.  The space allocated is at least one page big, and is
 * bounded by guard pages to the left and right.  The allocated space for
 * 'domain' is described by an rb-tree of map entries at domain->rb_root, and
 * domain->start_gap points to a map entry less than or adjacent to the first
 * free-space of size at least 3 pages.
 *
 * The caller must hold the domain lock and pass a clean a->entry (flags 0).
 * Returns 0 after filling in a->entry and inserting it into the tree, or
 * ENOMEM when no suitable range was found.  domain->start_gap is updated as
 * a side effect and is set to NULL when the first scan finds nothing.
 */
static int
iommu_gas_find_space(struct iommu_domain *domain,
    struct iommu_gas_match_args *a)
{
	struct iommu_map_entry *curr, *first;
	iommu_gaddr_t addr, min_free;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	KASSERT(a->entry->flags == 0,
	    ("dirty entry %p %p", domain, a->entry));

	/*
	 * start_gap may point to an entry adjacent to gaps too small for any
	 * new allocation.  In that case, advance start_gap to the first free
	 * space big enough for a minimum allocation plus two guard pages.
	 */
	min_free = 3 * IOMMU_PAGE_SIZE;
	first = domain->start_gap;
	/* Climb to an ancestor whose subtree holds a big-enough gap. */
	while (first != NULL && first->free_down < min_free)
		first = RB_PARENT(first, rb_entry);
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		/* Gap between the left subtree and curr. */
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    first->last + min_free <= curr->start)
			break;
		/* Gap between curr and the right subtree. */
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    curr->end + min_free <= first->first)
			break;
	}
	domain->start_gap = curr;

	/*
	 * If the subtree doesn't have free space for the requested allocation
	 * plus two guard pages, skip it.
	 */
	min_free = 2 * IOMMU_PAGE_SIZE +
	    roundup2(a->size + a->offset, IOMMU_PAGE_SIZE);

	/* Climb to find a node in the subtree of big-enough ranges. */
	first = curr;
	while (first != NULL && first->free_down < min_free)
		first = RB_PARENT(first, rb_entry);

	/*
	 * Walk the big-enough ranges tree until one satisfies alignment
	 * requirements, or violates lowaddr address requirement.
	 */
	addr = a->common->lowaddr;
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		/* Try the gap immediately before curr, limited to [0, addr]. */
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, first->last, curr->start,
		    0, addr)) {
			RB_INSERT_PREV(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
		if (curr->end >= addr) {
			/* All remaining ranges > addr */
			break;
		}
		/* Try the gap immediately after curr. */
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, curr->end, first->first,
		    0, addr)) {
			RB_INSERT_NEXT(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
	}

	/*
	 * To resume the search at the start of the upper region, first climb to
	 * the nearest ancestor that spans highaddr.  Then find the last entry
	 * before highaddr that could abut a big-enough range.
	 */
	addr = a->common->highaddr;
	while (curr != NULL && curr->last < addr)
		curr = RB_PARENT(curr, rb_entry);
	first = NULL;
	/* Descend, remembering in 'first' the last node ending at or below addr. */
	while (curr != NULL && curr->free_down >= min_free) {
		if (addr < curr->end)
			curr = RB_LEFT(curr, rb_entry);
		else {
			first = curr;
			curr = RB_RIGHT(curr, rb_entry);
		}
	}

	/*
	 * Walk the remaining big-enough ranges until one satisfies alignment
	 * requirements.
	 */
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		/* Upper region: addresses in [highaddr + 1, domain->end - 1]. */
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, first->last, curr->start,
		    addr + 1, domain->end - 1)) {
			RB_INSERT_PREV(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, curr->end, first->first,
		    addr + 1, domain->end - 1)) {
			RB_INSERT_NEXT(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
	}

	return (ENOMEM);
}
521
/*
 * Insert 'entry', whose start and end the caller has already set, into the
 * domain's tree at exactly that address range.  The caller must hold the
 * domain lock.
 *
 * Returns EINVAL if the range is not page-aligned, is empty or inverted, or
 * reaches domain->end; EBUSY if it overlaps a non-placeholder neighbour and
 * either IOMMU_MF_RMRR is not in 'flags' or the neighbour is not an RMRR
 * entry; 0 otherwise.  With RMRR overlap the entry is clipped to the free
 * part, and if nothing is left 0 is returned without inserting the entry
 * (entry->start == entry->end on return).  A placeholder neighbour that
 * overlaps the entry is removed from the tree.
 */
static int
iommu_gas_alloc_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
    u_int flags)
{
	struct iommu_map_entry *next, *prev;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	if ((entry->start & IOMMU_PAGE_MASK) != 0 ||
	    (entry->end & IOMMU_PAGE_MASK) != 0)
		return (EINVAL);
	if (entry->start >= entry->end)
		return (EINVAL);
	if (entry->end >= domain->end)
		return (EINVAL);

	/* The tree is ordered by end address; find the neighbours of entry. */
	next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, entry);
	KASSERT(next != NULL, ("next must be non-null %p %jx", domain,
	    (uintmax_t)entry->start));
	prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
	/* prev could be NULL */

	/*
	 * Adapt to broken BIOSes which specify overlapping RMRR
	 * entries.
	 *
	 * XXXKIB: this does not handle a case when prev or next
	 * entries are completely covered by the current one, which
	 * extends both ways.
	 */
	if (prev != NULL && prev->end > entry->start &&
	    (prev->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
		if ((flags & IOMMU_MF_RMRR) == 0 ||
		    (prev->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
			return (EBUSY);
		/* Clip the front of entry to where prev stops. */
		entry->start = prev->end;
	}
	if (next->start < entry->end &&
	    (next->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
		if ((flags & IOMMU_MF_RMRR) == 0 ||
		    (next->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
			return (EBUSY);
		/* Clip the tail of entry to where next begins. */
		entry->end = next->start;
	}
	/* Nothing left after clipping: success, but nothing is inserted. */
	if (entry->end == entry->start)
		return (0);

	if (prev != NULL && prev->end > entry->start) {
		/* This assumes that prev is the placeholder entry. */
		iommu_gas_rb_remove(domain, prev);
		prev = NULL;
	}
	RB_INSERT_PREV(iommu_gas_entries_tree,
	    &domain->rb_root, next, entry);
	/* An overlapping 'next' at this point can only be a placeholder. */
	if (next->start < entry->end) {
		iommu_gas_rb_remove(domain, next);
		next = NULL;
	}

	if ((flags & IOMMU_MF_RMRR) != 0)
		entry->flags = IOMMU_MAP_ENTRY_RMRR;

#ifdef INVARIANTS
	/* Verify that the surviving neighbours are adjacent to entry. */
	struct iommu_map_entry *ip, *in;
	ip = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, entry);
	in = RB_NEXT(iommu_gas_entries_tree, &domain->rb_root, entry);
	KASSERT(prev == NULL || ip == prev,
	    ("RMRR %p (%jx %jx) prev %p (%jx %jx) ins prev %p (%jx %jx)",
	    entry, entry->start, entry->end, prev,
	    prev == NULL ? 0 : prev->start, prev == NULL ? 0 : prev->end,
	    ip, ip == NULL ? 0 : ip->start, ip == NULL ? 0 : ip->end));
	KASSERT(next == NULL || in == next,
	    ("RMRR %p (%jx %jx) next %p (%jx %jx) ins next %p (%jx %jx)",
	    entry, entry->start, entry->end, next,
	    next == NULL ? 0 : next->start, next == NULL ? 0 : next->end,
	    in, in == NULL ? 0 : in->start, in == NULL ? 0 : in->end));
#endif

	return (0);
}
602
603 void
604 iommu_gas_free_space(struct iommu_map_entry *entry)
605 {
606 struct iommu_domain *domain;
607
608 domain = entry->domain;
609 KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
610 IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_MAP,
611 ("permanent entry %p %p", domain, entry));
612
613 IOMMU_DOMAIN_LOCK(domain);
614 iommu_gas_rb_remove(domain, entry);
615 entry->flags &= ~IOMMU_MAP_ENTRY_MAP;
616 #ifdef INVARIANTS
617 if (iommu_check_free)
618 iommu_gas_check_free(domain);
619 #endif
620 IOMMU_DOMAIN_UNLOCK(domain);
621 }
622
623 void
624 iommu_gas_free_region(struct iommu_map_entry *entry)
625 {
626 struct iommu_domain *domain;
627
628 domain = entry->domain;
629 KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
630 IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_RMRR,
631 ("non-RMRR entry %p %p", domain, entry));
632
633 IOMMU_DOMAIN_LOCK(domain);
634 if (entry != domain->first_place &&
635 entry != domain->last_place)
636 iommu_gas_rb_remove(domain, entry);
637 entry->flags &= ~IOMMU_MAP_ENTRY_RMRR;
638 IOMMU_DOMAIN_UNLOCK(domain);
639 }
640
641 static struct iommu_map_entry *
642 iommu_gas_remove_clip_left(struct iommu_domain *domain, iommu_gaddr_t start,
643 iommu_gaddr_t end, struct iommu_map_entry **r)
644 {
645 struct iommu_map_entry *entry, *res, fentry;
646
647 IOMMU_DOMAIN_ASSERT_LOCKED(domain);
648 MPASS(start <= end);
649 MPASS(end <= domain->end);
650
651 /*
652 * Find an entry which contains the supplied guest's address
653 * start, or the first entry after the start. Since we
654 * asserted that start is below domain end, entry should
655 * exist. Then clip it if needed.
656 */
657 fentry.start = start + 1;
658 fentry.end = start + 1;
659 entry = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &fentry);
660
661 if (entry->start >= start ||
662 (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
663 return (entry);
664
665 res = *r;
666 *r = NULL;
667 *res = *entry;
668 res->start = entry->end = start;
669 RB_UPDATE_AUGMENT(entry, rb_entry);
670 RB_INSERT_NEXT(iommu_gas_entries_tree,
671 &domain->rb_root, entry, res);
672 return (res);
673 }
674
675 static bool
676 iommu_gas_remove_clip_right(struct iommu_domain *domain,
677 iommu_gaddr_t end, struct iommu_map_entry *entry,
678 struct iommu_map_entry *r)
679 {
680 if (entry->start >= end || (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
681 return (false);
682
683 *r = *entry;
684 r->end = entry->start = end;
685 RB_UPDATE_AUGMENT(entry, rb_entry);
686 RB_INSERT_PREV(iommu_gas_entries_tree,
687 &domain->rb_root, entry, r);
688 return (true);
689 }
690
691 static void
692 iommu_gas_remove_unmap(struct iommu_domain *domain,
693 struct iommu_map_entry *entry, struct iommu_map_entries_tailq *gcp)
694 {
695 IOMMU_DOMAIN_ASSERT_LOCKED(domain);
696
697 if ((entry->flags & (IOMMU_MAP_ENTRY_UNMAPPED |
698 IOMMU_MAP_ENTRY_REMOVING)) != 0)
699 return;
700 MPASS((entry->flags & IOMMU_MAP_ENTRY_PLACE) == 0);
701 entry->flags |= IOMMU_MAP_ENTRY_REMOVING;
702 TAILQ_INSERT_TAIL(gcp, entry, dmamap_link);
703 }
704
/*
 * Remove specified range from the GAS of the domain.  Note that the
 * removal is not guaranteed to occur upon the function return, it
 * might be finalized some time after, when hardware reports that
 * (queued) IOTLB invalidation was performed.
 *
 * The range is [start, start + size).  Two spare entries are allocated up
 * front, before the domain lock is taken, for the at most two splits at the
 * range edges; unused spares are freed before returning.  RMRR entries
 * inside the range are skipped.  Entries selected for removal are collected
 * on a local list and handed to iommu_domain_unload() after the lock is
 * dropped.
 */
void
iommu_gas_remove(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t size)
{
	struct iommu_map_entry *entry, *nentry, *r1, *r2;
	struct iommu_map_entries_tailq gc;
	iommu_gaddr_t end;

	end = start + size;
	r1 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	r2 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	TAILQ_INIT(&gc);

	IOMMU_DOMAIN_LOCK(domain);

	/* Split an entry straddling 'start'; r1 is set to NULL if consumed. */
	nentry = iommu_gas_remove_clip_left(domain, start, end, &r1);
	RB_FOREACH_FROM(entry, iommu_gas_entries_tree, nentry) {
		if (entry->start >= end)
			break;
		KASSERT(start <= entry->start,
		    ("iommu_gas_remove entry (%#jx, %#jx) start %#jx",
		    entry->start, entry->end, start));
		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
			continue;
		iommu_gas_remove_unmap(domain, entry, &gc);
	}
	/*
	 * NOTE(review): the loop above only breaks once entry->start >= end,
	 * and iommu_gas_remove_clip_right() returns false for exactly that
	 * condition, so this call looks like it can never split anything; an
	 * entry straddling 'end' would instead have been queued whole by the
	 * loop.  Confirm the intended behaviour.
	 */
	if (iommu_gas_remove_clip_right(domain, end, entry, r2)) {
		iommu_gas_remove_unmap(domain, r2, &gc);
		r2 = NULL;
	}

#ifdef INVARIANTS
	/* No non-RMRR entry in the tree may still intersect the range. */
	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
			continue;
		KASSERT(entry->end <= start || entry->start >= end,
		    ("iommu_gas_remove leftover entry (%#jx, %#jx) range "
		    "(%#jx, %#jx)",
		    entry->start, entry->end, start, end));
	}
#endif

	IOMMU_DOMAIN_UNLOCK(domain);
	/* Give back whichever spare entries were not needed. */
	if (r1 != NULL)
		iommu_gas_free_entry(r1);
	if (r2 != NULL)
		iommu_gas_free_entry(r2);
	iommu_domain_unload(domain, &gc, true);
}
760
761 int
762 iommu_gas_map(struct iommu_domain *domain,
763 const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset,
764 u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res)
765 {
766 struct iommu_gas_match_args a;
767 struct iommu_map_entry *entry;
768 int error;
769
770 KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_CANSPLIT)) == 0,
771 ("invalid flags 0x%x", flags));
772
773 a.size = size;
774 a.offset = offset;
775 a.common = common;
776 a.gas_flags = flags;
777 entry = iommu_gas_alloc_entry(domain,
778 (flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0);
779 if (entry == NULL)
780 return (ENOMEM);
781 a.entry = entry;
782 IOMMU_DOMAIN_LOCK(domain);
783 error = iommu_gas_find_space(domain, &a);
784 if (error == ENOMEM) {
785 IOMMU_DOMAIN_UNLOCK(domain);
786 iommu_gas_free_entry(entry);
787 return (error);
788 }
789 #ifdef INVARIANTS
790 if (iommu_check_free)
791 iommu_gas_check_free(domain);
792 #endif
793 KASSERT(error == 0,
794 ("unexpected error %d from iommu_gas_find_entry", error));
795 KASSERT(entry->end < domain->end, ("allocated GPA %jx, max GPA %jx",
796 (uintmax_t)entry->end, (uintmax_t)domain->end));
797 entry->flags |= eflags;
798 IOMMU_DOMAIN_UNLOCK(domain);
799
800 error = domain->ops->map(domain, entry->start,
801 entry->end - entry->start, ma, eflags,
802 ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
803 if (error == ENOMEM) {
804 iommu_domain_unload_entry(entry, true,
805 (flags & IOMMU_MF_CANWAIT) != 0);
806 return (error);
807 }
808 KASSERT(error == 0,
809 ("unexpected error %d from domain_map_buf", error));
810
811 *res = entry;
812 return (0);
813 }
814
815 int
816 iommu_gas_map_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
817 u_int eflags, u_int flags, vm_page_t *ma)
818 {
819 iommu_gaddr_t start;
820 int error;
821
822 KASSERT(entry->domain == domain,
823 ("mismatched domain %p entry %p entry->domain %p", domain,
824 entry, entry->domain));
825 KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", domain,
826 entry, entry->flags));
827 KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_RMRR)) == 0,
828 ("invalid flags 0x%x", flags));
829
830 start = entry->start;
831 IOMMU_DOMAIN_LOCK(domain);
832 error = iommu_gas_alloc_region(domain, entry, flags);
833 if (error != 0) {
834 IOMMU_DOMAIN_UNLOCK(domain);
835 return (error);
836 }
837 entry->flags |= eflags;
838 IOMMU_DOMAIN_UNLOCK(domain);
839 if (entry->end == entry->start)
840 return (0);
841
842 error = domain->ops->map(domain, entry->start,
843 entry->end - entry->start, ma + OFF_TO_IDX(start - entry->start),
844 eflags, ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
845 if (error == ENOMEM) {
846 iommu_domain_unload_entry(entry, false,
847 (flags & IOMMU_MF_CANWAIT) != 0);
848 return (error);
849 }
850 KASSERT(error == 0,
851 ("unexpected error %d from domain_map_buf", error));
852
853 return (0);
854 }
855
856 static int
857 iommu_gas_reserve_region_locked(struct iommu_domain *domain,
858 iommu_gaddr_t start, iommu_gaddr_t end, struct iommu_map_entry *entry)
859 {
860 int error;
861
862 IOMMU_DOMAIN_ASSERT_LOCKED(domain);
863
864 entry->start = start;
865 entry->end = end;
866 error = iommu_gas_alloc_region(domain, entry, IOMMU_MF_CANWAIT);
867 if (error == 0)
868 entry->flags |= IOMMU_MAP_ENTRY_UNMAPPED;
869 return (error);
870 }
871
872 int
873 iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start,
874 iommu_gaddr_t end, struct iommu_map_entry **entry0)
875 {
876 struct iommu_map_entry *entry;
877 int error;
878
879 entry = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
880 IOMMU_DOMAIN_LOCK(domain);
881 error = iommu_gas_reserve_region_locked(domain, start, end, entry);
882 IOMMU_DOMAIN_UNLOCK(domain);
883 if (error != 0)
884 iommu_gas_free_entry(entry);
885 else if (entry0 != NULL)
886 *entry0 = entry;
887 return (error);
888 }
889
890 /*
891 * As in iommu_gas_reserve_region, reserve [start, end), but allow for existing
892 * entries.
893 */
894 int
895 iommu_gas_reserve_region_extend(struct iommu_domain *domain,
896 iommu_gaddr_t start, iommu_gaddr_t end)
897 {
898 struct iommu_map_entry *entry, *next, *prev, key = {};
899 iommu_gaddr_t entry_start, entry_end;
900 int error;
901
902 error = 0;
903 entry = NULL;
904 end = ummin(end, domain->end);
905 while (start < end) {
906 /* Preallocate an entry. */
907 if (entry == NULL)
908 entry = iommu_gas_alloc_entry(domain,
909 IOMMU_PGF_WAITOK);
910 /* Calculate the free region from here to the next entry. */
911 key.start = key.end = start;
912 IOMMU_DOMAIN_LOCK(domain);
913 next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &key);
914 KASSERT(next != NULL, ("domain %p with end %#jx has no entry "
915 "after %#jx", domain, (uintmax_t)domain->end,
916 (uintmax_t)start));
917 entry_end = ummin(end, next->start);
918 prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
919 if (prev != NULL)
920 entry_start = ummax(start, prev->end);
921 else
922 entry_start = start;
923 start = next->end;
924 /* Reserve the region if non-empty. */
925 if (entry_start != entry_end) {
926 error = iommu_gas_reserve_region_locked(domain,
927 entry_start, entry_end, entry);
928 if (error != 0) {
929 IOMMU_DOMAIN_UNLOCK(domain);
930 break;
931 }
932 entry = NULL;
933 }
934 IOMMU_DOMAIN_UNLOCK(domain);
935 }
936 /* Release a preallocated entry if it was not used. */
937 if (entry != NULL)
938 iommu_gas_free_entry(entry);
939 return (error);
940 }
941
942 void
943 iommu_unmap_msi(struct iommu_ctx *ctx)
944 {
945 struct iommu_map_entry *entry;
946 struct iommu_domain *domain;
947
948 domain = ctx->domain;
949 entry = domain->msi_entry;
950 if (entry == NULL)
951 return;
952
953 domain->ops->unmap(domain, entry->start, entry->end -
954 entry->start, IOMMU_PGF_WAITOK);
955
956 iommu_gas_free_space(entry);
957
958 iommu_gas_free_entry(entry);
959
960 domain->msi_entry = NULL;
961 domain->msi_base = 0;
962 domain->msi_phys = 0;
963 }
964
965 int
966 iommu_map_msi(struct iommu_ctx *ctx, iommu_gaddr_t size, int offset,
967 u_int eflags, u_int flags, vm_page_t *ma)
968 {
969 struct iommu_domain *domain;
970 struct iommu_map_entry *entry;
971 int error;
972
973 error = 0;
974 domain = ctx->domain;
975
976 /* Check if there is already an MSI page allocated */
977 IOMMU_DOMAIN_LOCK(domain);
978 entry = domain->msi_entry;
979 IOMMU_DOMAIN_UNLOCK(domain);
980
981 if (entry == NULL) {
982 error = iommu_gas_map(domain, &ctx->tag->common, size, offset,
983 eflags, flags, ma, &entry);
984 IOMMU_DOMAIN_LOCK(domain);
985 if (error == 0) {
986 if (domain->msi_entry == NULL) {
987 MPASS(domain->msi_base == 0);
988 MPASS(domain->msi_phys == 0);
989
990 domain->msi_entry = entry;
991 domain->msi_base = entry->start;
992 domain->msi_phys = VM_PAGE_TO_PHYS(ma[0]);
993 } else {
994 /*
995 * We lost the race and already have an
996 * MSI page allocated. Free the unneeded entry.
997 */
998 iommu_gas_free_entry(entry);
999 }
1000 } else if (domain->msi_entry != NULL) {
1001 /*
1002 * The allocation failed, but another succeeded.
1003 * Return success as there is a valid MSI page.
1004 */
1005 error = 0;
1006 }
1007 IOMMU_DOMAIN_UNLOCK(domain);
1008 }
1009
1010 return (error);
1011 }
1012
1013 void
1014 iommu_translate_msi(struct iommu_domain *domain, uint64_t *addr)
1015 {
1016
1017 *addr = (*addr - domain->msi_phys) + domain->msi_base;
1018
1019 KASSERT(*addr >= domain->msi_entry->start,
1020 ("%s: Address is below the MSI entry start address (%jx < %jx)",
1021 __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->start));
1022
1023 KASSERT(*addr + sizeof(*addr) <= domain->msi_entry->end,
1024 ("%s: Address is above the MSI entry end address (%jx < %jx)",
1025 __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->end));
1026 }
1027
/* Declare the "iommu" sysctl node under _hw. */
SYSCTL_NODE(_hw, OID_AUTO, iommu, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "");

#ifdef INVARIANTS
/*
 * check_free knob under the node above, backed by iommu_check_free; a
 * non-zero value makes iommu_gas_map() and iommu_gas_free_space() call
 * iommu_gas_check_free().
 *
 * NOTE(review): the description mentions free_after, but the checker in the
 * code visible here validates only free_down -- confirm whether the text is
 * stale.
 */
SYSCTL_INT(_hw_iommu, OID_AUTO, check_free, CTLFLAG_RWTUN,
    &iommu_check_free, 0,
    "Check the GPA RBtree for free_down and free_after validity");
#endif
Cache object: 6f36febcd72a3ab43752d377fd034278
|