1 /*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD$
27 */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/queue.h>
35 #include <sys/lock.h>
36 #include <sys/mutex.h>
37 #include <sys/malloc.h>
38 #include <sys/conf.h>
39 #include <sys/sysctl.h>
40 #include <sys/libkern.h>
41 #include <sys/ioccom.h>
42 #include <sys/mman.h>
43 #include <sys/uio.h>
44
45 #include <vm/vm.h>
46 #include <vm/pmap.h>
47 #include <vm/vm_map.h>
48 #include <vm/vm_object.h>
49
50 #include <machine/vmparam.h>
51 #include <machine/vmm.h>
52 #include <machine/vmm_instruction_emul.h>
53 #include <machine/vmm_dev.h>
54
55 #include "vmm_lapic.h"
56 #include "vmm_stat.h"
57 #include "vmm_mem.h"
58 #include "io/ppt.h"
59 #include "io/vatpic.h"
60 #include "io/vioapic.h"
61 #include "io/vhpet.h"
62 #include "io/vrtc.h"
63
/*
 * State for one device-memory segment exposed as /dev/vmm.io/<vm>.<name>.
 */
struct devmem_softc {
	int segid;			/* memory segment id within the vm */
	char *name;			/* segment name supplied at creation */
	struct cdev *cdev;		/* backing character device */
	struct vmmdev_softc *sc;	/* backpointer to the owning vm softc */
	SLIST_ENTRY(devmem_softc) link;	/* entry on vmmdev_softc's devmem list */
};

/*
 * Per-virtual-machine state for the /dev/vmm/<name> device.
 */
struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;		/* /dev/vmm/<name>; NULL while being
					   created or destroyed */
	SLIST_ENTRY(vmmdev_softc) link;	/* entry on the global 'head' list */
	SLIST_HEAD(, devmem_softc) devmem;	/* devmem segments of this vm */
	int		flags;
};
#define	VSC_LINKED		0x01	/* softc is on the global 'head' list */

/* All existing vm softcs; protected by vmmdev_mtx. */
static SLIST_HEAD(, vmmdev_softc) head;

static struct mtx vmmdev_mtx;

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static int devmem_create_cdev(const char *vmname, int id, char *devmem);
static void devmem_destroy(void *arg);
91
92 static int
93 vcpu_lock_one(struct vmmdev_softc *sc, int vcpu)
94 {
95 int error;
96
97 if (vcpu < 0 || vcpu >= vm_get_maxcpus(sc->vm))
98 return (EINVAL);
99
100 error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
101 return (error);
102 }
103
104 static void
105 vcpu_unlock_one(struct vmmdev_softc *sc, int vcpu)
106 {
107 enum vcpu_state state;
108
109 state = vcpu_get_state(sc->vm, vcpu, NULL);
110 if (state != VCPU_FROZEN) {
111 panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm),
112 vcpu, state);
113 }
114
115 vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
116 }
117
118 static int
119 vcpu_lock_all(struct vmmdev_softc *sc)
120 {
121 int error, vcpu;
122 uint16_t maxcpus;
123
124 maxcpus = vm_get_maxcpus(sc->vm);
125 for (vcpu = 0; vcpu < maxcpus; vcpu++) {
126 error = vcpu_lock_one(sc, vcpu);
127 if (error)
128 break;
129 }
130
131 if (error) {
132 while (--vcpu >= 0)
133 vcpu_unlock_one(sc, vcpu);
134 }
135
136 return (error);
137 }
138
139 static void
140 vcpu_unlock_all(struct vmmdev_softc *sc)
141 {
142 int vcpu;
143 uint16_t maxcpus;
144
145 maxcpus = vm_get_maxcpus(sc->vm);
146 for (vcpu = 0; vcpu < maxcpus; vcpu++)
147 vcpu_unlock_one(sc, vcpu);
148 }
149
150 static struct vmmdev_softc *
151 vmmdev_lookup(const char *name)
152 {
153 struct vmmdev_softc *sc;
154
155 #ifdef notyet /* XXX kernel is not compiled with invariants */
156 mtx_assert(&vmmdev_mtx, MA_OWNED);
157 #endif
158
159 SLIST_FOREACH(sc, &head, link) {
160 if (strcmp(name, vm_name(sc->vm)) == 0)
161 break;
162 }
163
164 return (sc);
165 }
166
/*
 * Map a cdev back to its vm softc.  'si_drv1' is set as the last step
 * of vm creation, so a NULL return means the vm is not ready for use.
 */
static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{

	return (cdev->si_drv1);
}
173
/*
 * read(2)/write(2) handler for /dev/vmm/<name>.  Copies data between
 * the caller and guest physical memory at uio_offset, one page at a
 * time.  Reads of unmapped addresses below the top of system memory
 * return zeroes so tools like dd(1) can skip over holes.
 */
static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;
	uint16_t lastcpu;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map by freezing any vcpu.
	 */
	lastcpu = vm_get_maxcpus(sc->vm) - 1;
	error = vcpu_lock_one(sc, lastcpu);
	if (error)
		return (error);

	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		/* Never cross a page boundary in a single copy. */
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold(sc->vm, lastcpu, gpa, c,
		    prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			/* Drop the hold taken by vm_gpa_hold(). */
			vm_gpa_release(cookie);
		}
	}
	vcpu_unlock_one(sc, lastcpu);
	return (error);
}
226
227 CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= SPECNAMELEN + 1);
228
229 static int
230 get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
231 {
232 struct devmem_softc *dsc;
233 int error;
234 bool sysmem;
235
236 error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
237 if (error || mseg->len == 0)
238 return (error);
239
240 if (!sysmem) {
241 SLIST_FOREACH(dsc, &sc->devmem, link) {
242 if (dsc->segid == mseg->segid)
243 break;
244 }
245 KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
246 __func__, mseg->segid));
247 error = copystr(dsc->name, mseg->name, SPECNAMELEN + 1, NULL);
248 } else {
249 bzero(mseg->name, sizeof(mseg->name));
250 }
251
252 return (error);
253 }
254
/*
 * Handle VM_ALLOC_MEMSEG: allocate a memory segment for the vm.  A
 * segment with a name becomes a devmem segment backed by its own cdev;
 * an unnamed segment is system memory.
 */
static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(SPECNAMELEN + 1, M_VMMDEV, M_WAITOK);
		/* Copy the (user-supplied) name, bounding its length. */
		error = copystr(mseg->name, name, SPECNAMELEN + 1, 0);
		if (error)
			goto done;
	}

	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
		if (error)
			/* Roll back the segment allocation above. */
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	/* free(NULL) is a no-op, so this covers all paths. */
	free(name, M_VMMDEV);
	return (error);
}
289
/*
 * Main ioctl dispatcher for /dev/vmm/<name>.
 *
 * Commands that target a single vcpu freeze that vcpu first; commands
 * that mutate VM-wide state freeze all vcpus; commands that only
 * inspect the memory map freeze one vcpu as a read lock.  The variable
 * 'state_changed' records which form of locking was taken (0 = none,
 * 1 = one vcpu, 2 = all vcpus) so it can be undone before returning.
 */
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int error, vcpu, state_changed, size;
	cpuset_t *cpuset;
	struct vmmdev_softc *sc;
	struct vm_register *vmreg;
	struct vm_seg_desc *vmsegdesc;
	struct vm_run *vmrun;
	struct vm_exception *vmexc;
	struct vm_lapic_irq *vmirq;
	struct vm_lapic_msi *vmmsi;
	struct vm_ioapic_irq *ioapic_irq;
	struct vm_isa_irq *isa_irq;
	struct vm_isa_irq_trigger *isa_irq_trigger;
	struct vm_capability *vmcap;
	struct vm_pptdev *pptdev;
	struct vm_pptdev_mmio *pptmmio;
	struct vm_pptdev_msi *pptmsi;
	struct vm_pptdev_msix *pptmsix;
	struct vm_nmi *vmnmi;
	struct vm_stats *vmstats;
	struct vm_stat_desc *statdesc;
	struct vm_x2apic *x2apic;
	struct vm_gpa_pte *gpapte;
	struct vm_suspend *vmsuspend;
	struct vm_gla2gpa *gg;
	struct vm_activate_cpu *vac;
	struct vm_cpuset *vm_cpuset;
	struct vm_intinfo *vmii;
	struct vm_rtc_time *rtctime;
	struct vm_rtc_data *rtcdata;
	struct vm_memmap *mm;
	struct vm_cpu_topology *topology;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	error = 0;
	vcpu = -1;
	state_changed = 0;

	/*
	 * Some VMM ioctls can operate only on vcpus that are not running.
	 */
	switch (cmd) {
	case VM_RUN:
	case VM_GET_REGISTER:
	case VM_SET_REGISTER:
	case VM_GET_SEGMENT_DESCRIPTOR:
	case VM_SET_SEGMENT_DESCRIPTOR:
	case VM_INJECT_EXCEPTION:
	case VM_GET_CAPABILITY:
	case VM_SET_CAPABILITY:
	case VM_PPTDEV_MSI:
	case VM_PPTDEV_MSIX:
	case VM_SET_X2APIC_STATE:
	case VM_GLA2GPA:
	case VM_ACTIVATE_CPU:
	case VM_SET_INTINFO:
	case VM_GET_INTINFO:
	case VM_RESTART_INSTRUCTION:
		/*
		 * XXX fragile, handle with care
		 * Assumes that the first field of the ioctl data is the vcpu.
		 */
		vcpu = *(int *)data;
		error = vcpu_lock_one(sc, vcpu);
		if (error)
			goto done;
		state_changed = 1;
		break;

	case VM_MAP_PPTDEV_MMIO:
	case VM_BIND_PPTDEV:
	case VM_UNBIND_PPTDEV:
	case VM_ALLOC_MEMSEG:
	case VM_MMAP_MEMSEG:
	case VM_REINIT:
		/*
		 * ioctls that operate on the entire virtual machine must
		 * prevent all vcpus from running.
		 */
		error = vcpu_lock_all(sc);
		if (error)
			goto done;
		state_changed = 2;
		break;

	case VM_GET_MEMSEG:
	case VM_MMAP_GETNEXT:
		/*
		 * Lock a vcpu to make sure that the memory map cannot be
		 * modified while it is being inspected.
		 */
		vcpu = vm_get_maxcpus(sc->vm) - 1;
		error = vcpu_lock_one(sc, vcpu);
		if (error)
			goto done;
		state_changed = 1;
		break;

	default:
		/* Commands that need no vcpu locking fall through. */
		break;
	}

	switch(cmd) {
	case VM_RUN:
		vmrun = (struct vm_run *)data;
		error = vm_run(sc->vm, vmrun);
		break;
	case VM_SUSPEND:
		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index,
		    statdesc->desc, sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(sc->vm, vmstats->cpuid,
		    &vmstats->num_entries, vmstats->statbuf);
		break;
	}
	case VM_PPTDEV_MSI:
		pptmsi = (struct vm_pptdev_msi *)data;
		error = ppt_setup_msi(sc->vm, pptmsi->vcpu,
		    pptmsi->bus, pptmsi->slot, pptmsi->func,
		    pptmsi->addr, pptmsi->msg,
		    pptmsi->numvec);
		break;
	case VM_PPTDEV_MSIX:
		pptmsix = (struct vm_pptdev_msix *)data;
		error = ppt_setup_msix(sc->vm, pptmsix->vcpu,
		    pptmsix->bus, pptmsix->slot,
		    pptmsix->func, pptmsix->idx,
		    pptmsix->addr, pptmsix->msg,
		    pptmsix->vector_control);
		break;
	case VM_MAP_PPTDEV_MMIO:
		pptmmio = (struct vm_pptdev_mmio *)data;
		error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
		    pptmmio->func, pptmmio->gpa, pptmmio->len,
		    pptmmio->hpa);
		break;
	case VM_BIND_PPTDEV:
		pptdev = (struct vm_pptdev *)data;
		error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
		    pptdev->func);
		break;
	case VM_UNBIND_PPTDEV:
		pptdev = (struct vm_pptdev *)data;
		error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
		    pptdev->func);
		break;
	case VM_INJECT_EXCEPTION:
		vmexc = (struct vm_exception *)data;
		error = vm_inject_exception(sc->vm, vmexc->cpuid,
		    vmexc->vector, vmexc->error_code_valid, vmexc->error_code,
		    vmexc->restart_instruction);
		break;
	case VM_INJECT_NMI:
		vmnmi = (struct vm_nmi *)data;
		error = vm_inject_nmi(sc->vm, vmnmi->cpuid);
		break;
	case VM_LAPIC_IRQ:
		vmirq = (struct vm_lapic_irq *)data;
		error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector);
		break;
	case VM_LAPIC_LOCAL_IRQ:
		vmirq = (struct vm_lapic_irq *)data;
		error = lapic_set_local_intr(sc->vm, vmirq->cpuid,
		    vmirq->vector);
		break;
	case VM_LAPIC_MSI:
		vmmsi = (struct vm_lapic_msi *)data;
		error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg);
		break;
	case VM_IOAPIC_ASSERT_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_assert_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_DEASSERT_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_PULSE_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_PINCOUNT:
		*(int *)data = vioapic_pincount(sc->vm);
		break;
	case VM_ISA_ASSERT_IRQ:
		/*
		 * ISA irqs are delivered to the atpic and, if a matching
		 * ioapic pin was specified (!= -1), to the ioapic as well.
		 */
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_assert_irq(sc->vm,
			    isa_irq->ioapic_irq);
		break;
	case VM_ISA_DEASSERT_IRQ:
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_deassert_irq(sc->vm,
			    isa_irq->ioapic_irq);
		break;
	case VM_ISA_PULSE_IRQ:
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq);
		break;
	case VM_ISA_SET_IRQ_TRIGGER:
		isa_irq_trigger = (struct vm_isa_irq_trigger *)data;
		error = vatpic_set_irq_trigger(sc->vm,
		    isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger);
		break;
	case VM_MMAP_GETNEXT:
		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	case VM_MMAP_MEMSEG:
		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	case VM_ALLOC_MEMSEG:
		error = alloc_memseg(sc, (struct vm_memseg *)data);
		break;
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data);
		break;
	case VM_GET_REGISTER:
		vmreg = (struct vm_register *)data;
		error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
		    &vmreg->regval);
		break;
	case VM_SET_REGISTER:
		vmreg = (struct vm_register *)data;
		error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
		    vmreg->regval);
		break;
	case VM_SET_SEGMENT_DESCRIPTOR:
		vmsegdesc = (struct vm_seg_desc *)data;
		error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid,
		    vmsegdesc->regnum,
		    &vmsegdesc->desc);
		break;
	case VM_GET_SEGMENT_DESCRIPTOR:
		vmsegdesc = (struct vm_seg_desc *)data;
		error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid,
		    vmsegdesc->regnum,
		    &vmsegdesc->desc);
		break;
	case VM_GET_CAPABILITY:
		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(sc->vm, vmcap->cpuid,
		    vmcap->captype,
		    &vmcap->capval);
		break;
	case VM_SET_CAPABILITY:
		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(sc->vm, vmcap->cpuid,
		    vmcap->captype,
		    vmcap->capval);
		break;
	case VM_SET_X2APIC_STATE:
		x2apic = (struct vm_x2apic *)data;
		error = vm_set_x2apic_state(sc->vm,
		    x2apic->cpuid, x2apic->state);
		break;
	case VM_GET_X2APIC_STATE:
		x2apic = (struct vm_x2apic *)data;
		error = vm_get_x2apic_state(sc->vm,
		    x2apic->cpuid, &x2apic->state);
		break;
	case VM_GET_GPA_PMAP:
		gpapte = (struct vm_gpa_pte *)data;
		pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)),
		    gpapte->gpa, gpapte->pte, &gpapte->ptenum);
		error = 0;
		break;
	case VM_GET_HPET_CAPABILITIES:
		error = vhpet_getcap((struct vm_hpet_cap *)data);
		break;
	case VM_GLA2GPA: {
		/* The userland ABI reuses mmap prot bits for VM_PROT_*. */
		CTASSERT(PROT_READ == VM_PROT_READ);
		CTASSERT(PROT_WRITE == VM_PROT_WRITE);
		CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
		gg = (struct vm_gla2gpa *)data;
		error = vm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla,
		    gg->prot, &gg->gpa, &gg->fault);
		KASSERT(error == 0 || error == EFAULT,
		    ("%s: vm_gla2gpa unknown error %d", __func__, error));
		break;
	}
	case VM_ACTIVATE_CPU:
		vac = (struct vm_activate_cpu *)data;
		error = vm_activate_cpu(sc->vm, vac->vcpuid);
		break;
	case VM_GET_CPUS:
		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		/* Bound the user-supplied buffer size before allocating. */
		if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	case VM_SET_INTINFO:
		vmii = (struct vm_intinfo *)data;
		error = vm_exit_intinfo(sc->vm, vmii->vcpuid, vmii->info1);
		break;
	case VM_GET_INTINFO:
		vmii = (struct vm_intinfo *)data;
		error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1,
		    &vmii->info2);
		break;
	case VM_RTC_WRITE:
		rtcdata = (struct vm_rtc_data *)data;
		error = vrtc_nvram_write(sc->vm, rtcdata->offset,
		    rtcdata->value);
		break;
	case VM_RTC_READ:
		rtcdata = (struct vm_rtc_data *)data;
		error = vrtc_nvram_read(sc->vm, rtcdata->offset,
		    &rtcdata->value);
		break;
	case VM_RTC_SETTIME:
		rtctime = (struct vm_rtc_time *)data;
		error = vrtc_set_time(sc->vm, rtctime->secs);
		break;
	case VM_RTC_GETTIME:
		error = 0;
		rtctime = (struct vm_rtc_time *)data;
		rtctime->secs = vrtc_get_time(sc->vm);
		break;
	case VM_RESTART_INSTRUCTION:
		error = vm_restart_instruction(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY:
		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	case VM_GET_TOPOLOGY:
		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	default:
		error = ENOTTY;
		break;
	}

	/* Undo whichever vcpu locking was taken above. */
	if (state_changed == 1)
		vcpu_unlock_one(sc, vcpu);
	else if (state_changed == 2)
		vcpu_unlock_all(sc);

done:
	/* Make sure that no handler returns a bogus value like ERESTART */
	KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error));
	return (error);
}
678
/*
 * d_mmap_single handler for /dev/vmm/<name>: translate a mmap(2) of a
 * guest-physical range into the VM object backing the containing
 * system-memory segment.  Executable and devmem mappings are rejected.
 */
static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	uint16_t lastcpu;
	bool sysmem;

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map by freezing any vcpu.
	 */
	lastcpu = vm_get_maxcpus(sc->vm) - 1;
	error = vcpu_lock_one(sc, lastcpu);
	if (error)
		return (error);

	/* Walk the memory map looking for a mapping containing the range. */
	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			/* Rebase the offset into the backing object. */
			*offset = segoff + (first - gpa);
		} else {
			/* devmem segments are mapped via their own cdev. */
			error = EINVAL;
		}
	}
	vcpu_unlock_one(sc, lastcpu);
	return (error);
}
738
/*
 * Final destruction of a vm softc.  Invoked either directly on a failed
 * creation or as the destroy_dev_sched_cb() callback once the cdev's
 * thread count has drained (see sysctl_vmm_destroy()).
 */
static void
vmmdev_destroy(void *arg)
{
	struct vmmdev_softc *sc = arg;
	struct devmem_softc *dsc;
	int error;

	/* Freeze all vcpus; none may run while the vm is torn down. */
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		/* devmem cdevs must already be gone (devmem_destroy ran). */
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->cdev != NULL)
		destroy_dev(sc->cdev);

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if ((sc->flags & VSC_LINKED) != 0) {
		mtx_lock(&vmmdev_mtx);
		SLIST_REMOVE(&head, sc, vmmdev_softc, link);
		mtx_unlock(&vmmdev_mtx);
	}

	free(sc, M_VMMDEV);
}
770
/*
 * hw.vmm.destroy sysctl handler: schedule the named virtual machine and
 * all of its devmem cdevs for asynchronous destruction.
 */
static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	int error;
	char buf[VM_MAX_NAMELEN];
	struct devmem_softc *dsc;
	struct vmmdev_softc *sc;
	struct cdev *cdev;

	strlcpy(buf, "beavis", sizeof(buf));
	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
	/* Read-only access (no new string) just returns the placeholder. */
	if (error != 0 || req->newptr == NULL)
		return (error);

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	if (sc == NULL || sc->cdev == NULL) {
		mtx_unlock(&vmmdev_mtx);
		return (EINVAL);
	}

	/*
	 * The 'cdev' will be destroyed asynchronously when 'si_threadcount'
	 * goes down to 0 so we should not do it again in the callback.
	 *
	 * Setting 'sc->cdev' to NULL is also used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	mtx_unlock(&vmmdev_mtx);

	/*
	 * Schedule all cdevs to be destroyed:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - when the 'si_threadcount' dwindles down to zero the 'cdev' will
	 *   be destroyed and the callback will be invoked in a taskqueue
	 *   context.
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		destroy_dev_sched_cb(dsc->cdev, devmem_destroy, dsc);
	}
	destroy_dev_sched_cb(cdev, vmmdev_destroy, sc);
	return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW,
    NULL, 0, sysctl_vmm_destroy, "A", NULL);
823
/* Character device switch for /dev/vmm/<name>. */
static struct cdevsw vmmdevsw = {
	.d_name		= "vmmdev",
	.d_version	= D_VERSION,
	.d_ioctl	= vmmdev_ioctl,
	.d_mmap_single	= vmmdev_mmap_single,
	.d_read		= vmmdev_rw,	/* read/write share one handler */
	.d_write	= vmmdev_rw,
};
832
/*
 * hw.vmm.create sysctl handler: create a virtual machine with the
 * given name and its /dev/vmm/<name> cdev.
 */
static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct vm *vm;
	struct cdev *cdev;
	struct vmmdev_softc *sc, *sc2;
	char buf[VM_MAX_NAMELEN];

	strlcpy(buf, "beavis", sizeof(buf));
	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
	/* Read-only access (no new string) just returns the placeholder. */
	if (error != 0 || req->newptr == NULL)
		return (error);

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	mtx_unlock(&vmmdev_mtx);
	if (sc != NULL)
		return (EEXIST);

	/* vm_create() may sleep, so it is done without the mutex held. */
	error = vm_create(buf, &vm);
	if (error != 0)
		return (error);

	sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	sc->vm = vm;
	SLIST_INIT(&sc->devmem);

	/*
	 * Lookup the name again just in case somebody sneaked in when we
	 * dropped the lock.
	 */
	mtx_lock(&vmmdev_mtx);
	sc2 = vmmdev_lookup(buf);
	if (sc2 == NULL) {
		SLIST_INSERT_HEAD(&head, sc, link);
		sc->flags |= VSC_LINKED;
	}
	mtx_unlock(&vmmdev_mtx);

	if (sc2 != NULL) {
		/* Lost the race: tear down our half-built vm. */
		vmmdev_destroy(sc);
		return (EEXIST);
	}

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
	if (error != 0) {
		vmmdev_destroy(sc);
		return (error);
	}

	/* Publishing 'si_drv1' makes the cdev usable (vmmdev_lookup2). */
	mtx_lock(&vmmdev_mtx);
	sc->cdev = cdev;
	sc->cdev->si_drv1 = sc;
	mtx_unlock(&vmmdev_mtx);

	return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW,
    NULL, 0, sysctl_vmm_create, "A", NULL);
894
/*
 * Module initialization: set up the mutex protecting the global softc
 * list.  Called once when the vmm module is loaded.
 */
void
vmmdev_init(void)
{
	mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
}
900
901 int
902 vmmdev_cleanup(void)
903 {
904 int error;
905
906 if (SLIST_EMPTY(&head))
907 error = 0;
908 else
909 error = EBUSY;
910
911 return (error);
912 }
913
/*
 * d_mmap_single handler for a devmem cdev (/dev/vmm.io/<vm>.<name>):
 * hand back the VM object for the devmem segment, provided the
 * requested range lies entirely within the segment.
 */
static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	uint16_t lastcpu;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	/* Freeze a vcpu to take a read lock on the memory map. */
	lastcpu = vm_get_maxcpus(dsc->sc->vm) - 1;
	error = vcpu_lock_one(dsc->sc, lastcpu);
	if (error)
		return (error);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	vcpu_unlock_one(dsc->sc, lastcpu);

	if (seglen >= last) {
		/* The mapping fits; take a reference for the caller. */
		vm_object_reference(*objp);
		return (0);
	} else {
		return (EINVAL);
	}
}
954
/* Character device switch for devmem cdevs; mmap-only access. */
static struct cdevsw devmemsw = {
	.d_name		= "devmem",
	.d_version	= D_VERSION,
	.d_mmap_single	= devmem_mmap_single,
};
960
/*
 * Create the /dev/vmm.io/<vmname>.<devname> cdev for a devmem segment
 * and link its softc onto the owning vm's devmem list.  On success the
 * softc takes ownership of 'devname' (freed at devmem teardown).
 */
static int
devmem_create_cdev(const char *vmname, int segid, char *devname)
{
	struct devmem_softc *dsc;
	struct vmmdev_softc *sc;
	struct cdev *cdev;
	int error;

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
	if (error)
		return (error);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(vmname);
	KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
	if (sc->cdev == NULL) {
		/* virtual machine is being created or destroyed */
		mtx_unlock(&vmmdev_mtx);
		free(dsc, M_VMMDEV);
		destroy_dev_sched_cb(cdev, NULL, 0);
		return (ENODEV);
	}

	dsc->segid = segid;
	dsc->name = devname;
	dsc->cdev = cdev;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
	mtx_unlock(&vmmdev_mtx);

	/* The 'cdev' is ready for use after 'si_drv1' is initialized */
	cdev->si_drv1 = dsc;
	return (0);
}
998
999 static void
1000 devmem_destroy(void *arg)
1001 {
1002 struct devmem_softc *dsc = arg;
1003
1004 KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
1005 dsc->cdev = NULL;
1006 dsc->sc = NULL;
1007 }
Cache object: 671bf9d7ec08b20181044e8a91eaa594
|