FreeBSD/Linux Kernel Cross Reference
sys/amd64/vmm/vmm_dev.c


/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bhyve_snapshot.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/jail.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/sysctl.h>
#include <sys/libkern.h>
#include <sys/ioccom.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <sys/proc.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_snapshot.h>
#include <x86/apicreg.h>

#include "vmm_lapic.h"
#include "vmm_stat.h"
#include "vmm_mem.h"
#include "io/ppt.h"
#include "io/vatpic.h"
#include "io/vioapic.h"
#include "io/vhpet.h"
#include "io/vrtc.h"

#ifdef COMPAT_FREEBSD13
struct vm_stats_old {
        int             cpuid;                          /* in */
        int             num_entries;                    /* out */
        struct timeval  tv;
        uint64_t        statbuf[MAX_VM_STATS];
};

#define VM_STATS_OLD \
        _IOWR('v', IOCNUM_VM_STATS, struct vm_stats_old)
#endif

struct devmem_softc {
        int     segid;
        char    *name;
        struct cdev *cdev;
        struct vmmdev_softc *sc;
        SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
        struct vm       *vm;            /* vm instance cookie */
        struct cdev     *cdev;
        struct ucred    *ucred;
        SLIST_ENTRY(vmmdev_softc) link;
        SLIST_HEAD(, devmem_softc) devmem;
        int             flags;
};
#define VSC_LINKED              0x01

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct mtx vmmdev_mtx;
MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF);

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static int vmm_priv_check(struct ucred *ucred);
static int devmem_create_cdev(const char *vmname, int id, char *devmem);
static void devmem_destroy(void *arg);

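/*
 * Check whether the calling credential may use vmm(4).  Unjailed callers are
 * always allowed; jailed callers need the allow.vmm jail permission.
 */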
static int
vmm_priv_check(struct ucred *ucred)
{

        if (jailed(ucred) &&
            !(ucred->cr_prison->pr_allow & pr_allow_flag))
                return (EPERM);

        return (0);
}

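/*
 * Freeze a single vcpu so that its state can safely be inspected or modified.
 */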
static int
vcpu_lock_one(struct vcpu *vcpu)
{
        return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

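/*
 * Release a vcpu previously frozen by vcpu_lock_one().  The vcpu must still
 * be in the VCPU_FROZEN state.
 */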
static void
vcpu_unlock_one(struct vmmdev_softc *sc, int vcpuid, struct vcpu *vcpu)
{
        enum vcpu_state state;

        state = vcpu_get_state(vcpu, NULL);
        if (state != VCPU_FROZEN) {
                panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm),
                    vcpuid, state);
        }

        vcpu_set_state(vcpu, VCPU_IDLE, false);
}

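/*
 * Freeze all vcpus of the virtual machine.  On failure, any vcpus that were
 * already frozen are unlocked again before returning.
 */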
static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
        struct vcpu *vcpu;
        int error;
        uint16_t i, j, maxcpus;

        error = 0;      /* don't read 'error' uninitialized if no vcpu exists */
        vm_slock_vcpus(sc->vm);
        maxcpus = vm_get_maxcpus(sc->vm);
        for (i = 0; i < maxcpus; i++) {
                vcpu = vm_vcpu(sc->vm, i);
                if (vcpu == NULL)
                        continue;
                error = vcpu_lock_one(vcpu);
                if (error)
                        break;
        }

        if (error) {
                for (j = 0; j < i; j++) {
                        vcpu = vm_vcpu(sc->vm, j);
                        if (vcpu == NULL)
                                continue;
                        vcpu_unlock_one(sc, j, vcpu);
                }
                vm_unlock_vcpus(sc->vm);
        }

        return (error);
}

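/*
 * Unfreeze all vcpus previously frozen by vcpu_lock_all().
 */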
static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
        struct vcpu *vcpu;
        uint16_t i, maxcpus;

        maxcpus = vm_get_maxcpus(sc->vm);
        for (i = 0; i < maxcpus; i++) {
                vcpu = vm_vcpu(sc->vm, i);
                if (vcpu == NULL)
                        continue;
                vcpu_unlock_one(sc, i, vcpu);
        }
        vm_unlock_vcpus(sc->vm);
}

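/*
 * Find the softc for the named virtual machine.  Returns NULL if the name is
 * unknown or if the caller's credential may not see the VM's owner.
 */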
static struct vmmdev_softc *
vmmdev_lookup(const char *name)
{
        struct vmmdev_softc *sc;

#ifdef notyet   /* XXX kernel is not compiled with invariants */
        mtx_assert(&vmmdev_mtx, MA_OWNED);
#endif

        SLIST_FOREACH(sc, &head, link) {
                if (strcmp(name, vm_name(sc->vm)) == 0)
                        break;
        }

        if (sc == NULL)
                return (NULL);

        if (cr_cansee(curthread->td_ucred, sc->ucred))
                return (NULL);

        return (sc);
}

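/*
 * Find the softc associated with an open /dev/vmm/<name> cdev.
 */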
static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{

        return (cdev->si_drv1);
}

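/*
 * read(2)/write(2) handler for /dev/vmm/<name>: copies data between the
 * caller's buffer and guest physical memory, one page at a time.
 */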
static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
        int error, off, c, prot;
        vm_paddr_t gpa, maxaddr;
        void *hpa, *cookie;
        struct vmmdev_softc *sc;

        error = vmm_priv_check(curthread->td_ucred);
        if (error)
                return (error);

        sc = vmmdev_lookup2(cdev);
        if (sc == NULL)
                return (ENXIO);

        /*
         * Get a read lock on the guest memory map.
         */
        vm_slock_memsegs(sc->vm);

        prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
        maxaddr = vmm_sysmem_maxaddr(sc->vm);
        while (uio->uio_resid > 0 && error == 0) {
                gpa = uio->uio_offset;
                off = gpa & PAGE_MASK;
                c = min(uio->uio_resid, PAGE_SIZE - off);

                /*
                 * The VM has a hole in its physical memory map. If we want to
                 * use 'dd' to inspect memory beyond the hole we need to
                 * provide bogus data for memory that lies in the hole.
                 *
                 * Since this device does not support lseek(2), dd(1) will
                 * read(2) blocks of data to simulate the lseek(2).
                 */
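                /*
                 * For example (assuming a hypothetical VM named "testvm"),
                 * dumping the first page of guest memory from userspace
                 * might look like:
                 *
                 *      dd if=/dev/vmm/testvm bs=4096 count=1 | hexdump -C
                 */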
                hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
                if (hpa == NULL) {
                        if (uio->uio_rw == UIO_READ && gpa < maxaddr)
                                error = uiomove(__DECONST(void *, zero_region),
                                    c, uio);
                        else
                                error = EFAULT;
                } else {
                        error = uiomove(hpa, c, uio);
                        vm_gpa_release(cookie);
                }
        }
        vm_unlock_memsegs(sc->vm);
        return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

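/*
 * Copy the length and, for devmem segments, the name of a memory segment out
 * to the vm_memseg structure supplied by the ioctl caller.
 */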
static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
        struct devmem_softc *dsc;
        int error;
        bool sysmem;

        error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
        if (error || mseg->len == 0)
                return (error);

        if (!sysmem) {
                SLIST_FOREACH(dsc, &sc->devmem, link) {
                        if (dsc->segid == mseg->segid)
                                break;
                }
                KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
                    __func__, mseg->segid));
                error = copystr(dsc->name, mseg->name, len, NULL);
        } else {
                bzero(mseg->name, len);
        }

        return (error);
}

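/*
 * Allocate a memory segment for the VM.  A named segment is device memory
 * and additionally gets a /dev/vmm.io/<vm>.<name> cdev backing it.
 */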
static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
        char *name;
        int error;
        bool sysmem;

        error = 0;
        name = NULL;
        sysmem = true;

        /*
         * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
         * be stripped off when devfs processes the full string.
         */
        if (VM_MEMSEG_NAME(mseg)) {
                sysmem = false;
                name = malloc(len, M_VMMDEV, M_WAITOK);
                error = copystr(mseg->name, name, len, NULL);
                if (error)
                        goto done;
        }

        error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
        if (error)
                goto done;

        if (VM_MEMSEG_NAME(mseg)) {
                error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
                if (error)
                        vm_free_memseg(sc->vm, mseg->segid);
                else
                        name = NULL;    /* freed when 'cdev' is destroyed */
        }
done:
        free(name, M_VMMDEV);
        return (error);
}

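/*
 * Batch helpers for VM_GET_REGISTER_SET/VM_SET_REGISTER_SET: get or set
 * 'count' registers, stopping at the first error.
 */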
static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
        int error, i;

        error = 0;
        for (i = 0; i < count; i++) {
                error = vm_get_register(vcpu, regnum[i], &regval[i]);
                if (error)
                        break;
        }
        return (error);
}

static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
        int error, i;

        error = 0;
        for (i = 0; i < count; i++) {
                error = vm_set_register(vcpu, regnum[i], regval[i]);
                if (error)
                        break;
        }
        return (error);
}

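/*
 * ioctl(2) handler for /dev/vmm/<name>: dispatches the VM control operations
 * used by bhyve(8).
 */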
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
             struct thread *td)
{
        int error, vcpuid, size;
        cpuset_t *cpuset;
        struct vmmdev_softc *sc;
        struct vcpu *vcpu;
        struct vm_register *vmreg;
        struct vm_seg_desc *vmsegdesc;
        struct vm_register_set *vmregset;
        struct vm_run *vmrun;
        struct vm_exception *vmexc;
        struct vm_lapic_irq *vmirq;
        struct vm_lapic_msi *vmmsi;
        struct vm_ioapic_irq *ioapic_irq;
        struct vm_isa_irq *isa_irq;
        struct vm_isa_irq_trigger *isa_irq_trigger;
        struct vm_capability *vmcap;
        struct vm_pptdev *pptdev;
        struct vm_pptdev_mmio *pptmmio;
        struct vm_pptdev_msi *pptmsi;
        struct vm_pptdev_msix *pptmsix;
#ifdef COMPAT_FREEBSD13
        struct vm_stats_old *vmstats_old;
#endif
        struct vm_stats *vmstats;
        struct vm_stat_desc *statdesc;
        struct vm_x2apic *x2apic;
        struct vm_gpa_pte *gpapte;
        struct vm_suspend *vmsuspend;
        struct vm_gla2gpa *gg;
        struct vm_cpuset *vm_cpuset;
        struct vm_intinfo *vmii;
        struct vm_rtc_time *rtctime;
        struct vm_rtc_data *rtcdata;
        struct vm_memmap *mm;
        struct vm_munmap *mu;
        struct vm_cpu_topology *topology;
        struct vm_readwrite_kernemu_device *kernemu;
        uint64_t *regvals;
        int *regnums;
        enum { NONE, SINGLE, ALL } vcpus_locked;
        bool memsegs_locked;
#ifdef BHYVE_SNAPSHOT
        struct vm_snapshot_meta *snapshot_meta;
#endif

        error = vmm_priv_check(curthread->td_ucred);
        if (error)
                return (error);

        sc = vmmdev_lookup2(cdev);
        if (sc == NULL)
                return (ENXIO);

        vcpuid = -1;
        vcpu = NULL;
        vcpus_locked = NONE;
        memsegs_locked = false;

        /*
         * For VMM ioctls that operate on a single vCPU, look up the
         * vcpu.  For VMM ioctls that require one or more vCPUs to
         * not be running, lock the necessary vCPUs.
         *
         * XXX fragile, handle with care
         * Most of these assume that the first field of the ioctl data
         * is the vcpuid.
         */
        switch (cmd) {
        case VM_RUN:
        case VM_GET_REGISTER:
        case VM_SET_REGISTER:
        case VM_GET_SEGMENT_DESCRIPTOR:
        case VM_SET_SEGMENT_DESCRIPTOR:
        case VM_GET_REGISTER_SET:
        case VM_SET_REGISTER_SET:
        case VM_INJECT_EXCEPTION:
        case VM_GET_CAPABILITY:
        case VM_SET_CAPABILITY:
        case VM_SET_X2APIC_STATE:
        case VM_GLA2GPA:
        case VM_GLA2GPA_NOFAULT:
        case VM_ACTIVATE_CPU:
        case VM_SET_INTINFO:
        case VM_GET_INTINFO:
        case VM_RESTART_INSTRUCTION:
        case VM_GET_KERNEMU_DEV:
        case VM_SET_KERNEMU_DEV:
                /*
                 * ioctls that can operate only on vcpus that are not running.
                 */
                vcpuid = *(int *)data;
                vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
                if (vcpu == NULL) {
                        error = EINVAL;
                        goto done;
                }
                error = vcpu_lock_one(vcpu);
                if (error)
                        goto done;
                vcpus_locked = SINGLE;
                break;

#ifdef COMPAT_FREEBSD12
        case VM_ALLOC_MEMSEG_FBSD12:
#endif
        case VM_ALLOC_MEMSEG:
        case VM_BIND_PPTDEV:
        case VM_UNBIND_PPTDEV:
        case VM_MMAP_MEMSEG:
        case VM_MUNMAP_MEMSEG:
        case VM_REINIT:
                /*
                 * ioctls that modify the memory map must lock memory
                 * segments exclusively.
                 */
                vm_xlock_memsegs(sc->vm);
                memsegs_locked = true;
                /* FALLTHROUGH */
        case VM_MAP_PPTDEV_MMIO:
        case VM_UNMAP_PPTDEV_MMIO:
#ifdef BHYVE_SNAPSHOT
        case VM_SNAPSHOT_REQ:
        case VM_RESTORE_TIME:
#endif
                /*
                 * ioctls that operate on the entire virtual machine must
                 * prevent all vcpus from running.
                 */
                error = vcpu_lock_all(sc);
                if (error)
                        goto done;
                vcpus_locked = ALL;
                break;

#ifdef COMPAT_FREEBSD12
        case VM_GET_MEMSEG_FBSD12:
#endif
        case VM_GET_MEMSEG:
        case VM_MMAP_GETNEXT:
                /*
                 * Lock the memory map while it is being inspected.
                 */
                vm_slock_memsegs(sc->vm);
                memsegs_locked = true;
                break;

#ifdef COMPAT_FREEBSD13
        case VM_STATS_OLD:
#endif
        case VM_STATS:
        case VM_INJECT_NMI:
        case VM_LAPIC_IRQ:
        case VM_GET_X2APIC_STATE:
                /*
                 * These do not need the vCPU locked but do operate on
                 * a specific vCPU.
                 */
                vcpuid = *(int *)data;
                vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
                if (vcpu == NULL) {
                        error = EINVAL;
                        goto done;
                }
                break;

        case VM_LAPIC_LOCAL_IRQ:
        case VM_SUSPEND_CPU:
        case VM_RESUME_CPU:
                /*
                 * These can either operate on all CPUs via a vcpuid of
                 * -1 or on a specific vCPU.
                 */
                vcpuid = *(int *)data;
                if (vcpuid == -1)
                        break;
                vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
                if (vcpu == NULL) {
                        error = EINVAL;
                        goto done;
                }
                break;

        default:
                break;
        }

        switch (cmd) {
        case VM_RUN:
                vmrun = (struct vm_run *)data;
                error = vm_run(vcpu, &vmrun->vm_exit);
                break;
        case VM_SUSPEND:
                vmsuspend = (struct vm_suspend *)data;
                error = vm_suspend(sc->vm, vmsuspend->how);
                break;
        case VM_REINIT:
                error = vm_reinit(sc->vm);
                break;
        case VM_STAT_DESC: {
                statdesc = (struct vm_stat_desc *)data;
                error = vmm_stat_desc_copy(statdesc->index,
                                        statdesc->desc, sizeof(statdesc->desc));
                break;
        }
#ifdef COMPAT_FREEBSD13
        case VM_STATS_OLD:
                vmstats_old = (struct vm_stats_old *)data;
                getmicrotime(&vmstats_old->tv);
                error = vmm_stat_copy(vcpu, 0,
                                      nitems(vmstats_old->statbuf),
                                      &vmstats_old->num_entries,
                                      vmstats_old->statbuf);
                break;
#endif
        case VM_STATS: {
                vmstats = (struct vm_stats *)data;
                getmicrotime(&vmstats->tv);
                error = vmm_stat_copy(vcpu, vmstats->index,
                                      nitems(vmstats->statbuf),
                                      &vmstats->num_entries, vmstats->statbuf);
                break;
        }
        case VM_PPTDEV_MSI:
                pptmsi = (struct vm_pptdev_msi *)data;
                error = ppt_setup_msi(sc->vm,
                                      pptmsi->bus, pptmsi->slot, pptmsi->func,
                                      pptmsi->addr, pptmsi->msg,
                                      pptmsi->numvec);
                break;
        case VM_PPTDEV_MSIX:
                pptmsix = (struct vm_pptdev_msix *)data;
                error = ppt_setup_msix(sc->vm,
                                       pptmsix->bus, pptmsix->slot,
                                       pptmsix->func, pptmsix->idx,
                                       pptmsix->addr, pptmsix->msg,
                                       pptmsix->vector_control);
                break;
        case VM_PPTDEV_DISABLE_MSIX:
                pptdev = (struct vm_pptdev *)data;
                error = ppt_disable_msix(sc->vm, pptdev->bus, pptdev->slot,
                                         pptdev->func);
                break;
        case VM_MAP_PPTDEV_MMIO:
                pptmmio = (struct vm_pptdev_mmio *)data;
                error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
                                     pptmmio->func, pptmmio->gpa, pptmmio->len,
                                     pptmmio->hpa);
                break;
        case VM_UNMAP_PPTDEV_MMIO:
                pptmmio = (struct vm_pptdev_mmio *)data;
                error = ppt_unmap_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
                                       pptmmio->func, pptmmio->gpa, pptmmio->len);
                break;
        case VM_BIND_PPTDEV:
                pptdev = (struct vm_pptdev *)data;
                error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
                                         pptdev->func);
                break;
        case VM_UNBIND_PPTDEV:
                pptdev = (struct vm_pptdev *)data;
                error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
                                           pptdev->func);
                break;
        case VM_INJECT_EXCEPTION:
                vmexc = (struct vm_exception *)data;
                error = vm_inject_exception(vcpu,
                    vmexc->vector, vmexc->error_code_valid, vmexc->error_code,
                    vmexc->restart_instruction);
                break;
        case VM_INJECT_NMI:
                error = vm_inject_nmi(vcpu);
                break;
        case VM_LAPIC_IRQ:
                vmirq = (struct vm_lapic_irq *)data;
                error = lapic_intr_edge(vcpu, vmirq->vector);
                break;
        case VM_LAPIC_LOCAL_IRQ:
                vmirq = (struct vm_lapic_irq *)data;
                error = lapic_set_local_intr(sc->vm, vcpu, vmirq->vector);
                break;
        case VM_LAPIC_MSI:
                vmmsi = (struct vm_lapic_msi *)data;
                error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg);
                break;
        case VM_IOAPIC_ASSERT_IRQ:
                ioapic_irq = (struct vm_ioapic_irq *)data;
                error = vioapic_assert_irq(sc->vm, ioapic_irq->irq);
                break;
        case VM_IOAPIC_DEASSERT_IRQ:
                ioapic_irq = (struct vm_ioapic_irq *)data;
                error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq);
                break;
        case VM_IOAPIC_PULSE_IRQ:
                ioapic_irq = (struct vm_ioapic_irq *)data;
                error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq);
                break;
        case VM_IOAPIC_PINCOUNT:
                *(int *)data = vioapic_pincount(sc->vm);
                break;
        case VM_SET_KERNEMU_DEV:
        case VM_GET_KERNEMU_DEV: {
                mem_region_write_t mwrite;
                mem_region_read_t mread;
                bool arg;

                kernemu = (void *)data;

                if (kernemu->access_width > 0)
                        size = (1u << kernemu->access_width);
                else
                        size = 1;

                if (kernemu->gpa >= DEFAULT_APIC_BASE && kernemu->gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
                        mread = lapic_mmio_read;
                        mwrite = lapic_mmio_write;
                } else if (kernemu->gpa >= VIOAPIC_BASE && kernemu->gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
                        mread = vioapic_mmio_read;
                        mwrite = vioapic_mmio_write;
                } else if (kernemu->gpa >= VHPET_BASE && kernemu->gpa < VHPET_BASE + VHPET_SIZE) {
                        mread = vhpet_mmio_read;
                        mwrite = vhpet_mmio_write;
                } else {
                        error = EINVAL;
                        break;
                }

                if (cmd == VM_SET_KERNEMU_DEV)
                        error = mwrite(vcpu, kernemu->gpa,
                            kernemu->value, size, &arg);
                else
                        error = mread(vcpu, kernemu->gpa,
                            &kernemu->value, size, &arg);
                break;
                }
        case VM_ISA_ASSERT_IRQ:
                isa_irq = (struct vm_isa_irq *)data;
                error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq);
                if (error == 0 && isa_irq->ioapic_irq != -1)
                        error = vioapic_assert_irq(sc->vm,
                            isa_irq->ioapic_irq);
                break;
        case VM_ISA_DEASSERT_IRQ:
                isa_irq = (struct vm_isa_irq *)data;
                error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq);
                if (error == 0 && isa_irq->ioapic_irq != -1)
                        error = vioapic_deassert_irq(sc->vm,
                            isa_irq->ioapic_irq);
                break;
        case VM_ISA_PULSE_IRQ:
                isa_irq = (struct vm_isa_irq *)data;
                error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq);
                if (error == 0 && isa_irq->ioapic_irq != -1)
                        error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq);
                break;
        case VM_ISA_SET_IRQ_TRIGGER:
                isa_irq_trigger = (struct vm_isa_irq_trigger *)data;
                error = vatpic_set_irq_trigger(sc->vm,
                    isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger);
                break;
        case VM_MMAP_GETNEXT:
                mm = (struct vm_memmap *)data;
                error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
                    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
                break;
        case VM_MMAP_MEMSEG:
                mm = (struct vm_memmap *)data;
                error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
                    mm->len, mm->prot, mm->flags);
                break;
        case VM_MUNMAP_MEMSEG:
                mu = (struct vm_munmap *)data;
                error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
                break;
#ifdef COMPAT_FREEBSD12
        case VM_ALLOC_MEMSEG_FBSD12:
                error = alloc_memseg(sc, (struct vm_memseg *)data,
                    sizeof(((struct vm_memseg_fbsd12 *)0)->name));
                break;
#endif
        case VM_ALLOC_MEMSEG:
                error = alloc_memseg(sc, (struct vm_memseg *)data,
                    sizeof(((struct vm_memseg *)0)->name));
                break;
#ifdef COMPAT_FREEBSD12
        case VM_GET_MEMSEG_FBSD12:
                error = get_memseg(sc, (struct vm_memseg *)data,
                    sizeof(((struct vm_memseg_fbsd12 *)0)->name));
                break;
#endif
        case VM_GET_MEMSEG:
                error = get_memseg(sc, (struct vm_memseg *)data,
                    sizeof(((struct vm_memseg *)0)->name));
                break;
        case VM_GET_REGISTER:
                vmreg = (struct vm_register *)data;
                error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
                break;
        case VM_SET_REGISTER:
                vmreg = (struct vm_register *)data;
                error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
                break;
        case VM_SET_SEGMENT_DESCRIPTOR:
                vmsegdesc = (struct vm_seg_desc *)data;
                error = vm_set_seg_desc(vcpu,
                                        vmsegdesc->regnum,
                                        &vmsegdesc->desc);
                break;
        case VM_GET_SEGMENT_DESCRIPTOR:
                vmsegdesc = (struct vm_seg_desc *)data;
                error = vm_get_seg_desc(vcpu,
                                        vmsegdesc->regnum,
                                        &vmsegdesc->desc);
                break;
        case VM_GET_REGISTER_SET:
                vmregset = (struct vm_register_set *)data;
                if (vmregset->count > VM_REG_LAST) {
                        error = EINVAL;
                        break;
                }
                regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
                    M_WAITOK);
                regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
                    M_WAITOK);
                error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
                    vmregset->count);
                if (error == 0)
                        error = vm_get_register_set(vcpu,
                            vmregset->count, regnums, regvals);
                if (error == 0)
                        error = copyout(regvals, vmregset->regvals,
                            sizeof(regvals[0]) * vmregset->count);
                free(regvals, M_VMMDEV);
                free(regnums, M_VMMDEV);
                break;
        case VM_SET_REGISTER_SET:
                vmregset = (struct vm_register_set *)data;
                if (vmregset->count > VM_REG_LAST) {
                        error = EINVAL;
                        break;
                }
                regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
                    M_WAITOK);
                regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
                    M_WAITOK);
                error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
                    vmregset->count);
                if (error == 0)
                        error = copyin(vmregset->regvals, regvals,
                            sizeof(regvals[0]) * vmregset->count);
                if (error == 0)
                        error = vm_set_register_set(vcpu,
                            vmregset->count, regnums, regvals);
                free(regvals, M_VMMDEV);
                free(regnums, M_VMMDEV);
                break;
        case VM_GET_CAPABILITY:
                vmcap = (struct vm_capability *)data;
                error = vm_get_capability(vcpu,
                                          vmcap->captype,
                                          &vmcap->capval);
                break;
        case VM_SET_CAPABILITY:
                vmcap = (struct vm_capability *)data;
                error = vm_set_capability(vcpu,
                                          vmcap->captype,
                                          vmcap->capval);
                break;
        case VM_SET_X2APIC_STATE:
                x2apic = (struct vm_x2apic *)data;
                error = vm_set_x2apic_state(vcpu, x2apic->state);
                break;
        case VM_GET_X2APIC_STATE:
                x2apic = (struct vm_x2apic *)data;
                error = vm_get_x2apic_state(vcpu, &x2apic->state);
                break;
        case VM_GET_GPA_PMAP:
                gpapte = (struct vm_gpa_pte *)data;
                pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)),
                                 gpapte->gpa, gpapte->pte, &gpapte->ptenum);
                error = 0;
                break;
        case VM_GET_HPET_CAPABILITIES:
                error = vhpet_getcap((struct vm_hpet_cap *)data);
                break;
        case VM_GLA2GPA: {
                CTASSERT(PROT_READ == VM_PROT_READ);
                CTASSERT(PROT_WRITE == VM_PROT_WRITE);
                CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
                gg = (struct vm_gla2gpa *)data;
                error = vm_gla2gpa(vcpu, &gg->paging, gg->gla,
                    gg->prot, &gg->gpa, &gg->fault);
                KASSERT(error == 0 || error == EFAULT,
                    ("%s: vm_gla2gpa unknown error %d", __func__, error));
                break;
        }
        case VM_GLA2GPA_NOFAULT:
                gg = (struct vm_gla2gpa *)data;
                error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla,
                    gg->prot, &gg->gpa, &gg->fault);
                KASSERT(error == 0 || error == EFAULT,
                    ("%s: vm_gla2gpa unknown error %d", __func__, error));
                break;
        case VM_ACTIVATE_CPU:
                error = vm_activate_cpu(vcpu);
                break;
        case VM_GET_CPUS:
                error = 0;
                vm_cpuset = (struct vm_cpuset *)data;
                size = vm_cpuset->cpusetsize;
                if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) {
                        error = ERANGE;
                        break;
                }
                cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
                if (vm_cpuset->which == VM_ACTIVE_CPUS)
                        *cpuset = vm_active_cpus(sc->vm);
                else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
                        *cpuset = vm_suspended_cpus(sc->vm);
                else if (vm_cpuset->which == VM_DEBUG_CPUS)
                        *cpuset = vm_debug_cpus(sc->vm);
                else
                        error = EINVAL;
                if (error == 0)
                        error = copyout(cpuset, vm_cpuset->cpus, size);
                free(cpuset, M_TEMP);
                break;
        case VM_SUSPEND_CPU:
                error = vm_suspend_cpu(sc->vm, vcpu);
                break;
        case VM_RESUME_CPU:
                error = vm_resume_cpu(sc->vm, vcpu);
                break;
        case VM_SET_INTINFO:
                vmii = (struct vm_intinfo *)data;
                error = vm_exit_intinfo(vcpu, vmii->info1);
                break;
        case VM_GET_INTINFO:
                vmii = (struct vm_intinfo *)data;
                error = vm_get_intinfo(vcpu, &vmii->info1, &vmii->info2);
                break;
        case VM_RTC_WRITE:
                rtcdata = (struct vm_rtc_data *)data;
                error = vrtc_nvram_write(sc->vm, rtcdata->offset,
                    rtcdata->value);
                break;
        case VM_RTC_READ:
                rtcdata = (struct vm_rtc_data *)data;
                error = vrtc_nvram_read(sc->vm, rtcdata->offset,
                    &rtcdata->value);
                break;
        case VM_RTC_SETTIME:
                rtctime = (struct vm_rtc_time *)data;
                error = vrtc_set_time(sc->vm, rtctime->secs);
                break;
        case VM_RTC_GETTIME:
                error = 0;
                rtctime = (struct vm_rtc_time *)data;
                rtctime->secs = vrtc_get_time(sc->vm);
                break;
        case VM_RESTART_INSTRUCTION:
                error = vm_restart_instruction(vcpu);
                break;
        case VM_SET_TOPOLOGY:
                topology = (struct vm_cpu_topology *)data;
                error = vm_set_topology(sc->vm, topology->sockets,
                    topology->cores, topology->threads, topology->maxcpus);
                break;
        case VM_GET_TOPOLOGY:
                topology = (struct vm_cpu_topology *)data;
                vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
                    &topology->threads, &topology->maxcpus);
                error = 0;
                break;
#ifdef BHYVE_SNAPSHOT
        case VM_SNAPSHOT_REQ:
                snapshot_meta = (struct vm_snapshot_meta *)data;
                error = vm_snapshot_req(sc->vm, snapshot_meta);
                break;
        case VM_RESTORE_TIME:
                error = vm_restore_time(sc->vm);
                break;
#endif
        default:
                error = ENOTTY;
                break;
        }

        if (vcpus_locked == SINGLE)
                vcpu_unlock_one(sc, vcpuid, vcpu);
        else if (vcpus_locked == ALL)
                vcpu_unlock_all(sc);
        if (memsegs_locked)
                vm_unlock_memsegs(sc->vm);

done:
        /*
         * Make sure that no handler returns a kernel-internal
         * error value to userspace.
         */
        KASSERT(error == ERESTART || error >= 0,
            ("vmmdev_ioctl: invalid error return %d", error));
        return (error);
}

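/*
 * d_mmap_single handler: translate a mmap(2) of /dev/vmm/<name> at a given
 * guest-physical offset into the VM object backing that system memory
 * segment.  Device memory segments must be mapped via their devmem cdev
 * instead.
 */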
static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
        struct vmmdev_softc *sc;
        vm_paddr_t gpa;
        size_t len;
        vm_ooffset_t segoff, first, last;
        int error, found, segid;
        bool sysmem;

        error = vmm_priv_check(curthread->td_ucred);
        if (error)
                return (error);

        first = *offset;
        last = first + mapsize;
        if ((nprot & PROT_EXEC) || first < 0 || first >= last)
                return (EINVAL);

        sc = vmmdev_lookup2(cdev);
        if (sc == NULL) {
                /* virtual machine is in the process of being created */
                return (EINVAL);
        }

        /*
         * Get a read lock on the guest memory map.
         */
        vm_slock_memsegs(sc->vm);

        gpa = 0;
        found = 0;
        while (!found) {
                error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
                    NULL, NULL);
                if (error)
                        break;

                if (first >= gpa && last <= gpa + len)
                        found = 1;
                else
                        gpa += len;
        }

        if (found) {
                error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
                KASSERT(error == 0 && *objp != NULL,
                    ("%s: invalid memory segment %d", __func__, segid));
                if (sysmem) {
                        vm_object_reference(*objp);
                        *offset = segoff + (first - gpa);
                } else {
                        error = EINVAL;
                }
        }
        vm_unlock_memsegs(sc->vm);
        return (error);
}

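/*
 * Tear down a virtual machine and its softc: freeze the vcpus, free the
 * devmem bookkeeping, destroy the cdev, and destroy the VM itself.
 */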
static void
vmmdev_destroy(void *arg)
{
        struct vmmdev_softc *sc = arg;
        struct devmem_softc *dsc;
        int error __diagused;

        vm_disable_vcpu_creation(sc->vm);
        error = vcpu_lock_all(sc);
        KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
        vm_unlock_vcpus(sc->vm);

        while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
                KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
                SLIST_REMOVE_HEAD(&sc->devmem, link);
                free(dsc->name, M_VMMDEV);
                free(dsc, M_VMMDEV);
        }

        if (sc->cdev != NULL)
                destroy_dev(sc->cdev);

        if (sc->vm != NULL)
                vm_destroy(sc->vm);

        if (sc->ucred != NULL)
                crfree(sc->ucred);

        if ((sc->flags & VSC_LINKED) != 0) {
                mtx_lock(&vmmdev_mtx);
                SLIST_REMOVE(&head, sc, vmmdev_softc, link);
                mtx_unlock(&vmmdev_mtx);
        }

        free(sc, M_VMMDEV);
}

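/*
 * Handler for the hw.vmm.destroy sysctl: destroys the virtual machine whose
 * name is written to the sysctl.
 */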
static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
        struct devmem_softc *dsc;
        struct vmmdev_softc *sc;
        struct cdev *cdev;
        char *buf;
        int error, buflen;

        error = vmm_priv_check(req->td->td_ucred);
        if (error)
                return (error);

        buflen = VM_MAX_NAMELEN + 1;
        buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
        strlcpy(buf, "beavis", buflen);
        error = sysctl_handle_string(oidp, buf, buflen, req);
        if (error != 0 || req->newptr == NULL)
                goto out;

        mtx_lock(&vmmdev_mtx);
        sc = vmmdev_lookup(buf);
        if (sc == NULL || sc->cdev == NULL) {
                mtx_unlock(&vmmdev_mtx);
                error = EINVAL;
                goto out;
        }

        /*
         * Setting 'sc->cdev' to NULL is used to indicate that the VM
         * is scheduled for destruction.
         */
        cdev = sc->cdev;
        sc->cdev = NULL;
        mtx_unlock(&vmmdev_mtx);

        /*
         * Destroy all cdevs:
         *
         * - any new operations on the 'cdev' will return an error (ENXIO).
         *
         * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
         */
        SLIST_FOREACH(dsc, &sc->devmem, link) {
                KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
                destroy_dev(dsc->cdev);
                devmem_destroy(dsc);
        }
        destroy_dev(cdev);
        vmmdev_destroy(sc);
        error = 0;

out:
        free(buf, M_VMMDEV);
        return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    NULL);

static struct cdevsw vmmdevsw = {
        .d_name         = "vmmdev",
        .d_version      = D_VERSION,
        .d_ioctl        = vmmdev_ioctl,
        .d_mmap_single  = vmmdev_mmap_single,
        .d_read         = vmmdev_rw,
        .d_write        = vmmdev_rw,
};

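/*
 * Handler for the hw.vmm.create sysctl: creates a new virtual machine with
 * the name written to the sysctl and exposes it as /dev/vmm/<name>.
 */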
static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
        struct vm *vm;
        struct cdev *cdev;
        struct vmmdev_softc *sc, *sc2;
        char *buf;
        int error, buflen;

        error = vmm_priv_check(req->td->td_ucred);
        if (error)
                return (error);

        buflen = VM_MAX_NAMELEN + 1;
        buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
        strlcpy(buf, "beavis", buflen);
        error = sysctl_handle_string(oidp, buf, buflen, req);
        if (error != 0 || req->newptr == NULL)
                goto out;

        mtx_lock(&vmmdev_mtx);
        sc = vmmdev_lookup(buf);
        mtx_unlock(&vmmdev_mtx);
        if (sc != NULL) {
                error = EEXIST;
                goto out;
        }

        error = vm_create(buf, &vm);
        if (error != 0)
                goto out;

        sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
        sc->ucred = crhold(curthread->td_ucred);
        sc->vm = vm;
        SLIST_INIT(&sc->devmem);

        /*
         * Look up the name again just in case somebody sneaked in when we
         * dropped the lock.
         */
        mtx_lock(&vmmdev_mtx);
        sc2 = vmmdev_lookup(buf);
        if (sc2 == NULL) {
                SLIST_INSERT_HEAD(&head, sc, link);
                sc->flags |= VSC_LINKED;
        }
        mtx_unlock(&vmmdev_mtx);

        if (sc2 != NULL) {
                vmmdev_destroy(sc);
                error = EEXIST;
                goto out;
        }

        error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred,
            UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
        if (error != 0) {
                vmmdev_destroy(sc);
                goto out;
        }

        mtx_lock(&vmmdev_mtx);
        sc->cdev = cdev;
        sc->cdev->si_drv1 = sc;
        mtx_unlock(&vmmdev_mtx);

out:
        free(buf, M_VMMDEV);
        return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    NULL);
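
/*
 * Example usage from userspace (with a hypothetical VM name):
 *
 *      sysctl hw.vmm.create=testvm
 *      ... operate on /dev/vmm/testvm ...
 *      sysctl hw.vmm.destroy=testvm
 */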

void
vmmdev_init(void)
{
        pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
            "Allow use of vmm in a jail.");
}

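/*
 * Module unload hook: report EBUSY while any virtual machine still exists.
 */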
int
vmmdev_cleanup(void)
{
        int error;

        if (SLIST_EMPTY(&head))
                error = 0;
        else
                error = EBUSY;

        return (error);
}

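/*
 * d_mmap_single handler for devmem cdevs: map a range of the backing devmem
 * segment, provided the request fits entirely within it.
 */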
static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
        struct devmem_softc *dsc;
        vm_ooffset_t first, last;
        size_t seglen;
        int error;
        bool sysmem;

        dsc = cdev->si_drv1;
        if (dsc == NULL) {
                /* 'cdev' has been created but is not ready for use */
                return (ENXIO);
        }

        first = *offset;
        last = *offset + len;
        if ((nprot & PROT_EXEC) || first < 0 || first >= last)
                return (EINVAL);

        vm_slock_memsegs(dsc->sc->vm);

        error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
        KASSERT(error == 0 && !sysmem && *objp != NULL,
            ("%s: invalid devmem segment %d", __func__, dsc->segid));

        if (seglen >= last)
                vm_object_reference(*objp);
        else
                error = EINVAL;

        vm_unlock_memsegs(dsc->sc->vm);
        return (error);
}

static struct cdevsw devmemsw = {
        .d_name         = "devmem",
        .d_version      = D_VERSION,
        .d_mmap_single  = devmem_mmap_single,
};

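/*
 * Create the /dev/vmm.io/<vm>.<name> cdev for a devmem segment and link its
 * softc into the owning VM's devmem list.
 */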
static int
devmem_create_cdev(const char *vmname, int segid, char *devname)
{
        struct devmem_softc *dsc;
        struct vmmdev_softc *sc;
        struct cdev *cdev;
        int error;

        error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
            UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
        if (error)
                return (error);

        dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);

        mtx_lock(&vmmdev_mtx);
        sc = vmmdev_lookup(vmname);
        KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
        if (sc->cdev == NULL) {
                /* virtual machine is being created or destroyed */
                mtx_unlock(&vmmdev_mtx);
                free(dsc, M_VMMDEV);
                destroy_dev_sched_cb(cdev, NULL, 0);
                return (ENODEV);
        }

        dsc->segid = segid;
        dsc->name = devname;
        dsc->cdev = cdev;
        dsc->sc = sc;
        SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
        mtx_unlock(&vmmdev_mtx);

        /* The 'cdev' is ready for use after 'si_drv1' is initialized */
        cdev->si_drv1 = dsc;
        return (0);
}

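/*
 * Detach a devmem softc from its cdev; the cdev itself is destroyed by the
 * caller and the softc is freed later in vmmdev_destroy().
 */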
static void
devmem_destroy(void *arg)
{
        struct devmem_softc *dsc = arg;

        KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
        dsc->cdev = NULL;
        dsc->sc = NULL;
}
