FreeBSD/Linux Kernel Cross Reference
sys/amd64/vmm/vmm_dev.c


/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: releng/12.0/sys/amd64/vmm/vmm_dev.c 337023 2018-08-01 00:39:21Z araujo $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/12.0/sys/amd64/vmm/vmm_dev.c 337023 2018-08-01 00:39:21Z araujo $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/jail.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/sysctl.h>
#include <sys/libkern.h>
#include <sys/ioccom.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <sys/proc.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_dev.h>

#include "vmm_lapic.h"
#include "vmm_stat.h"
#include "vmm_mem.h"
#include "io/ppt.h"
#include "io/vatpic.h"
#include "io/vioapic.h"
#include "io/vhpet.h"
#include "io/vrtc.h"

struct devmem_softc {
        int     segid;
        char    *name;
        struct cdev *cdev;
        struct vmmdev_softc *sc;
        SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
        struct vm       *vm;            /* vm instance cookie */
        struct cdev     *cdev;
        SLIST_ENTRY(vmmdev_softc) link;
        SLIST_HEAD(, devmem_softc) devmem;
        int             flags;
};
#define VSC_LINKED              0x01

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct mtx vmmdev_mtx;

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static int vmm_priv_check(struct ucred *ucred);
static int devmem_create_cdev(const char *vmname, int id, char *devmem);
static void devmem_destroy(void *arg);

/*
 * Deny access from within a jail unless the jail has been granted
 * permission to use vmm (see vmmdev_init() below).
 */
static int
vmm_priv_check(struct ucred *ucred)
{

        if (jailed(ucred) &&
            !(ucred->cr_prison->pr_allow & pr_allow_flag))
                return (EPERM);

        return (0);
}
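
/*
 * Editor's usage note (illustrative, not part of the original source):
 * the pr_allow_flag tested above is registered in vmmdev_init() as the
 * jail permission "vmm", so a jail created with that permission enabled,
 * e.g.:
 *
 *      # jail -c name=vmjail path=/ allow.vmm persist
 *
 * may use the vmm ioctls; any other jailed credential gets EPERM.
 */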

/*
 * Freeze the given vcpu so that its state can be examined or modified
 * without it running.
 */
static int
vcpu_lock_one(struct vmmdev_softc *sc, int vcpu)
{
        int error;

        if (vcpu < 0 || vcpu >= VM_MAXCPU)
                return (EINVAL);

        error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
        return (error);
}

static void
vcpu_unlock_one(struct vmmdev_softc *sc, int vcpu)
{
        enum vcpu_state state;

        state = vcpu_get_state(sc->vm, vcpu, NULL);
        if (state != VCPU_FROZEN) {
                panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm),
                    vcpu, state);
        }

        vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
}

/*
 * Freeze all vcpus, unwinding any already-frozen vcpus on failure.
 */
static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
        int error, vcpu;

        for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
                error = vcpu_lock_one(sc, vcpu);
                if (error)
                        break;
        }

        if (error) {
                while (--vcpu >= 0)
                        vcpu_unlock_one(sc, vcpu);
        }

        return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
        int vcpu;

        for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
                vcpu_unlock_one(sc, vcpu);
}

static struct vmmdev_softc *
vmmdev_lookup(const char *name)
{
        struct vmmdev_softc *sc;

#ifdef notyet   /* XXX kernel is not compiled with invariants */
        mtx_assert(&vmmdev_mtx, MA_OWNED);
#endif

        SLIST_FOREACH(sc, &head, link) {
                if (strcmp(name, vm_name(sc->vm)) == 0)
                        break;
        }

        return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{

        return (cdev->si_drv1);
}

static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
        int error, off, c, prot;
        vm_paddr_t gpa, maxaddr;
        void *hpa, *cookie;
        struct vmmdev_softc *sc;

        error = vmm_priv_check(curthread->td_ucred);
        if (error)
                return (error);

        sc = vmmdev_lookup2(cdev);
        if (sc == NULL)
                return (ENXIO);

        /*
         * Get a read lock on the guest memory map by freezing any vcpu.
         */
        error = vcpu_lock_one(sc, VM_MAXCPU - 1);
        if (error)
                return (error);

        prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
        maxaddr = vmm_sysmem_maxaddr(sc->vm);
        while (uio->uio_resid > 0 && error == 0) {
                gpa = uio->uio_offset;
                off = gpa & PAGE_MASK;
                c = min(uio->uio_resid, PAGE_SIZE - off);

                /*
                 * The VM has a hole in its physical memory map.  If we want
                 * to use 'dd' to inspect memory beyond the hole we need to
                 * provide zeroed data (from zero_region) for memory that
                 * lies in the hole.
                 *
                 * Since this device does not support lseek(2), dd(1) will
                 * read(2) blocks of data to simulate the lseek(2).
                 */
                hpa = vm_gpa_hold(sc->vm, VM_MAXCPU - 1, gpa, c, prot, &cookie);
                if (hpa == NULL) {
                        if (uio->uio_rw == UIO_READ && gpa < maxaddr)
                                error = uiomove(__DECONST(void *, zero_region),
                                    c, uio);
                        else
                                error = EFAULT;
                } else {
                        error = uiomove(hpa, c, uio);
                        vm_gpa_release(cookie);
                }
        }
        vcpu_unlock_one(sc, VM_MAXCPU - 1);
        return (error);
}
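
/*
 * Editor's usage sketch (not in the original file): because d_read and
 * d_write in the cdevsw below both point at vmmdev_rw(), guest physical
 * memory can be inspected from the host with dd(1).  For example, dumping
 * one page at guest physical address 0x100000 of a (hypothetical) VM
 * named "testvm":
 *
 *      # dd if=/dev/vmm/testvm bs=4096 skip=256 count=1 | hexdump -C
 *
 * Reads that fall inside a hole in the guest memory map return zeros, as
 * described in the comment above, so dd(1) can stream past the hole.
 */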

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= SPECNAMELEN + 1);

static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
{
        struct devmem_softc *dsc;
        int error;
        bool sysmem;

        error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
        if (error || mseg->len == 0)
                return (error);

        if (!sysmem) {
                SLIST_FOREACH(dsc, &sc->devmem, link) {
                        if (dsc->segid == mseg->segid)
                                break;
                }
                KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
                    __func__, mseg->segid));
                error = copystr(dsc->name, mseg->name, SPECNAMELEN + 1, NULL);
        } else {
                bzero(mseg->name, sizeof(mseg->name));
        }

        return (error);
}

static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
{
        char *name;
        int error;
        bool sysmem;

        error = 0;
        name = NULL;
        sysmem = true;

        if (VM_MEMSEG_NAME(mseg)) {
                sysmem = false;
                name = malloc(SPECNAMELEN + 1, M_VMMDEV, M_WAITOK);
                error = copystr(mseg->name, name, SPECNAMELEN + 1, NULL);
                if (error)
                        goto done;
        }

        error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
        if (error)
                goto done;

        if (VM_MEMSEG_NAME(mseg)) {
                error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
                if (error)
                        vm_free_memseg(sc->vm, mseg->segid);
                else
                        name = NULL;    /* freed when 'cdev' is destroyed */
        }
done:
        free(name, M_VMMDEV);
        return (error);
}

static int
vm_get_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
        int error, i;

        error = 0;
        for (i = 0; i < count; i++) {
                error = vm_get_register(vm, vcpu, regnum[i], &regval[i]);
                if (error)
                        break;
        }
        return (error);
}

static int
vm_set_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
        int error, i;

        error = 0;
        for (i = 0; i < count; i++) {
                error = vm_set_register(vm, vcpu, regnum[i], regval[i]);
                if (error)
                        break;
        }
        return (error);
}
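
/*
 * Editor's illustration (a sketch, not part of the original source): the
 * two helpers above back the VM_GET_REGISTER_SET/VM_SET_REGISTER_SET
 * ioctls handled below.  Assuming the definitions from <machine/vmm.h>
 * and <machine/vmm_dev.h>, a userspace caller might fetch two registers
 * of vcpu 0 in one call (bhyve itself goes through libvmmapi instead):
 *
 *      int regnums[2] = { VM_REG_GUEST_RIP, VM_REG_GUEST_RSP };
 *      uint64_t regvals[2];
 *      struct vm_register_set vrs = {
 *              .cpuid = 0,
 *              .count = 2,
 *              .regnums = regnums,
 *              .regvals = regvals,
 *      };
 *      if (ioctl(vmfd, VM_GET_REGISTER_SET, &vrs) == 0)
 *              printf("rip 0x%lx rsp 0x%lx\n", regvals[0], regvals[1]);
 *
 * The ioctl handler bounds-checks 'count' against VM_REG_LAST, copies
 * 'regnums' in, runs the loop above, and copies 'regvals' back out.
 */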

static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
             struct thread *td)
{
        int error, vcpu, state_changed, size;
        cpuset_t *cpuset;
        struct vmmdev_softc *sc;
        struct vm_register *vmreg;
        struct vm_seg_desc *vmsegdesc;
        struct vm_register_set *vmregset;
        struct vm_run *vmrun;
        struct vm_exception *vmexc;
        struct vm_lapic_irq *vmirq;
        struct vm_lapic_msi *vmmsi;
        struct vm_ioapic_irq *ioapic_irq;
        struct vm_isa_irq *isa_irq;
        struct vm_isa_irq_trigger *isa_irq_trigger;
        struct vm_capability *vmcap;
        struct vm_pptdev *pptdev;
        struct vm_pptdev_mmio *pptmmio;
        struct vm_pptdev_msi *pptmsi;
        struct vm_pptdev_msix *pptmsix;
        struct vm_nmi *vmnmi;
        struct vm_stats *vmstats;
        struct vm_stat_desc *statdesc;
        struct vm_x2apic *x2apic;
        struct vm_gpa_pte *gpapte;
        struct vm_suspend *vmsuspend;
        struct vm_gla2gpa *gg;
        struct vm_activate_cpu *vac;
        struct vm_cpuset *vm_cpuset;
        struct vm_intinfo *vmii;
        struct vm_rtc_time *rtctime;
        struct vm_rtc_data *rtcdata;
        struct vm_memmap *mm;
        struct vm_cpu_topology *topology;
        uint64_t *regvals;
        int *regnums;

        error = vmm_priv_check(curthread->td_ucred);
        if (error)
                return (error);

        sc = vmmdev_lookup2(cdev);
        if (sc == NULL)
                return (ENXIO);

        vcpu = -1;
        state_changed = 0;

        /*
         * Some VMM ioctls can operate only on vcpus that are not running.
         */
        switch (cmd) {
        case VM_RUN:
        case VM_GET_REGISTER:
        case VM_SET_REGISTER:
        case VM_GET_SEGMENT_DESCRIPTOR:
        case VM_SET_SEGMENT_DESCRIPTOR:
        case VM_GET_REGISTER_SET:
        case VM_SET_REGISTER_SET:
        case VM_INJECT_EXCEPTION:
        case VM_GET_CAPABILITY:
        case VM_SET_CAPABILITY:
        case VM_PPTDEV_MSI:
        case VM_PPTDEV_MSIX:
        case VM_SET_X2APIC_STATE:
        case VM_GLA2GPA:
        case VM_GLA2GPA_NOFAULT:
        case VM_ACTIVATE_CPU:
        case VM_SET_INTINFO:
        case VM_GET_INTINFO:
        case VM_RESTART_INSTRUCTION:
                /*
                 * XXX fragile, handle with care
                 * Assumes that the first field of the ioctl data is the vcpu.
                 */
                vcpu = *(int *)data;
                error = vcpu_lock_one(sc, vcpu);
                if (error)
                        goto done;
                state_changed = 1;
                break;

        case VM_MAP_PPTDEV_MMIO:
        case VM_BIND_PPTDEV:
        case VM_UNBIND_PPTDEV:
        case VM_ALLOC_MEMSEG:
        case VM_MMAP_MEMSEG:
        case VM_REINIT:
                /*
                 * ioctls that operate on the entire virtual machine must
                 * prevent all vcpus from running.
                 */
                error = vcpu_lock_all(sc);
                if (error)
                        goto done;
                state_changed = 2;
                break;

        case VM_GET_MEMSEG:
        case VM_MMAP_GETNEXT:
                /*
                 * Lock a vcpu to make sure that the memory map cannot be
                 * modified while it is being inspected.
                 */
                vcpu = VM_MAXCPU - 1;
                error = vcpu_lock_one(sc, vcpu);
                if (error)
                        goto done;
                state_changed = 1;
                break;

        default:
                break;
        }

        switch (cmd) {
        case VM_RUN:
                vmrun = (struct vm_run *)data;
                error = vm_run(sc->vm, vmrun);
                break;
        case VM_SUSPEND:
                vmsuspend = (struct vm_suspend *)data;
                error = vm_suspend(sc->vm, vmsuspend->how);
                break;
        case VM_REINIT:
                error = vm_reinit(sc->vm);
                break;
        case VM_STAT_DESC: {
                statdesc = (struct vm_stat_desc *)data;
                error = vmm_stat_desc_copy(statdesc->index,
                                        statdesc->desc, sizeof(statdesc->desc));
                break;
        }
        case VM_STATS: {
                CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
                vmstats = (struct vm_stats *)data;
                getmicrotime(&vmstats->tv);
                error = vmm_stat_copy(sc->vm, vmstats->cpuid,
                                      &vmstats->num_entries, vmstats->statbuf);
                break;
        }
        case VM_PPTDEV_MSI:
                pptmsi = (struct vm_pptdev_msi *)data;
                error = ppt_setup_msi(sc->vm, pptmsi->vcpu,
                                      pptmsi->bus, pptmsi->slot, pptmsi->func,
                                      pptmsi->addr, pptmsi->msg,
                                      pptmsi->numvec);
                break;
        case VM_PPTDEV_MSIX:
                pptmsix = (struct vm_pptdev_msix *)data;
                error = ppt_setup_msix(sc->vm, pptmsix->vcpu,
                                       pptmsix->bus, pptmsix->slot,
                                       pptmsix->func, pptmsix->idx,
                                       pptmsix->addr, pptmsix->msg,
                                       pptmsix->vector_control);
                break;
        case VM_MAP_PPTDEV_MMIO:
                pptmmio = (struct vm_pptdev_mmio *)data;
                error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
                                     pptmmio->func, pptmmio->gpa, pptmmio->len,
                                     pptmmio->hpa);
                break;
        case VM_BIND_PPTDEV:
                pptdev = (struct vm_pptdev *)data;
                error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
                                         pptdev->func);
                break;
        case VM_UNBIND_PPTDEV:
                pptdev = (struct vm_pptdev *)data;
                error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
                                           pptdev->func);
                break;
        case VM_INJECT_EXCEPTION:
                vmexc = (struct vm_exception *)data;
                error = vm_inject_exception(sc->vm, vmexc->cpuid,
                    vmexc->vector, vmexc->error_code_valid, vmexc->error_code,
                    vmexc->restart_instruction);
                break;
        case VM_INJECT_NMI:
                vmnmi = (struct vm_nmi *)data;
                error = vm_inject_nmi(sc->vm, vmnmi->cpuid);
                break;
        case VM_LAPIC_IRQ:
                vmirq = (struct vm_lapic_irq *)data;
                error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector);
                break;
        case VM_LAPIC_LOCAL_IRQ:
                vmirq = (struct vm_lapic_irq *)data;
                error = lapic_set_local_intr(sc->vm, vmirq->cpuid,
                    vmirq->vector);
                break;
        case VM_LAPIC_MSI:
                vmmsi = (struct vm_lapic_msi *)data;
                error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg);
                break;
        case VM_IOAPIC_ASSERT_IRQ:
                ioapic_irq = (struct vm_ioapic_irq *)data;
                error = vioapic_assert_irq(sc->vm, ioapic_irq->irq);
                break;
        case VM_IOAPIC_DEASSERT_IRQ:
                ioapic_irq = (struct vm_ioapic_irq *)data;
                error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq);
                break;
        case VM_IOAPIC_PULSE_IRQ:
                ioapic_irq = (struct vm_ioapic_irq *)data;
                error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq);
                break;
        case VM_IOAPIC_PINCOUNT:
                *(int *)data = vioapic_pincount(sc->vm);
                break;
        case VM_ISA_ASSERT_IRQ:
                isa_irq = (struct vm_isa_irq *)data;
                error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq);
                if (error == 0 && isa_irq->ioapic_irq != -1)
                        error = vioapic_assert_irq(sc->vm,
                            isa_irq->ioapic_irq);
                break;
        case VM_ISA_DEASSERT_IRQ:
                isa_irq = (struct vm_isa_irq *)data;
                error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq);
                if (error == 0 && isa_irq->ioapic_irq != -1)
                        error = vioapic_deassert_irq(sc->vm,
                            isa_irq->ioapic_irq);
                break;
        case VM_ISA_PULSE_IRQ:
                isa_irq = (struct vm_isa_irq *)data;
                error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq);
                if (error == 0 && isa_irq->ioapic_irq != -1)
                        error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq);
                break;
        case VM_ISA_SET_IRQ_TRIGGER:
                isa_irq_trigger = (struct vm_isa_irq_trigger *)data;
                error = vatpic_set_irq_trigger(sc->vm,
                    isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger);
                break;
        case VM_MMAP_GETNEXT:
                mm = (struct vm_memmap *)data;
                error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
                    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
                break;
        case VM_MMAP_MEMSEG:
                mm = (struct vm_memmap *)data;
                error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
                    mm->len, mm->prot, mm->flags);
                break;
        case VM_ALLOC_MEMSEG:
                error = alloc_memseg(sc, (struct vm_memseg *)data);
                break;
        case VM_GET_MEMSEG:
                error = get_memseg(sc, (struct vm_memseg *)data);
                break;
        case VM_GET_REGISTER:
                vmreg = (struct vm_register *)data;
                error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
                                        &vmreg->regval);
                break;
        case VM_SET_REGISTER:
                vmreg = (struct vm_register *)data;
                error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
                                        vmreg->regval);
                break;
        case VM_SET_SEGMENT_DESCRIPTOR:
                vmsegdesc = (struct vm_seg_desc *)data;
                error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid,
                                        vmsegdesc->regnum,
                                        &vmsegdesc->desc);
                break;
        case VM_GET_SEGMENT_DESCRIPTOR:
                vmsegdesc = (struct vm_seg_desc *)data;
                error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid,
                                        vmsegdesc->regnum,
                                        &vmsegdesc->desc);
                break;
        case VM_GET_REGISTER_SET:
                vmregset = (struct vm_register_set *)data;
                if (vmregset->count > VM_REG_LAST) {
                        error = EINVAL;
                        break;
                }
                regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
                    M_WAITOK);
                regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
                    M_WAITOK);
                error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
                    vmregset->count);
                if (error == 0)
                        error = vm_get_register_set(sc->vm, vmregset->cpuid,
                            vmregset->count, regnums, regvals);
                if (error == 0)
                        error = copyout(regvals, vmregset->regvals,
                            sizeof(regvals[0]) * vmregset->count);
                free(regvals, M_VMMDEV);
                free(regnums, M_VMMDEV);
                break;
        case VM_SET_REGISTER_SET:
                vmregset = (struct vm_register_set *)data;
                if (vmregset->count > VM_REG_LAST) {
                        error = EINVAL;
                        break;
                }
                regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
                    M_WAITOK);
                regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
                    M_WAITOK);
                error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
                    vmregset->count);
                if (error == 0)
                        error = copyin(vmregset->regvals, regvals,
                            sizeof(regvals[0]) * vmregset->count);
                if (error == 0)
                        error = vm_set_register_set(sc->vm, vmregset->cpuid,
                            vmregset->count, regnums, regvals);
                free(regvals, M_VMMDEV);
                free(regnums, M_VMMDEV);
                break;
        case VM_GET_CAPABILITY:
                vmcap = (struct vm_capability *)data;
                error = vm_get_capability(sc->vm, vmcap->cpuid,
                                          vmcap->captype,
                                          &vmcap->capval);
                break;
        case VM_SET_CAPABILITY:
                vmcap = (struct vm_capability *)data;
                error = vm_set_capability(sc->vm, vmcap->cpuid,
                                          vmcap->captype,
                                          vmcap->capval);
                break;
        case VM_SET_X2APIC_STATE:
                x2apic = (struct vm_x2apic *)data;
                error = vm_set_x2apic_state(sc->vm,
                                            x2apic->cpuid, x2apic->state);
                break;
        case VM_GET_X2APIC_STATE:
                x2apic = (struct vm_x2apic *)data;
                error = vm_get_x2apic_state(sc->vm,
                                            x2apic->cpuid, &x2apic->state);
                break;
        case VM_GET_GPA_PMAP:
                gpapte = (struct vm_gpa_pte *)data;
                pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)),
                                 gpapte->gpa, gpapte->pte, &gpapte->ptenum);
                error = 0;
                break;
        case VM_GET_HPET_CAPABILITIES:
                error = vhpet_getcap((struct vm_hpet_cap *)data);
                break;
        case VM_GLA2GPA: {
                CTASSERT(PROT_READ == VM_PROT_READ);
                CTASSERT(PROT_WRITE == VM_PROT_WRITE);
                CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
                gg = (struct vm_gla2gpa *)data;
                error = vm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla,
                    gg->prot, &gg->gpa, &gg->fault);
                KASSERT(error == 0 || error == EFAULT,
                    ("%s: vm_gla2gpa unknown error %d", __func__, error));
                break;
        }
        case VM_GLA2GPA_NOFAULT:
                gg = (struct vm_gla2gpa *)data;
                error = vm_gla2gpa_nofault(sc->vm, gg->vcpuid, &gg->paging,
                    gg->gla, gg->prot, &gg->gpa, &gg->fault);
                KASSERT(error == 0 || error == EFAULT,
                    ("%s: vm_gla2gpa unknown error %d", __func__, error));
                break;
        case VM_ACTIVATE_CPU:
                vac = (struct vm_activate_cpu *)data;
                error = vm_activate_cpu(sc->vm, vac->vcpuid);
                break;
        case VM_GET_CPUS:
                error = 0;
                vm_cpuset = (struct vm_cpuset *)data;
                size = vm_cpuset->cpusetsize;
                if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) {
                        error = ERANGE;
                        break;
                }
                cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
                if (vm_cpuset->which == VM_ACTIVE_CPUS)
                        *cpuset = vm_active_cpus(sc->vm);
                else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
                        *cpuset = vm_suspended_cpus(sc->vm);
                else if (vm_cpuset->which == VM_DEBUG_CPUS)
                        *cpuset = vm_debug_cpus(sc->vm);
                else
                        error = EINVAL;
                if (error == 0)
                        error = copyout(cpuset, vm_cpuset->cpus, size);
                free(cpuset, M_TEMP);
                break;
        case VM_SUSPEND_CPU:
                vac = (struct vm_activate_cpu *)data;
                error = vm_suspend_cpu(sc->vm, vac->vcpuid);
                break;
        case VM_RESUME_CPU:
                vac = (struct vm_activate_cpu *)data;
                error = vm_resume_cpu(sc->vm, vac->vcpuid);
                break;
        case VM_SET_INTINFO:
                vmii = (struct vm_intinfo *)data;
                error = vm_exit_intinfo(sc->vm, vmii->vcpuid, vmii->info1);
                break;
        case VM_GET_INTINFO:
                vmii = (struct vm_intinfo *)data;
                error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1,
                    &vmii->info2);
                break;
        case VM_RTC_WRITE:
                rtcdata = (struct vm_rtc_data *)data;
                error = vrtc_nvram_write(sc->vm, rtcdata->offset,
                    rtcdata->value);
                break;
        case VM_RTC_READ:
                rtcdata = (struct vm_rtc_data *)data;
                error = vrtc_nvram_read(sc->vm, rtcdata->offset,
                    &rtcdata->value);
                break;
        case VM_RTC_SETTIME:
                rtctime = (struct vm_rtc_time *)data;
                error = vrtc_set_time(sc->vm, rtctime->secs);
                break;
        case VM_RTC_GETTIME:
                error = 0;
                rtctime = (struct vm_rtc_time *)data;
                rtctime->secs = vrtc_get_time(sc->vm);
                break;
        case VM_RESTART_INSTRUCTION:
                error = vm_restart_instruction(sc->vm, vcpu);
                break;
        case VM_SET_TOPOLOGY:
                topology = (struct vm_cpu_topology *)data;
                error = vm_set_topology(sc->vm, topology->sockets,
                    topology->cores, topology->threads, topology->maxcpus);
                break;
        case VM_GET_TOPOLOGY:
                topology = (struct vm_cpu_topology *)data;
                vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
                    &topology->threads, &topology->maxcpus);
                error = 0;
                break;
        default:
                error = ENOTTY;
                break;
        }

        if (state_changed == 1)
                vcpu_unlock_one(sc, vcpu);
        else if (state_changed == 2)
                vcpu_unlock_all(sc);

done:
        /* Make sure that no handler returns a bogus value like ERESTART */
        KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error));
        return (error);
}
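
/*
 * Editor's note (not in the original source): every ioctl in the first
 * switch above must carry the target vcpu id in the leading int of its
 * argument structure, since the handler fetches it blindly through
 * '*(int *)data' before freezing that vcpu.  A hedged sketch of a
 * conforming call from userspace, assuming <machine/vmm_dev.h> is
 * included and 'vmfd' is an open descriptor for /dev/vmm/<name>:
 *
 *      struct vm_register vmreg;
 *
 *      memset(&vmreg, 0, sizeof(vmreg));
 *      vmreg.cpuid = 0;                        (first field: the vcpu)
 *      vmreg.regnum = VM_REG_GUEST_RIP;
 *      error = ioctl(vmfd, VM_GET_REGISTER, &vmreg);
 *
 * A new per-vcpu ioctl whose structure did not begin with the vcpu id
 * would silently freeze the wrong vcpu.
 */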

static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
        struct vmmdev_softc *sc;
        vm_paddr_t gpa;
        size_t len;
        vm_ooffset_t segoff, first, last;
        int error, found, segid;
        bool sysmem;

        error = vmm_priv_check(curthread->td_ucred);
        if (error)
                return (error);

        first = *offset;
        last = first + mapsize;
        if ((nprot & PROT_EXEC) || first < 0 || first >= last)
                return (EINVAL);

        sc = vmmdev_lookup2(cdev);
        if (sc == NULL) {
                /* virtual machine is in the process of being created */
                return (EINVAL);
        }

        /*
         * Get a read lock on the guest memory map by freezing any vcpu.
         */
        error = vcpu_lock_one(sc, VM_MAXCPU - 1);
        if (error)
                return (error);

        gpa = 0;
        found = 0;
        while (!found) {
                error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
                    NULL, NULL);
                if (error)
                        break;

                if (first >= gpa && last <= gpa + len)
                        found = 1;
                else
                        gpa += len;
        }

        if (found) {
                error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
                KASSERT(error == 0 && *objp != NULL,
                    ("%s: invalid memory segment %d", __func__, segid));
                if (sysmem) {
                        vm_object_reference(*objp);
                        *offset = segoff + (first - gpa);
                } else {
                        error = EINVAL;
                }
        }
        vcpu_unlock_one(sc, VM_MAXCPU - 1);
        return (error);
}
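
/*
 * Editor's usage sketch (illustrative only): d_mmap_single lets the host
 * map guest system memory directly, with the file offset interpreted as
 * a guest physical address.  For a hypothetical VM "testvm":
 *
 *      int vmfd = open("/dev/vmm/testvm", O_RDWR);
 *      void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
 *          vmfd, 0);
 *
 * maps the guest page at physical address 0.  PROT_EXEC mappings, ranges
 * not contained in a single memory mapping, and devmem segments (which
 * have their own /dev/vmm.io nodes) are all rejected with EINVAL above.
 */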

static void
vmmdev_destroy(void *arg)
{
        struct vmmdev_softc *sc = arg;
        struct devmem_softc *dsc;
        int error;

        error = vcpu_lock_all(sc);
        KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));

        while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
                KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
                SLIST_REMOVE_HEAD(&sc->devmem, link);
                free(dsc->name, M_VMMDEV);
                free(dsc, M_VMMDEV);
        }

        if (sc->cdev != NULL)
                destroy_dev(sc->cdev);

        if (sc->vm != NULL)
                vm_destroy(sc->vm);

        if ((sc->flags & VSC_LINKED) != 0) {
                mtx_lock(&vmmdev_mtx);
                SLIST_REMOVE(&head, sc, vmmdev_softc, link);
                mtx_unlock(&vmmdev_mtx);
        }

        free(sc, M_VMMDEV);
}

static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
        int error;
        char buf[VM_MAX_NAMELEN];
        struct devmem_softc *dsc;
        struct vmmdev_softc *sc;
        struct cdev *cdev;

        error = vmm_priv_check(req->td->td_ucred);
        if (error)
                return (error);

        strlcpy(buf, "beavis", sizeof(buf));
        error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
        if (error != 0 || req->newptr == NULL)
                return (error);

        mtx_lock(&vmmdev_mtx);
        sc = vmmdev_lookup(buf);
        if (sc == NULL || sc->cdev == NULL) {
                mtx_unlock(&vmmdev_mtx);
                return (EINVAL);
        }

        /*
         * The 'cdev' will be destroyed asynchronously when 'si_threadcount'
         * goes down to 0 so we should not do it again in the callback.
         *
         * Setting 'sc->cdev' to NULL is also used to indicate that the VM
         * is scheduled for destruction.
         */
        cdev = sc->cdev;
        sc->cdev = NULL;
        mtx_unlock(&vmmdev_mtx);

        /*
         * Schedule all cdevs to be destroyed:
         *
         * - any new operations on the 'cdev' will return an error (ENXIO).
         *
         * - when the 'si_threadcount' dwindles down to zero the 'cdev' will
         *   be destroyed and the callback will be invoked in a taskqueue
         *   context.
         *
         * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
         */
        SLIST_FOREACH(dsc, &sc->devmem, link) {
                KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
                destroy_dev_sched_cb(dsc->cdev, devmem_destroy, dsc);
        }
        destroy_dev_sched_cb(cdev, vmmdev_destroy, sc);
        return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
            CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON,
            NULL, 0, sysctl_vmm_destroy, "A", NULL);

static struct cdevsw vmmdevsw = {
        .d_name         = "vmmdev",
        .d_version      = D_VERSION,
        .d_ioctl        = vmmdev_ioctl,
        .d_mmap_single  = vmmdev_mmap_single,
        .d_read         = vmmdev_rw,
        .d_write        = vmmdev_rw,
};

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
        int error;
        struct vm *vm;
        struct cdev *cdev;
        struct vmmdev_softc *sc, *sc2;
        char buf[VM_MAX_NAMELEN];

        error = vmm_priv_check(req->td->td_ucred);
        if (error)
                return (error);

        strlcpy(buf, "beavis", sizeof(buf));
        error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
        if (error != 0 || req->newptr == NULL)
                return (error);

        mtx_lock(&vmmdev_mtx);
        sc = vmmdev_lookup(buf);
        mtx_unlock(&vmmdev_mtx);
        if (sc != NULL)
                return (EEXIST);

        error = vm_create(buf, &vm);
        if (error != 0)
                return (error);

        sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
        sc->vm = vm;
        SLIST_INIT(&sc->devmem);

        /*
         * Look up the name again in case somebody sneaked in while we
         * dropped the lock.
         */
        mtx_lock(&vmmdev_mtx);
        sc2 = vmmdev_lookup(buf);
        if (sc2 == NULL) {
                SLIST_INSERT_HEAD(&head, sc, link);
                sc->flags |= VSC_LINKED;
        }
        mtx_unlock(&vmmdev_mtx);

        if (sc2 != NULL) {
                vmmdev_destroy(sc);
                return (EEXIST);
        }

        error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL,
                           UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
        if (error != 0) {
                vmmdev_destroy(sc);
                return (error);
        }

        mtx_lock(&vmmdev_mtx);
        sc->cdev = cdev;
        sc->cdev->si_drv1 = sc;
        mtx_unlock(&vmmdev_mtx);

        return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
            CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON,
            NULL, 0, sysctl_vmm_create, "A", NULL);
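
/*
 * Editor's usage sketch (not in the original source): the VM lifecycle
 * is driven entirely through the hw.vmm.create and hw.vmm.destroy sysctl
 * nodes above.  From a root shell on the host:
 *
 *      # sysctl hw.vmm.create=testvm
 *      # ls /dev/vmm
 *      testvm
 *      # sysctl hw.vmm.destroy=testvm
 *
 * libvmmapi's vm_create() and vm_destroy() wrap the same sysctl writes,
 * which is how bhyve(8) and bhyvectl(8) reach this code.
 */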

void
vmmdev_init(void)
{
        mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
        pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
            "Allow use of vmm in a jail.");
}

int
vmmdev_cleanup(void)
{
        int error;

        if (SLIST_EMPTY(&head))
                error = 0;
        else
                error = EBUSY;

        return (error);
}

static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
        struct devmem_softc *dsc;
        vm_ooffset_t first, last;
        size_t seglen;
        int error;
        bool sysmem;

        dsc = cdev->si_drv1;
        if (dsc == NULL) {
                /* 'cdev' has been created but is not ready for use */
                return (ENXIO);
        }

        first = *offset;
        last = *offset + len;
        if ((nprot & PROT_EXEC) || first < 0 || first >= last)
                return (EINVAL);

        error = vcpu_lock_one(dsc->sc, VM_MAXCPU - 1);
        if (error)
                return (error);

        error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
        KASSERT(error == 0 && !sysmem && *objp != NULL,
            ("%s: invalid devmem segment %d", __func__, dsc->segid));

        vcpu_unlock_one(dsc->sc, VM_MAXCPU - 1);

        if (seglen >= last) {
                vm_object_reference(*objp);
                return (0);
        } else {
                return (EINVAL);
        }
}

static struct cdevsw devmemsw = {
        .d_name         = "devmem",
        .d_version      = D_VERSION,
        .d_mmap_single  = devmem_mmap_single,
};

static int
devmem_create_cdev(const char *vmname, int segid, char *devname)
{
        struct devmem_softc *dsc;
        struct vmmdev_softc *sc;
        struct cdev *cdev;
        int error;

        error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
            UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
        if (error)
                return (error);

        dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);

        mtx_lock(&vmmdev_mtx);
        sc = vmmdev_lookup(vmname);
        KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
        if (sc->cdev == NULL) {
                /* virtual machine is being created or destroyed */
                mtx_unlock(&vmmdev_mtx);
                free(dsc, M_VMMDEV);
                destroy_dev_sched_cb(cdev, NULL, NULL);
                return (ENODEV);
        }

        dsc->segid = segid;
        dsc->name = devname;
        dsc->cdev = cdev;
        dsc->sc = sc;
        SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
        mtx_unlock(&vmmdev_mtx);

        /* The 'cdev' is ready for use after 'si_drv1' is initialized */
        cdev->si_drv1 = dsc;
        return (0);
}
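
/*
 * Editor's illustration (hypothetical names): a device-memory segment
 * called "bootrom" belonging to a VM named "testvm" appears as
 * /dev/vmm.io/testvm.bootrom, per the "vmm.io/%s.%s" format above.  It
 * can be mapped with mmap(2) through devmem_mmap_single(), except that
 * the offset is relative to the start of the segment rather than being a
 * guest physical address.
 */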

static void
devmem_destroy(void *arg)
{
        struct devmem_softc *dsc = arg;

        KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
        dsc->cdev = NULL;
        dsc->sc = NULL;
}
