FreeBSD/Linux Kernel Cross Reference
sys/amd64/vmm/vmm_dev.c (FreeBSD releng/10.0)

/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: releng/10.0/sys/amd64/vmm/vmm_dev.c 256651 2013-10-16 21:52:54Z neel $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/10.0/sys/amd64/vmm/vmm_dev.c 256651 2013-10-16 21:52:54Z neel $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/sysctl.h>
#include <sys/libkern.h>
#include <sys/ioccom.h>
#include <sys/mman.h>
#include <sys/uio.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

#include <machine/pmap.h>
#include <machine/vmparam.h>

#include <machine/vmm.h>
#include "vmm_lapic.h"
#include "vmm_stat.h"
#include "vmm_mem.h"
#include "io/ppt.h"
#include <machine/vmm_dev.h>

struct vmmdev_softc {
        struct vm       *vm;            /* vm instance cookie */
        struct cdev     *cdev;
        SLIST_ENTRY(vmmdev_softc) link;
        int             flags;
};
#define VSC_LINKED              0x01

static SLIST_HEAD(, vmmdev_softc) head;

static struct mtx vmmdev_mtx;

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static struct vmmdev_softc *
vmmdev_lookup(const char *name)
{
        struct vmmdev_softc *sc;

#ifdef notyet   /* XXX kernel is not compiled with invariants */
        mtx_assert(&vmmdev_mtx, MA_OWNED);
#endif

        SLIST_FOREACH(sc, &head, link) {
                if (strcmp(name, vm_name(sc->vm)) == 0)
                        break;
        }

        return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{

        return (cdev->si_drv1);
}

static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
        int error, off, c, prot;
        vm_paddr_t gpa;
        void *hpa, *cookie;
        struct vmmdev_softc *sc;

        static char zerobuf[PAGE_SIZE];

        error = 0;
        sc = vmmdev_lookup2(cdev);
        if (sc == NULL)
                error = ENXIO;

        prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
        while (uio->uio_resid > 0 && error == 0) {
                gpa = uio->uio_offset;
                off = gpa & PAGE_MASK;
                c = min(uio->uio_resid, PAGE_SIZE - off);

                /*
                 * The VM has a hole in its physical memory map. If we want to
                 * use 'dd' to inspect memory beyond the hole we need to
                 * provide bogus data for memory that lies in the hole.
                 *
                 * Since this device does not support lseek(2), dd(1) will
                 * read(2) blocks of data to simulate the lseek(2).
                 */
                hpa = vm_gpa_hold(sc->vm, gpa, c, prot, &cookie);
                if (hpa == NULL) {
                        if (uio->uio_rw == UIO_READ)
                                error = uiomove(zerobuf, c, uio);
                        else
                                error = EFAULT;
                } else {
                        error = uiomove(hpa, c, uio);
                        vm_gpa_release(cookie);
                }
        }
        return (error);
}
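
/*
 * Illustrative note (not part of the original file): the read/write handler
 * above is what lets host tools inspect guest physical memory through the
 * device node.  A minimal sketch, assuming a VM named "testvm" already
 * exists:
 *
 *      # dump the 4KB guest-physical page at 1MB (offset 256 * 4096)
 *      dd if=/dev/vmm/testvm bs=4096 skip=256 count=1 | hexdump -C
 *
 * Reads that fall in a hole of the guest memory map return zeroes (copied
 * from 'zerobuf'); writes into a hole fail with EFAULT.
 */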

static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
             struct thread *td)
{
        int error, vcpu, state_changed;
        struct vmmdev_softc *sc;
        struct vm_memory_segment *seg;
        struct vm_register *vmreg;
        struct vm_seg_desc *vmsegdesc;
        struct vm_run *vmrun;
        struct vm_event *vmevent;
        struct vm_lapic_irq *vmirq;
        struct vm_capability *vmcap;
        struct vm_pptdev *pptdev;
        struct vm_pptdev_mmio *pptmmio;
        struct vm_pptdev_msi *pptmsi;
        struct vm_pptdev_msix *pptmsix;
        struct vm_nmi *vmnmi;
        struct vm_stats *vmstats;
        struct vm_stat_desc *statdesc;
        struct vm_x2apic *x2apic;
        struct vm_gpa_pte *gpapte;

        sc = vmmdev_lookup2(cdev);
        if (sc == NULL)
                return (ENXIO);

        vcpu = -1;
        state_changed = 0;

        /*
         * Some VMM ioctls can operate only on vcpus that are not running.
         */
        switch (cmd) {
        case VM_RUN:
        case VM_GET_REGISTER:
        case VM_SET_REGISTER:
        case VM_GET_SEGMENT_DESCRIPTOR:
        case VM_SET_SEGMENT_DESCRIPTOR:
        case VM_INJECT_EVENT:
        case VM_GET_CAPABILITY:
        case VM_SET_CAPABILITY:
        case VM_PPTDEV_MSI:
        case VM_PPTDEV_MSIX:
        case VM_SET_X2APIC_STATE:
                /*
                 * XXX fragile, handle with care
                 * Assumes that the first field of the ioctl data is the vcpu.
                 */
                vcpu = *(int *)data;
                if (vcpu < 0 || vcpu >= VM_MAXCPU) {
                        error = EINVAL;
                        goto done;
                }

                error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN);
                if (error)
                        goto done;

                state_changed = 1;
                break;

        case VM_MAP_PPTDEV_MMIO:
        case VM_BIND_PPTDEV:
        case VM_UNBIND_PPTDEV:
        case VM_MAP_MEMORY:
                /*
                 * ioctls that operate on the entire virtual machine must
                 * prevent all vcpus from running.
                 */
                error = 0;
                for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
                        error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN);
                        if (error)
                                break;
                }

                if (error) {
                        while (--vcpu >= 0)
                                vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
                        goto done;
                }

                state_changed = 2;
                break;

        default:
                break;
        }

        switch (cmd) {
        case VM_RUN:
                vmrun = (struct vm_run *)data;
                error = vm_run(sc->vm, vmrun);
                break;
        case VM_STAT_DESC: {
                statdesc = (struct vm_stat_desc *)data;
                error = vmm_stat_desc_copy(statdesc->index,
                                        statdesc->desc, sizeof(statdesc->desc));
                break;
        }
        case VM_STATS: {
                CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
                vmstats = (struct vm_stats *)data;
                getmicrotime(&vmstats->tv);
                error = vmm_stat_copy(sc->vm, vmstats->cpuid,
                                      &vmstats->num_entries, vmstats->statbuf);
                break;
        }
        case VM_PPTDEV_MSI:
                pptmsi = (struct vm_pptdev_msi *)data;
                error = ppt_setup_msi(sc->vm, pptmsi->vcpu,
                                      pptmsi->bus, pptmsi->slot, pptmsi->func,
                                      pptmsi->destcpu, pptmsi->vector,
                                      pptmsi->numvec);
                break;
        case VM_PPTDEV_MSIX:
                pptmsix = (struct vm_pptdev_msix *)data;
                error = ppt_setup_msix(sc->vm, pptmsix->vcpu,
                                       pptmsix->bus, pptmsix->slot,
                                       pptmsix->func, pptmsix->idx,
                                       pptmsix->msg, pptmsix->vector_control,
                                       pptmsix->addr);
                break;
        case VM_MAP_PPTDEV_MMIO:
                pptmmio = (struct vm_pptdev_mmio *)data;
                error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
                                     pptmmio->func, pptmmio->gpa, pptmmio->len,
                                     pptmmio->hpa);
                break;
        case VM_BIND_PPTDEV:
                pptdev = (struct vm_pptdev *)data;
                error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
                                         pptdev->func);
                break;
        case VM_UNBIND_PPTDEV:
                pptdev = (struct vm_pptdev *)data;
                error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
                                           pptdev->func);
                break;
        case VM_INJECT_EVENT:
                vmevent = (struct vm_event *)data;
                error = vm_inject_event(sc->vm, vmevent->cpuid, vmevent->type,
                                        vmevent->vector,
                                        vmevent->error_code,
                                        vmevent->error_code_valid);
                break;
        case VM_INJECT_NMI:
                vmnmi = (struct vm_nmi *)data;
                error = vm_inject_nmi(sc->vm, vmnmi->cpuid);
                break;
        case VM_LAPIC_IRQ:
                vmirq = (struct vm_lapic_irq *)data;
                error = lapic_set_intr(sc->vm, vmirq->cpuid, vmirq->vector);
                break;
        case VM_MAP_MEMORY:
                seg = (struct vm_memory_segment *)data;
                error = vm_malloc(sc->vm, seg->gpa, seg->len);
                break;
        case VM_GET_MEMORY_SEG:
                seg = (struct vm_memory_segment *)data;
                seg->len = 0;
                (void)vm_gpabase2memseg(sc->vm, seg->gpa, seg);
                error = 0;
                break;
        case VM_GET_REGISTER:
                vmreg = (struct vm_register *)data;
                error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
                                        &vmreg->regval);
                break;
        case VM_SET_REGISTER:
                vmreg = (struct vm_register *)data;
                error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
                                        vmreg->regval);
                break;
        case VM_SET_SEGMENT_DESCRIPTOR:
                vmsegdesc = (struct vm_seg_desc *)data;
                error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid,
                                        vmsegdesc->regnum,
                                        &vmsegdesc->desc);
                break;
        case VM_GET_SEGMENT_DESCRIPTOR:
                vmsegdesc = (struct vm_seg_desc *)data;
                error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid,
                                        vmsegdesc->regnum,
                                        &vmsegdesc->desc);
                break;
        case VM_GET_CAPABILITY:
                vmcap = (struct vm_capability *)data;
                error = vm_get_capability(sc->vm, vmcap->cpuid,
                                          vmcap->captype,
                                          &vmcap->capval);
                break;
        case VM_SET_CAPABILITY:
                vmcap = (struct vm_capability *)data;
                error = vm_set_capability(sc->vm, vmcap->cpuid,
                                          vmcap->captype,
                                          vmcap->capval);
                break;
        case VM_SET_X2APIC_STATE:
                x2apic = (struct vm_x2apic *)data;
                error = vm_set_x2apic_state(sc->vm,
                                            x2apic->cpuid, x2apic->state);
                break;
        case VM_GET_X2APIC_STATE:
                x2apic = (struct vm_x2apic *)data;
                error = vm_get_x2apic_state(sc->vm,
                                            x2apic->cpuid, &x2apic->state);
                break;
        case VM_GET_GPA_PMAP:
                gpapte = (struct vm_gpa_pte *)data;
                pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)),
                                 gpapte->gpa, gpapte->pte, &gpapte->ptenum);
                error = 0;
                break;
        default:
                error = ENOTTY;
                break;
        }

        if (state_changed == 1) {
                vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
        } else if (state_changed == 2) {
                for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
                        vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
        }

done:
        /* Make sure that no handler returns a bogus value like ERESTART */
        KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error));
        return (error);
}
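
/*
 * Illustrative note (not part of the original file): userland reaches the
 * handler above via ioctl(2) on the VM's device node, normally through the
 * libvmmapi wrappers.  A hedged sketch of a raw VM_RUN call, assuming a VM
 * named "testvm":
 *
 *      int fd = open("/dev/vmm/testvm", O_RDWR);
 *      struct vm_run vmrun;
 *
 *      memset(&vmrun, 0, sizeof(vmrun));
 *      vmrun.cpuid = 0;
 *      if (ioctl(fd, VM_RUN, &vmrun) == 0)
 *              ... examine vmrun.vm_exit for the exit reason ...
 *
 * Note that 'cpuid' being the first field of the ioctl argument is exactly
 * the layout assumption the '*(int *)data' extraction in the vcpu-freezing
 * preamble relies on.
 */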

static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
                   vm_size_t size, struct vm_object **object, int nprot)
{
        int error;
        struct vmmdev_softc *sc;

        sc = vmmdev_lookup2(cdev);
        if (sc != NULL && (nprot & PROT_EXEC) == 0)
                error = vm_get_memobj(sc->vm, *offset, size, offset, object);
        else
                error = EINVAL;

        return (error);
}
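
/*
 * Illustrative note (not part of the original file): d_mmap_single is what
 * backs mmap(2) of guest physical memory into a host process (e.g.
 * libvmmapi's vm_map_gpa()).  A hedged sketch of the raw call, where 'fd',
 * 'gpa', and 'len' are hypothetical caller-supplied values:
 *
 *      void *gva = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
 *          fd, gpa);
 *
 * PROT_EXEC is rejected above, so guest memory can never be mapped
 * executable into the host.
 */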

static void
vmmdev_destroy(void *arg)
{

        struct vmmdev_softc *sc = arg;

        if (sc->cdev != NULL)
                destroy_dev(sc->cdev);

        if (sc->vm != NULL)
                vm_destroy(sc->vm);

        if ((sc->flags & VSC_LINKED) != 0) {
                mtx_lock(&vmmdev_mtx);
                SLIST_REMOVE(&head, sc, vmmdev_softc, link);
                mtx_unlock(&vmmdev_mtx);
        }

        free(sc, M_VMMDEV);
}

static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
        int error;
        char buf[VM_MAX_NAMELEN];
        struct vmmdev_softc *sc;
        struct cdev *cdev;

        strlcpy(buf, "beavis", sizeof(buf));
        error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
        if (error != 0 || req->newptr == NULL)
                return (error);

        mtx_lock(&vmmdev_mtx);
        sc = vmmdev_lookup(buf);
        if (sc == NULL || sc->cdev == NULL) {
                mtx_unlock(&vmmdev_mtx);
                return (EINVAL);
        }

        /*
         * The 'cdev' will be destroyed asynchronously when 'si_threadcount'
         * goes down to 0 so we should not do it again in the callback.
         */
        cdev = sc->cdev;
        sc->cdev = NULL;
        mtx_unlock(&vmmdev_mtx);

        /*
         * Schedule the 'cdev' to be destroyed:
         *
         * - any new operations on this 'cdev' will return an error (ENXIO).
         *
         * - when the 'si_threadcount' dwindles down to zero the 'cdev' will
         *   be destroyed and the callback will be invoked in a taskqueue
         *   context.
         */
        destroy_dev_sched_cb(cdev, vmmdev_destroy, sc);

        return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW,
            NULL, 0, sysctl_vmm_destroy, "A", NULL);
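
/*
 * Illustrative note (not part of the original file): a VM is torn down by
 * writing its name to this sysctl, e.g. from the shell:
 *
 *      sysctl hw.vmm.destroy=testvm
 *
 * libvmmapi's vm_destroy() issues the equivalent sysctlbyname(3) call.
 * As the comments above describe, the actual teardown happens later, from
 * taskqueue context, once the last thread drains out of the cdev.
 */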

static struct cdevsw vmmdevsw = {
        .d_name         = "vmmdev",
        .d_version      = D_VERSION,
        .d_ioctl        = vmmdev_ioctl,
        .d_mmap_single  = vmmdev_mmap_single,
        .d_read         = vmmdev_rw,
        .d_write        = vmmdev_rw,
};
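
/*
 * Illustrative note (not part of the original file): this cdevsw defines no
 * d_open/d_close methods, so access control comes entirely from the 0600
 * root-owned device node created below; serialization against running vcpus
 * is handled with vcpu_set_state() in the ioctl path rather than by
 * exclusive open.
 */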

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
        int error;
        struct vm *vm;
        struct cdev *cdev;
        struct vmmdev_softc *sc, *sc2;
        char buf[VM_MAX_NAMELEN];

        strlcpy(buf, "beavis", sizeof(buf));
        error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
        if (error != 0 || req->newptr == NULL)
                return (error);

        mtx_lock(&vmmdev_mtx);
        sc = vmmdev_lookup(buf);
        mtx_unlock(&vmmdev_mtx);
        if (sc != NULL)
                return (EEXIST);

        error = vm_create(buf, &vm);
        if (error != 0)
                return (error);

        sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
        sc->vm = vm;

        /*
         * Lookup the name again just in case somebody sneaked in when we
         * dropped the lock.
         */
        mtx_lock(&vmmdev_mtx);
        sc2 = vmmdev_lookup(buf);
        if (sc2 == NULL) {
                SLIST_INSERT_HEAD(&head, sc, link);
                sc->flags |= VSC_LINKED;
        }
        mtx_unlock(&vmmdev_mtx);

        if (sc2 != NULL) {
                vmmdev_destroy(sc);
                return (EEXIST);
        }

        error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL,
                           UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
        if (error != 0) {
                vmmdev_destroy(sc);
                return (error);
        }

        mtx_lock(&vmmdev_mtx);
        sc->cdev = cdev;
        sc->cdev->si_drv1 = sc;
        mtx_unlock(&vmmdev_mtx);

        return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW,
            NULL, 0, sysctl_vmm_create, "A", NULL);
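
/*
 * Illustrative note (not part of the original file): a VM is created through
 * a sysctl write as well, e.g.:
 *
 *      sysctl hw.vmm.create=testvm
 *
 * after which the device node /dev/vmm/testvm appears.  libvmmapi's
 * vm_create() performs the equivalent sysctlbyname(3) call.
 */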

void
vmmdev_init(void)
{
        mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
}

int
vmmdev_cleanup(void)
{
        int error;

        if (SLIST_EMPTY(&head))
                error = 0;
        else
                error = EBUSY;

        return (error);
}
