The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/x86/xen/hvm.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2008, 2013 Citrix Systems, Inc.
    5  * Copyright (c) 2012 Spectra Logic Corporation
    6  * All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  */
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD$");
   32 
   33 #include <sys/param.h>
   34 #include <sys/bus.h>
   35 #include <sys/kernel.h>
   36 #include <sys/malloc.h>
   37 #include <sys/proc.h>
   38 #include <sys/smp.h>
   39 #include <sys/systm.h>
   40 
   41 #include <vm/vm.h>
   42 #include <vm/pmap.h>
   43 #include <vm/vm_param.h>
   44 
   45 #include <dev/pci/pcivar.h>
   46 
   47 #include <machine/cpufunc.h>
   48 #include <machine/cpu.h>
   49 #include <machine/smp.h>
   50 
   51 #include <x86/apicreg.h>
   52 
   53 #include <xen/xen-os.h>
   54 #include <xen/error.h>
   55 #include <xen/features.h>
   56 #include <xen/gnttab.h>
   57 #include <xen/hypervisor.h>
   58 #include <xen/hvm.h>
   59 #include <xen/xen_intr.h>
   60 
   61 #include <contrib/xen/arch-x86/cpuid.h>
   62 #include <contrib/xen/hvm/params.h>
   63 #include <contrib/xen/vcpu.h>
   64 
   65 /*--------------------------- Forward Declarations ---------------------------*/
   66 static void xen_hvm_cpu_init(void);
   67 
   68 /*-------------------------------- Global Data -------------------------------*/
   69 enum xen_domain_type xen_domain_type = XEN_NATIVE;
   70 
   71 #ifdef SMP
   72 struct cpu_ops xen_hvm_cpu_ops = {
   73         .cpu_init       = xen_hvm_cpu_init,
   74         .cpu_resume     = xen_hvm_cpu_init
   75 };
   76 #endif
   77 
   78 static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support");
   79 
   80 /**
   81  * If non-zero, the hypervisor has been configured to use a direct
   82  * IDT event callback for interrupt injection.
   83  */
   84 int xen_vector_callback_enabled;
   85 
   86 /**
   87  * Start info flags. ATM this only used to store the initial domain flag for
   88  * PVHv2, and it's always empty for HVM guests.
   89  */
   90 uint32_t hvm_start_flags;
   91 
   92 /**
   93  * Signal whether the vector injected for the event channel upcall requires to
   94  * be EOI'ed on the local APIC.
   95  */
   96 bool xen_evtchn_needs_ack;
   97 
   98 /*------------------------------- Per-CPU Data -------------------------------*/
   99 DPCPU_DEFINE(struct vcpu_info, vcpu_local_info);
  100 DPCPU_DEFINE(struct vcpu_info *, vcpu_info);
  101 
  102 /*------------------ Hypervisor Access Shared Memory Regions -----------------*/
  103 shared_info_t *HYPERVISOR_shared_info;
  104 
  105 /*------------------------------ Sysctl tunables -----------------------------*/
  106 int xen_disable_pv_disks = 0;
  107 int xen_disable_pv_nics = 0;
  108 TUNABLE_INT("hw.xen.disable_pv_disks", &xen_disable_pv_disks);
  109 TUNABLE_INT("hw.xen.disable_pv_nics", &xen_disable_pv_nics);
  110 
  111 /*---------------------- XEN Hypervisor Probe and Setup ----------------------*/
  112 
  113 uint32_t xen_cpuid_base;
  114 
  115 static uint32_t
  116 xen_hvm_cpuid_base(void)
  117 {
  118         uint32_t base, regs[4];
  119 
  120         for (base = 0x40000000; base < 0x40010000; base += 0x100) {
  121                 do_cpuid(base, regs);
  122                 if (!memcmp("XenVMMXenVMM", &regs[1], 12)
  123                     && (regs[0] - base) >= 2)
  124                         return (base);
  125         }
  126         return (0);
  127 }
  128 
  129 static void
  130 hypervisor_quirks(unsigned int major, unsigned int minor)
  131 {
  132 #ifdef SMP
  133         if (((major < 4) || (major == 4 && minor <= 5)) &&
  134             msix_disable_migration == -1) {
  135                 /*
  136                  * Xen hypervisors prior to 4.6.0 do not properly
  137                  * handle updates to enabled MSI-X table entries,
  138                  * so disable MSI-X interrupt migration in that
  139                  * case.
  140                  */
  141                 if (bootverbose)
  142                         printf(
  143 "Disabling MSI-X interrupt migration due to Xen hypervisor bug.\n"
  144 "Set machdep.msix_disable_migration=0 to forcefully enable it.\n");
  145                 msix_disable_migration = 1;
  146         }
  147 #endif
  148 }
  149 
  150 static void
  151 hypervisor_version(void)
  152 {
  153         uint32_t regs[4];
  154         int major, minor;
  155 
  156         do_cpuid(xen_cpuid_base + 1, regs);
  157 
  158         major = regs[0] >> 16;
  159         minor = regs[0] & 0xffff;
  160         printf("XEN: Hypervisor version %d.%d detected.\n", major, minor);
  161 
  162         hypervisor_quirks(major, minor);
  163 }
  164 
  165 /*
  166  * Allocate and fill in the hypcall page.
  167  */
  168 int
  169 xen_hvm_init_hypercall_stubs(enum xen_hvm_init_type init_type)
  170 {
  171         uint32_t regs[4];
  172 
  173         /* Legacy PVH will get here without the cpuid leaf being set. */
  174         if (xen_cpuid_base == 0)
  175                 xen_cpuid_base = xen_hvm_cpuid_base();
  176         if (xen_cpuid_base == 0)
  177                 return (ENXIO);
  178 
  179         if (xen_domain() && init_type == XEN_HVM_INIT_LATE) {
  180                 /*
  181                  * If the domain type is already set we can assume that the
  182                  * hypercall page has been populated too, so just print the
  183                  * version (and apply any quirks) and exit.
  184                  */
  185                 hypervisor_version();
  186                 return 0;
  187         }
  188 
  189         if (init_type == XEN_HVM_INIT_LATE)
  190                 hypervisor_version();
  191 
  192         /*
  193          * Find the hypercall pages.
  194          */
  195         do_cpuid(xen_cpuid_base + 2, regs);
  196         if (regs[0] != 1)
  197                 return (EINVAL);
  198 
  199         wrmsr(regs[1], (init_type == XEN_HVM_INIT_EARLY)
  200             ? (vm_paddr_t)((uintptr_t)&hypercall_page - KERNBASE)
  201             : vtophys(&hypercall_page));
  202 
  203         return (0);
  204 }
  205 
  206 static void
  207 xen_hvm_init_shared_info_page(void)
  208 {
  209         struct xen_add_to_physmap xatp;
  210 
  211         if (xen_pv_domain()) {
  212                 /*
  213                  * Already setup in the PV case, shared_info is passed inside
  214                  * of the start_info struct at start of day.
  215                  */
  216                 return;
  217         }
  218 
  219         if (HYPERVISOR_shared_info == NULL) {
  220                 HYPERVISOR_shared_info = malloc(PAGE_SIZE, M_XENHVM, M_NOWAIT);
  221                 if (HYPERVISOR_shared_info == NULL)
  222                         panic("Unable to allocate Xen shared info page");
  223         }
  224 
  225         xatp.domid = DOMID_SELF;
  226         xatp.idx = 0;
  227         xatp.space = XENMAPSPACE_shared_info;
  228         xatp.gpfn = vtophys(HYPERVISOR_shared_info) >> PAGE_SHIFT;
  229         if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
  230                 panic("HYPERVISOR_memory_op failed");
  231 }
  232 
  233 static int
  234 set_percpu_callback(unsigned int vcpu)
  235 {
  236         struct xen_hvm_evtchn_upcall_vector vec;
  237         int error;
  238 
  239         vec.vcpu = vcpu;
  240         vec.vector = IDT_EVTCHN;
  241         error = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &vec);
  242 
  243         return (error != 0 ? xen_translate_error(error) : 0);
  244 }
  245 
  246 /*
  247  * Tell the hypervisor how to contact us for event channel callbacks.
  248  */
  249 void
  250 xen_hvm_set_callback(device_t dev)
  251 {
  252         struct xen_hvm_param xhp;
  253         int irq;
  254 
  255         if (xen_vector_callback_enabled)
  256                 return;
  257 
  258         xhp.domid = DOMID_SELF;
  259         xhp.index = HVM_PARAM_CALLBACK_IRQ;
  260         if (xen_feature(XENFEAT_hvm_callback_vector) != 0) {
  261                 int error;
  262 
  263                 error = set_percpu_callback(0);
  264                 if (error == 0) {
  265                         xen_evtchn_needs_ack = true;
  266                         /* Trick toolstack to think we are enlightened */
  267                         xhp.value = 1;
  268                 } else
  269                         xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN);
  270                 error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp);
  271                 if (error == 0) {
  272                         xen_vector_callback_enabled = 1;
  273                         return;
  274                 } else if (xen_evtchn_needs_ack)
  275                         panic("Unable to setup fake HVM param: %d", error);
  276 
  277                 printf("Xen HVM callback vector registration failed (%d). "
  278                     "Falling back to emulated device interrupt\n", error);
  279         }
  280         xen_vector_callback_enabled = 0;
  281         if (dev == NULL) {
  282                 /*
  283                  * Called from early boot or resume.
  284                  * xenpci will invoke us again later.
  285                  */
  286                 return;
  287         }
  288 
  289         irq = pci_get_irq(dev);
  290         if (irq < 16) {
  291                 xhp.value = HVM_CALLBACK_GSI(irq);
  292         } else {
  293                 u_int slot;
  294                 u_int pin;
  295 
  296                 slot = pci_get_slot(dev);
  297                 pin = pci_get_intpin(dev) - 1;
  298                 xhp.value = HVM_CALLBACK_PCI_INTX(slot, pin);
  299         }
  300 
  301         if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp) != 0)
  302                 panic("Can't set evtchn callback");
  303 }
  304 
  305 #define XEN_MAGIC_IOPORT 0x10
  306 enum {
  307         XMI_MAGIC                        = 0x49d2,
  308         XMI_UNPLUG_IDE_DISKS             = 0x01,
  309         XMI_UNPLUG_NICS                  = 0x02,
  310         XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04
  311 };
  312 
  313 static void
  314 xen_hvm_disable_emulated_devices(void)
  315 {
  316         u_short disable_devs = 0;
  317 
  318         if (xen_pv_domain()) {
  319                 /*
  320                  * No emulated devices in the PV case, so no need to unplug
  321                  * anything.
  322                  */
  323                 if (xen_disable_pv_disks != 0 || xen_disable_pv_nics != 0)
  324                         printf("PV devices cannot be disabled in PV guests\n");
  325                 return;
  326         }
  327 
  328         if (inw(XEN_MAGIC_IOPORT) != XMI_MAGIC)
  329                 return;
  330 
  331         if (xen_disable_pv_disks == 0) {
  332                 if (bootverbose)
  333                         printf("XEN: disabling emulated disks\n");
  334                 disable_devs |= XMI_UNPLUG_IDE_DISKS;
  335         }
  336         if (xen_disable_pv_nics == 0) {
  337                 if (bootverbose)
  338                         printf("XEN: disabling emulated nics\n");
  339                 disable_devs |= XMI_UNPLUG_NICS;
  340         }
  341 
  342         if (disable_devs != 0)
  343                 outw(XEN_MAGIC_IOPORT, disable_devs);
  344 }
  345 
  346 static void
  347 xen_hvm_init(enum xen_hvm_init_type init_type)
  348 {
  349         int error;
  350         int i;
  351 
  352         if (init_type == XEN_HVM_INIT_CANCELLED_SUSPEND)
  353                 return;
  354 
  355         error = xen_hvm_init_hypercall_stubs(init_type);
  356 
  357         switch (init_type) {
  358         case XEN_HVM_INIT_LATE:
  359                 if (error != 0)
  360                         return;
  361 
  362                 /*
  363                  * If xen_domain_type is not set at this point
  364                  * it means we are inside a (PV)HVM guest, because
  365                  * for PVH the guest type is set much earlier
  366                  * (see hammer_time_xen).
  367                  */
  368                 if (!xen_domain()) {
  369                         xen_domain_type = XEN_HVM_DOMAIN;
  370                         vm_guest = VM_GUEST_XEN;
  371                 }
  372 
  373                 setup_xen_features();
  374 #ifdef SMP
  375                 cpu_ops = xen_hvm_cpu_ops;
  376 #endif
  377                 break;
  378         case XEN_HVM_INIT_RESUME:
  379                 if (error != 0)
  380                         panic("Unable to init Xen hypercall stubs on resume");
  381 
  382                 /* Clear stale vcpu_info. */
  383                 CPU_FOREACH(i)
  384                         DPCPU_ID_SET(i, vcpu_info, NULL);
  385                 break;
  386         default:
  387                 panic("Unsupported HVM initialization type");
  388         }
  389 
  390         xen_vector_callback_enabled = 0;
  391         xen_evtchn_needs_ack = false;
  392         xen_hvm_set_callback(NULL);
  393 
  394         /*
  395          * On (PV)HVM domains we need to request the hypervisor to
  396          * fill the shared info page, for PVH guest the shared_info page
  397          * is passed inside the start_info struct and is already set, so this
  398          * functions are no-ops.
  399          */
  400         xen_hvm_init_shared_info_page();
  401         xen_hvm_disable_emulated_devices();
  402 } 
  403 
  404 void
  405 xen_hvm_suspend(void)
  406 {
  407 }
  408 
  409 void
  410 xen_hvm_resume(bool suspend_cancelled)
  411 {
  412 
  413         xen_hvm_init(suspend_cancelled ?
  414             XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME);
  415 
  416         /* Register vcpu_info area for CPU#0. */
  417         xen_hvm_cpu_init();
  418 }
  419 
  420 static void
  421 xen_hvm_sysinit(void *arg __unused)
  422 {
  423         xen_hvm_init(XEN_HVM_INIT_LATE);
  424 }
  425 SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL);
  426 
  427 static void
  428 xen_hvm_cpu_init(void)
  429 {
  430         struct vcpu_register_vcpu_info info;
  431         struct vcpu_info *vcpu_info;
  432         uint32_t regs[4];
  433         int cpu, rc;
  434 
  435         if (!xen_domain())
  436                 return;
  437 
  438         if (DPCPU_GET(vcpu_info) != NULL) {
  439                 /*
  440                  * vcpu_info is already set.  We're resuming
  441                  * from a failed migration and our pre-suspend
  442                  * configuration is still valid.
  443                  */
  444                 return;
  445         }
  446 
  447         /*
  448          * Set vCPU ID. If available fetch the ID from CPUID, if not just use
  449          * the ACPI ID.
  450          */
  451         KASSERT(xen_cpuid_base != 0, ("Invalid base Xen CPUID leaf"));
  452         cpuid_count(xen_cpuid_base + 4, 0, regs);
  453         KASSERT((regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) ||
  454             !xen_pv_domain(),
  455             ("Xen PV domain without vcpu_id in cpuid"));
  456         PCPU_SET(vcpu_id, (regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) ?
  457             regs[1] : PCPU_GET(acpi_id));
  458 
  459         if (xen_evtchn_needs_ack && !IS_BSP()) {
  460                 /*
  461                  * Setup the per-vpcu event channel upcall vector. This is only
  462                  * required when using the new HVMOP_set_evtchn_upcall_vector
  463                  * hypercall, which allows using a different vector for each
  464                  * vCPU. Note that FreeBSD uses the same vector for all vCPUs
  465                  * because it's not dynamically allocated.
  466                  */
  467                 rc = set_percpu_callback(PCPU_GET(vcpu_id));
  468                 if (rc != 0)
  469                         panic("Event channel upcall vector setup failed: %d",
  470                             rc);
  471         }
  472 
  473         /*
  474          * Set the vCPU info.
  475          *
  476          * NB: the vCPU info for vCPUs < 32 can be fetched from the shared info
  477          * page, but in order to make sure the mapping code is correct always
  478          * attempt to map the vCPU info at a custom place.
  479          */
  480         vcpu_info = DPCPU_PTR(vcpu_local_info);
  481         cpu = PCPU_GET(vcpu_id);
  482         info.mfn = vtophys(vcpu_info) >> PAGE_SHIFT;
  483         info.offset = vtophys(vcpu_info) - trunc_page(vtophys(vcpu_info));
  484 
  485         rc = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
  486         if (rc != 0)
  487                 DPCPU_SET(vcpu_info, &HYPERVISOR_shared_info->vcpu_info[cpu]);
  488         else
  489                 DPCPU_SET(vcpu_info, vcpu_info);
  490 }
  491 SYSINIT(xen_hvm_cpu_init, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_cpu_init, NULL);
  492 
  493 bool
  494 xen_has_iommu_maps(void)
  495 {
  496         uint32_t regs[4];
  497 
  498         KASSERT(xen_cpuid_base != 0, ("Invalid base Xen CPUID leaf"));
  499         cpuid_count(xen_cpuid_base + 4, 0, regs);
  500 
  501         return (regs[0] & XEN_HVM_CPUID_IOMMU_MAPPINGS);
  502 }

Cache object: fc27e248a55542ebe1d4c9e9c8ba3dcb


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.