FreeBSD/Linux Kernel Cross Reference
sys/amd64/include/vmm.h


/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _VMM_H_
#define _VMM_H_

#include <sys/cpuset.h>
#include <sys/sdt.h>
#include <x86/segments.h>

struct vm_snapshot_meta;

#ifdef _KERNEL
SDT_PROVIDER_DECLARE(vmm);
#endif

enum vm_suspend_how {
	VM_SUSPEND_NONE,
	VM_SUSPEND_RESET,
	VM_SUSPEND_POWEROFF,
	VM_SUSPEND_HALT,
	VM_SUSPEND_TRIPLEFAULT,
	VM_SUSPEND_LAST
};

/*
 * Identifiers for architecturally defined registers.
 */
enum vm_reg_name {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	VM_REG_GUEST_CR0,
	VM_REG_GUEST_CR3,
	VM_REG_GUEST_CR4,
	VM_REG_GUEST_DR7,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS,
	VM_REG_GUEST_LDTR,
	VM_REG_GUEST_TR,
	VM_REG_GUEST_IDTR,
	VM_REG_GUEST_GDTR,
	VM_REG_GUEST_EFER,
	VM_REG_GUEST_CR2,
	VM_REG_GUEST_PDPTE0,
	VM_REG_GUEST_PDPTE1,
	VM_REG_GUEST_PDPTE2,
	VM_REG_GUEST_PDPTE3,
	VM_REG_GUEST_INTR_SHADOW,
	VM_REG_GUEST_DR0,
	VM_REG_GUEST_DR1,
	VM_REG_GUEST_DR2,
	VM_REG_GUEST_DR3,
	VM_REG_GUEST_DR6,
	VM_REG_GUEST_ENTRY_INST_LENGTH,
	VM_REG_LAST
};

enum x2apic_state {
	X2APIC_DISABLED,
	X2APIC_ENABLED,
	X2APIC_STATE_LAST
};

#define	VM_INTINFO_VECTOR(info)	((info) & 0xff)
#define	VM_INTINFO_DEL_ERRCODE	0x800
#define	VM_INTINFO_RSVD		0x7ffff000
#define	VM_INTINFO_VALID	0x80000000
#define	VM_INTINFO_TYPE		0x700
#define	VM_INTINFO_HWINTR	(0 << 8)
#define	VM_INTINFO_NMI		(2 << 8)
#define	VM_INTINFO_HWEXCEPTION	(3 << 8)
#define	VM_INTINFO_SWINTR	(4 << 8)

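/*
 * Example (illustrative, not part of the API): unpacking an event
 * descriptor with the VM_INTINFO_* macros above.  'info' stands in for a
 * value obtained from, e.g., vm_get_intinfo().
 *
 *	if (info & VM_INTINFO_VALID) {
 *		vector = VM_INTINFO_VECTOR(info);
 *		type = info & VM_INTINFO_TYPE;
 *		has_errcode = (info & VM_INTINFO_DEL_ERRCODE) != 0;
 *	}
 */
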
/*
 * The VM name has to fit into the pathname length constraints of devfs,
 * governed primarily by SPECNAMELEN.  The length is the total number of
 * characters in the full path, relative to the mount point and not
 * including any leading '/' characters.
 * A prefix and a suffix are added to the name specified by the user.
 * The prefix is usually "vmm/" or "vmm.io/", but can be a few characters
 * longer for future use.
 * The suffix is a string that identifies a bootrom image or some similar
 * image that is attached to the VM. A separator character gets added to
 * the suffix automatically when generating the full path, so it must be
 * accounted for, reducing the effective length by 1.
 * The effective length of a VM name is 229 bytes for FreeBSD 13 and 37
 * bytes for FreeBSD 12.  A minimum length is set for safety and supports
 * a SPECNAMELEN as small as 32 on old systems.
 */
#define	VM_MAX_PREFIXLEN 10
#define	VM_MAX_SUFFIXLEN 15
#define	VM_MIN_NAMELEN	 6
#define	VM_MAX_NAMELEN \
    (SPECNAMELEN - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1)
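
/*
 * Illustrative sketch (not part of this header): validating a
 * user-supplied name against the limits above before creating a VM.
 *
 *	if (strlen(name) == 0 || strlen(name) > VM_MAX_NAMELEN)
 *		return (EINVAL);
 *	error = vm_create(name, &vm);
 */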

#ifdef _KERNEL
CTASSERT(VM_MAX_NAMELEN >= VM_MIN_NAMELEN);

struct vcpu;
struct vm;
struct vm_exception;
struct seg_desc;
struct vm_exit;
struct vm_run;
struct vhpet;
struct vioapic;
struct vlapic;
struct vmspace;
struct vm_object;
struct vm_guest_paging;
struct pmap;
enum snapshot_req;

struct vm_eventinfo {
	void	*rptr;		/* rendezvous cookie */
	int	*sptr;		/* suspend cookie */
	int	*iptr;		/* reqidle cookie */
};

typedef int	(*vmm_init_func_t)(int ipinum);
typedef int	(*vmm_cleanup_func_t)(void);
typedef void	(*vmm_resume_func_t)(void);
typedef void *	(*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
typedef int	(*vmi_run_func_t)(void *vcpui, register_t rip,
		    struct pmap *pmap, struct vm_eventinfo *info);
typedef void	(*vmi_cleanup_func_t)(void *vmi);
typedef void *	(*vmi_vcpu_init_func_t)(void *vmi, struct vcpu *vcpu,
		    int vcpu_id);
typedef void	(*vmi_vcpu_cleanup_func_t)(void *vcpui);
typedef int	(*vmi_get_register_t)(void *vcpui, int num, uint64_t *retval);
typedef int	(*vmi_set_register_t)(void *vcpui, int num, uint64_t val);
typedef int	(*vmi_get_desc_t)(void *vcpui, int num, struct seg_desc *desc);
typedef int	(*vmi_set_desc_t)(void *vcpui, int num, struct seg_desc *desc);
typedef int	(*vmi_get_cap_t)(void *vcpui, int num, int *retval);
typedef int	(*vmi_set_cap_t)(void *vcpui, int num, int val);
typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
typedef void	(*vmi_vmspace_free)(struct vmspace *vmspace);
typedef struct vlapic * (*vmi_vlapic_init)(void *vcpui);
typedef void	(*vmi_vlapic_cleanup)(struct vlapic *vlapic);
typedef int	(*vmi_snapshot_t)(void *vmi, struct vm_snapshot_meta *meta);
typedef int	(*vmi_snapshot_vcpu_t)(void *vcpui, struct vm_snapshot_meta *meta);
typedef int	(*vmi_restore_tsc_t)(void *vcpui, uint64_t now);

struct vmm_ops {
	vmm_init_func_t		modinit;	/* module wide initialization */
	vmm_cleanup_func_t	modcleanup;
	vmm_resume_func_t	modresume;

	vmi_init_func_t		init;		/* vm-specific initialization */
	vmi_run_func_t		run;
	vmi_cleanup_func_t	cleanup;
	vmi_vcpu_init_func_t	vcpu_init;
	vmi_vcpu_cleanup_func_t	vcpu_cleanup;
	vmi_get_register_t	getreg;
	vmi_set_register_t	setreg;
	vmi_get_desc_t		getdesc;
	vmi_set_desc_t		setdesc;
	vmi_get_cap_t		getcap;
	vmi_set_cap_t		setcap;
	vmi_vmspace_alloc	vmspace_alloc;
	vmi_vmspace_free	vmspace_free;
	vmi_vlapic_init		vlapic_init;
	vmi_vlapic_cleanup	vlapic_cleanup;

	/* checkpoint operations */
	vmi_snapshot_t		snapshot;
	vmi_snapshot_vcpu_t	vcpu_snapshot;
	vmi_restore_tsc_t	restore_tsc;
};

extern const struct vmm_ops vmm_ops_intel;
extern const struct vmm_ops vmm_ops_amd;

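/*
 * Illustrative sketch of how the ops table is consumed (the real dispatch
 * lives in vmm.c): one backend is selected at module load time based on
 * the CPU vendor and every later operation goes through the table.
 * 'have_vmx' is a made-up predicate for the example.
 *
 *	static const struct vmm_ops *ops;
 *
 *	ops = have_vmx ? &vmm_ops_intel : &vmm_ops_amd;
 *	error = ops->modinit(ipinum);
 *	...
 *	cookie = ops->init(vm, pmap);
 *	error = ops->run(vcpui, rip, pmap, &evinfo);
 */
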
extern u_int vm_maxcpu;			/* maximum virtual cpus */

int vm_create(const char *name, struct vm **retvm);
struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid);
void vm_disable_vcpu_creation(struct vm *vm);
void vm_slock_vcpus(struct vm *vm);
void vm_unlock_vcpus(struct vm *vm);
void vm_destroy(struct vm *vm);
int vm_reinit(struct vm *vm);
const char *vm_name(struct vm *vm);
uint16_t vm_get_maxcpus(struct vm *vm);
void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus);
int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus);

/*
 * APIs that modify the guest memory map require all vcpus to be frozen.
 */
void vm_slock_memsegs(struct vm *vm);
void vm_xlock_memsegs(struct vm *vm);
void vm_unlock_memsegs(struct vm *vm);
int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
    size_t len, int prot, int flags);
int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len);
int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
void vm_free_memseg(struct vm *vm, int ident);
int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
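
/*
 * Illustrative sketch of a map-changing sequence, assuming the caller has
 * satisfied the vcpu-freezing requirement above.  'segid', 'gpa' and 'len'
 * are made up for the example.
 *
 *	vm_xlock_memsegs(vm);
 *	error = vm_alloc_memseg(vm, segid, len, true);
 *	if (error == 0)
 *		error = vm_mmap_memseg(vm, gpa, segid, 0, len,
 *		    VM_PROT_ALL, 0);
 *	vm_unlock_memsegs(vm);
 */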

/*
 * APIs that inspect the guest memory map require only a *single* vcpu to
 * be frozen. This acts like a read lock on the guest memory map since any
 * modification requires *all* vcpus to be frozen.
 */
int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
    struct vm_object **objptr);
vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm);
void *vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len,
    int prot, void **cookie);
void *vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len,
    int prot, void **cookie);
void vm_gpa_release(void *cookie);
bool vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa);
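
/*
 * Illustrative sketch: temporarily wiring one page of guest memory so the
 * kernel can read it.  'gpa' and 'buf' are made up for the example.
 *
 *	void *cookie, *hva;
 *
 *	hva = vm_gpa_hold(vcpu, gpa, PAGE_SIZE, VM_PROT_READ, &cookie);
 *	if (hva != NULL) {
 *		memcpy(buf, hva, PAGE_SIZE);
 *		vm_gpa_release(cookie);
 *	}
 */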

int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval);
int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val);
int vm_get_seg_desc(struct vcpu *vcpu, int reg,
		    struct seg_desc *ret_desc);
int vm_set_seg_desc(struct vcpu *vcpu, int reg,
		    struct seg_desc *desc);
int vm_run(struct vcpu *vcpu, struct vm_exit *vme_user);
int vm_suspend(struct vm *vm, enum vm_suspend_how how);
int vm_inject_nmi(struct vcpu *vcpu);
int vm_nmi_pending(struct vcpu *vcpu);
void vm_nmi_clear(struct vcpu *vcpu);
int vm_inject_extint(struct vcpu *vcpu);
int vm_extint_pending(struct vcpu *vcpu);
void vm_extint_clear(struct vcpu *vcpu);
int vcpu_vcpuid(struct vcpu *vcpu);
struct vm *vcpu_vm(struct vcpu *vcpu);
struct vcpu *vm_vcpu(struct vm *vm, int cpu);
struct vlapic *vm_lapic(struct vcpu *vcpu);
struct vioapic *vm_ioapic(struct vm *vm);
struct vhpet *vm_hpet(struct vm *vm);
int vm_get_capability(struct vcpu *vcpu, int type, int *val);
int vm_set_capability(struct vcpu *vcpu, int type, int val);
int vm_get_x2apic_state(struct vcpu *vcpu, enum x2apic_state *state);
int vm_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state);
int vm_apicid2vcpuid(struct vm *vm, int apicid);
int vm_activate_cpu(struct vcpu *vcpu);
int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu);
int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu);
int vm_restart_instruction(struct vcpu *vcpu);
struct vm_exit *vm_exitinfo(struct vcpu *vcpu);
void vm_exit_suspended(struct vcpu *vcpu, uint64_t rip);
void vm_exit_debug(struct vcpu *vcpu, uint64_t rip);
void vm_exit_rendezvous(struct vcpu *vcpu, uint64_t rip);
void vm_exit_astpending(struct vcpu *vcpu, uint64_t rip);
void vm_exit_reqidle(struct vcpu *vcpu, uint64_t rip);
int vm_snapshot_req(struct vm *vm, struct vm_snapshot_meta *meta);
int vm_restore_time(struct vm *vm);

#ifdef _SYS__CPUSET_H_
/*
 * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
 * The rendezvous 'func(arg)' is not allowed to do anything that will
 * cause the thread to be put to sleep.
 *
 * The caller cannot hold any locks when initiating the rendezvous.
 *
 * The implementation of this API may cause vcpus other than those specified
 * by 'dest' to be stalled. The caller should not rely on any vcpus making
 * forward progress when the rendezvous is in progress.
 */
typedef void (*vm_rendezvous_func_t)(struct vcpu *vcpu, void *arg);
int vm_smp_rendezvous(struct vcpu *vcpu, cpuset_t dest,
    vm_rendezvous_func_t func, void *arg);

cpuset_t vm_active_cpus(struct vm *vm);
cpuset_t vm_debug_cpus(struct vm *vm);
cpuset_t vm_suspended_cpus(struct vm *vm);
cpuset_t vm_start_cpus(struct vm *vm, const cpuset_t *tostart);
void vm_await_start(struct vm *vm, const cpuset_t *waiting);
#endif	/* _SYS__CPUSET_H_ */
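
/*
 * Illustrative sketch: running a non-sleeping callback on every active
 * vcpu.  'flush_state' is a hypothetical helper.
 *
 *	static void
 *	flush_state(struct vcpu *vcpu, void *arg)
 *	{
 *		(per-vcpu work; must not sleep)
 *	}
 *
 *	error = vm_smp_rendezvous(vcpu, vm_active_cpus(vm),
 *	    flush_state, NULL);
 */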

static __inline int
vcpu_rendezvous_pending(struct vm_eventinfo *info)
{

	return (*((uintptr_t *)(info->rptr)) != 0);
}

static __inline int
vcpu_suspended(struct vm_eventinfo *info)
{

	return (*info->sptr);
}

static __inline int
vcpu_reqidle(struct vm_eventinfo *info)
{

	return (*info->iptr);
}

int vcpu_debugged(struct vcpu *vcpu);

/*
 * Return true if the device indicated by bus/slot/func is supposed to be a
 * PCI passthrough device.
 *
 * Return false otherwise.
 */
bool vmm_is_pptdev(int bus, int slot, int func);

void *vm_iommu_domain(struct vm *vm);

enum vcpu_state {
	VCPU_IDLE,
	VCPU_FROZEN,
	VCPU_RUNNING,
	VCPU_SLEEPING,
};

int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state state, bool from_idle);
enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu);

static int __inline
vcpu_is_running(struct vcpu *vcpu, int *hostcpu)
{
	return (vcpu_get_state(vcpu, hostcpu) == VCPU_RUNNING);
}

#ifdef _SYS_PROC_H_
static int __inline
vcpu_should_yield(struct vcpu *vcpu)
{
	struct thread *td;

	td = curthread;
	return (td->td_ast != 0 || td->td_owepreempt != 0);
}
#endif

void *vcpu_stats(struct vcpu *vcpu);
void vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr);
struct vmspace *vm_get_vmspace(struct vm *vm);
struct vatpic *vm_atpic(struct vm *vm);
struct vatpit *vm_atpit(struct vm *vm);
struct vpmtmr *vm_pmtmr(struct vm *vm);
struct vrtc *vm_rtc(struct vm *vm);

/*
 * Inject exception 'vector' into the guest vcpu. This function returns 0 on
 * success and non-zero on failure.
 *
 * Wrapper functions like 'vm_inject_gp()' should be preferred to calling
 * this function directly because they enforce the trap-like or fault-like
 * behavior of an exception.
 *
 * This function should only be called in the context of the thread that is
 * executing this vcpu.
 */
int vm_inject_exception(struct vcpu *vcpu, int vector, int err_valid,
    uint32_t errcode, int restart_instruction);
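
/*
 * For example (illustrative), injecting #GP with a zero error code
 * directly would be:
 *
 *	error = vm_inject_exception(vcpu, IDT_GP, 1, 0, 1);
 *
 * The vm_inject_gp() wrapper near the end of this header expresses the
 * same intent without the caller having to pick the restart semantics.
 */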

/*
 * This function is called after a VM-exit that occurred during exception or
 * interrupt delivery through the IDT. The format of 'intinfo' is described
 * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2.
 *
 * If a VM-exit handler completes the event delivery successfully then it
 * should call vm_exit_intinfo() to extinguish the pending event. For
 * example, if the task switch emulation is triggered via a task gate then
 * it should call this function with 'intinfo=0' to indicate that the
 * external event is not pending anymore.
 *
 * Return value is 0 on success and non-zero on failure.
 */
int vm_exit_intinfo(struct vcpu *vcpu, uint64_t intinfo);

/*
 * This function is called before every VM-entry to retrieve a pending
 * event that should be injected into the guest. This function combines
 * nested events into a double or triple fault.
 *
 * Returns 0 if there are no events that need to be injected into the guest
 * and non-zero otherwise.
 */
int vm_entry_intinfo(struct vcpu *vcpu, uint64_t *info);

int vm_get_intinfo(struct vcpu *vcpu, uint64_t *info1, uint64_t *info2);

/*
 * Function used to keep track of the guest's TSC offset. The
 * offset is used by the virtualization extensions to provide a consistent
 * value for the Time Stamp Counter to the guest.
 */
void vm_set_tsc_offset(struct vcpu *vcpu, uint64_t offset);
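
/*
 * Illustrative sketch: after restoring a snapshot, re-deriving the offset
 * so the guest's TSC appears continuous.  'guest_tsc' is a made-up value
 * recorded at snapshot time.
 *
 *	vm_set_tsc_offset(vcpu, guest_tsc - rdtsc());
 */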

enum vm_reg_name vm_segment_name(int seg_encoding);

struct vm_copyinfo {
	uint64_t	gpa;
	size_t		len;
	void		*hva;
	void		*cookie;
};

/*
 * Set up 'copyinfo[]' to copy to/from guest linear address space starting
 * at 'gla' and 'len' bytes long. The 'prot' should be set to PROT_READ for
 * a copyin or PROT_WRITE for a copyout.
 *
 * retval	is_fault	Interpretation
 *   0		   0		Success
 *   0		   1		An exception was injected into the guest
 * EFAULT	  N/A		Unrecoverable error
 *
 * The 'copyinfo[]' can be passed to 'vm_copyin()' or 'vm_copyout()' only if
 * the return value is 0. The 'copyinfo[]' resources should be freed by
 * calling 'vm_copy_teardown()' after the copy is done.
 */
int vm_copy_setup(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo,
    int num_copyinfo, int *is_fault);
void vm_copy_teardown(struct vm_copyinfo *copyinfo, int num_copyinfo);
void vm_copyin(struct vm_copyinfo *copyinfo, void *kaddr, size_t len);
void vm_copyout(const void *kaddr, struct vm_copyinfo *copyinfo, size_t len);
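
/*
 * Illustrative sketch: copying 'len' bytes in from guest linear address
 * 'gla'.  'paging' would describe the guest's paging state at the time of
 * the access (e.g. taken from a vm_exit).
 *
 *	struct vm_copyinfo copyinfo[2];
 *	int error, fault;
 *
 *	error = vm_copy_setup(vcpu, &paging, gla, len, PROT_READ,
 *	    copyinfo, nitems(copyinfo), &fault);
 *	if (error == 0 && !fault) {
 *		vm_copyin(copyinfo, buf, len);
 *		vm_copy_teardown(copyinfo, nitems(copyinfo));
 *	}
 */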

int vcpu_trace_exceptions(struct vcpu *vcpu);
int vcpu_trap_wbinvd(struct vcpu *vcpu);
#endif	/* _KERNEL */

/*
 * Identifiers for optional vmm capabilities
 */
enum vm_cap_type {
	VM_CAP_HALT_EXIT,
	VM_CAP_MTRAP_EXIT,
	VM_CAP_PAUSE_EXIT,
	VM_CAP_UNRESTRICTED_GUEST,
	VM_CAP_ENABLE_INVPCID,
	VM_CAP_BPT_EXIT,
	VM_CAP_RDPID,
	VM_CAP_RDTSCP,
	VM_CAP_IPI_EXIT,
	VM_CAP_MAX
};

enum vm_intr_trigger {
	EDGE_TRIGGER,
	LEVEL_TRIGGER
};

/*
 * The 'access' field has the format specified in Table 21-2 of the Intel
 * Architecture Manual vol 3b.
 *
 * XXX The contents of the 'access' field are architecturally defined except
 * bit 16 - Segment Unusable.
 */
struct seg_desc {
	uint64_t	base;
	uint32_t	limit;
	uint32_t	access;
};
#define	SEG_DESC_TYPE(access)		((access) & 0x001f)
#define	SEG_DESC_DPL(access)		(((access) >> 5) & 0x3)
#define	SEG_DESC_PRESENT(access)	(((access) & 0x0080) ? 1 : 0)
#define	SEG_DESC_DEF32(access)		(((access) & 0x4000) ? 1 : 0)
#define	SEG_DESC_GRANULARITY(access)	(((access) & 0x8000) ? 1 : 0)
#define	SEG_DESC_UNUSABLE(access)	(((access) & 0x10000) ? 1 : 0)
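
/*
 * Illustrative sketch (kernel side): checking that the guest's code
 * segment is present and usable before consulting its descriptor.
 *
 *	struct seg_desc desc;
 *
 *	error = vm_get_seg_desc(vcpu, VM_REG_GUEST_CS, &desc);
 *	if (error == 0 && SEG_DESC_PRESENT(desc.access) &&
 *	    !SEG_DESC_UNUSABLE(desc.access))
 *		dpl = SEG_DESC_DPL(desc.access);
 */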

enum vm_cpu_mode {
	CPU_MODE_REAL,
	CPU_MODE_PROTECTED,
	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
};

enum vm_paging_mode {
	PAGING_MODE_FLAT,
	PAGING_MODE_32,
	PAGING_MODE_PAE,
	PAGING_MODE_64,
	PAGING_MODE_64_LA57,
};

struct vm_guest_paging {
	uint64_t	cr3;
	int		cpl;
	enum vm_cpu_mode cpu_mode;
	enum vm_paging_mode paging_mode;
};

/*
 * The data structures 'vie' and 'vie_op' are meant to be opaque to the
 * consumers of instruction decoding. The only reason why their contents
 * need to be exposed is because they are part of the 'vm_exit' structure.
 */
struct vie_op {
	uint8_t		op_byte;	/* actual opcode byte */
	uint8_t		op_type;	/* type of operation (e.g. MOV) */
	uint16_t	op_flags;
};
_Static_assert(sizeof(struct vie_op) == 4, "ABI");
_Static_assert(_Alignof(struct vie_op) == 2, "ABI");

#define	VIE_INST_SIZE	15
struct vie {
	uint8_t		inst[VIE_INST_SIZE];	/* instruction bytes */
	uint8_t		num_valid;		/* size of the instruction */

/* The following fields are all zeroed upon restart. */
#define	vie_startzero	num_processed
	uint8_t		num_processed;

	uint8_t		addrsize:4, opsize:4;	/* address and operand sizes */
	uint8_t		rex_w:1,		/* REX prefix */
			rex_r:1,
			rex_x:1,
			rex_b:1,
			rex_present:1,
			repz_present:1,		/* REP/REPE/REPZ prefix */
			repnz_present:1,	/* REPNE/REPNZ prefix */
			opsize_override:1,	/* Operand size override */
			addrsize_override:1,	/* Address size override */
			segment_override:1;	/* Segment override */

	uint8_t		mod:2,			/* ModRM byte */
			reg:4,
			rm:4;

	uint8_t		ss:2,			/* SIB byte */
			vex_present:1,		/* VEX prefixed */
			vex_l:1,		/* L bit */
			index:4,		/* SIB byte */
			base:4;			/* SIB byte */

	uint8_t		disp_bytes;
	uint8_t		imm_bytes;

	uint8_t		scale;

	uint8_t		vex_reg:4,	/* vvvv: first source register specifier */
			vex_pp:2,	/* pp */
			_sparebits:2;

	uint8_t		_sparebytes[2];

	int		base_register;		/* VM_REG_GUEST_xyz */
	int		index_register;		/* VM_REG_GUEST_xyz */
	int		segment_register;	/* VM_REG_GUEST_xyz */

	int64_t		displacement;		/* optional addr displacement */
	int64_t		immediate;		/* optional immediate operand */

	uint8_t		decoded;	/* set to 1 if successfully decoded */

	uint8_t		_sparebyte;

	struct vie_op	op;			/* opcode description */
};
_Static_assert(sizeof(struct vie) == 64, "ABI");
_Static_assert(__offsetof(struct vie, disp_bytes) == 22, "ABI");
_Static_assert(__offsetof(struct vie, scale) == 24, "ABI");
_Static_assert(__offsetof(struct vie, base_register) == 28, "ABI");

enum vm_exitcode {
	VM_EXITCODE_INOUT,
	VM_EXITCODE_VMX,
	VM_EXITCODE_BOGUS,
	VM_EXITCODE_RDMSR,
	VM_EXITCODE_WRMSR,
	VM_EXITCODE_HLT,
	VM_EXITCODE_MTRAP,
	VM_EXITCODE_PAUSE,
	VM_EXITCODE_PAGING,
	VM_EXITCODE_INST_EMUL,
	VM_EXITCODE_SPINUP_AP,
	VM_EXITCODE_DEPRECATED1,	/* used to be SPINDOWN_CPU */
	VM_EXITCODE_RENDEZVOUS,
	VM_EXITCODE_IOAPIC_EOI,
	VM_EXITCODE_SUSPENDED,
	VM_EXITCODE_INOUT_STR,
	VM_EXITCODE_TASK_SWITCH,
	VM_EXITCODE_MONITOR,
	VM_EXITCODE_MWAIT,
	VM_EXITCODE_SVM,
	VM_EXITCODE_REQIDLE,
	VM_EXITCODE_DEBUG,
	VM_EXITCODE_VMINSN,
	VM_EXITCODE_BPT,
	VM_EXITCODE_IPI,
	VM_EXITCODE_MAX
};

struct vm_inout {
	uint16_t	bytes:3;	/* 1 or 2 or 4 */
	uint16_t	in:1;
	uint16_t	string:1;
	uint16_t	rep:1;
	uint16_t	port;
	uint32_t	eax;		/* valid for out */
};

struct vm_inout_str {
	struct vm_inout	inout;		/* must be the first element */
	struct vm_guest_paging paging;
	uint64_t	rflags;
	uint64_t	cr0;
	uint64_t	index;
	uint64_t	count;		/* rep=1 (%rcx), rep=0 (1) */
	int		addrsize;
	enum vm_reg_name seg_name;
	struct seg_desc seg_desc;
};

enum task_switch_reason {
	TSR_CALL,
	TSR_IRET,
	TSR_JMP,
	TSR_IDT_GATE,	/* task gate in IDT */
};

struct vm_task_switch {
	uint16_t	tsssel;		/* new TSS selector */
	int		ext;		/* task switch due to external event */
	uint32_t	errcode;
	int		errcode_valid;	/* push 'errcode' on the new stack */
	enum task_switch_reason reason;
	struct vm_guest_paging paging;
};

struct vm_exit {
	enum vm_exitcode	exitcode;
	int			inst_length;	/* 0 means unknown */
	uint64_t		rip;
	union {
		struct vm_inout	inout;
		struct vm_inout_str inout_str;
		struct {
			uint64_t	gpa;
			int		fault_type;
		} paging;
		struct {
			uint64_t	gpa;
			uint64_t	gla;
			uint64_t	cs_base;
			int		cs_d;		/* CS.D */
			struct vm_guest_paging paging;
			struct vie	vie;
		} inst_emul;
		/*
		 * VMX specific payload. Used when there is no "better"
		 * exitcode to represent the VM-exit.
		 */
		struct {
			int		status;		/* vmx inst status */
			/*
			 * 'exit_reason' and 'exit_qualification' are valid
			 * only if 'status' is zero.
			 */
			uint32_t	exit_reason;
			uint64_t	exit_qualification;
			/*
			 * 'inst_error' and 'inst_type' are valid
			 * only if 'status' is non-zero.
			 */
			int		inst_type;
			int		inst_error;
		} vmx;
		/*
		 * SVM specific payload.
		 */
		struct {
			uint64_t	exitcode;
			uint64_t	exitinfo1;
			uint64_t	exitinfo2;
		} svm;
		struct {
			int		inst_length;
		} bpt;
		struct {
			uint32_t	code;		/* ecx value */
			uint64_t	wval;
		} msr;
		struct {
			int		vcpu;
			uint64_t	rip;
		} spinup_ap;
		struct {
			uint64_t	rflags;
			uint64_t	intr_status;
		} hlt;
		struct {
			int		vector;
		} ioapic_eoi;
		struct {
			enum vm_suspend_how how;
		} suspended;
		struct {
			uint32_t mode;
			uint8_t vector;
			cpuset_t dmask;
		} ipi;
		struct vm_task_switch task_switch;
	} u;
};
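
/*
 * Illustrative sketch: dispatching on the exit reason after vm_run()
 * returns.  'emulate_inout' is a hypothetical helper and most exit codes
 * are omitted.
 *
 *	error = vm_run(vcpu, vme);
 *	if (error == 0) {
 *		switch (vme->exitcode) {
 *		case VM_EXITCODE_INOUT:
 *			error = emulate_inout(vme, &vme->u.inout);
 *			break;
 *		case VM_EXITCODE_SUSPENDED:
 *			done = true;
 *			break;
 *		default:
 *			break;
 *		}
 *	}
 */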

/* APIs to inject faults into the guest */
#ifdef _KERNEL
void vm_inject_fault(struct vcpu *vcpu, int vector, int errcode_valid,
    int errcode);

static __inline void
vm_inject_ud(struct vcpu *vcpu)
{
	vm_inject_fault(vcpu, IDT_UD, 0, 0);
}

static __inline void
vm_inject_gp(struct vcpu *vcpu)
{
	vm_inject_fault(vcpu, IDT_GP, 1, 0);
}

static __inline void
vm_inject_ac(struct vcpu *vcpu, int errcode)
{
	vm_inject_fault(vcpu, IDT_AC, 1, errcode);
}

static __inline void
vm_inject_ss(struct vcpu *vcpu, int errcode)
{
	vm_inject_fault(vcpu, IDT_SS, 1, errcode);
}

void vm_inject_pf(struct vcpu *vcpu, int error_code, uint64_t cr2);
#else
void vm_inject_fault(void *vm, int vcpuid, int vector, int errcode_valid,
    int errcode);

static __inline void
vm_inject_ud(void *vm, int vcpuid)
{
	vm_inject_fault(vm, vcpuid, IDT_UD, 0, 0);
}

static __inline void
vm_inject_gp(void *vm, int vcpuid)
{
	vm_inject_fault(vm, vcpuid, IDT_GP, 1, 0);
}

static __inline void
vm_inject_ac(void *vm, int vcpuid, int errcode)
{
	vm_inject_fault(vm, vcpuid, IDT_AC, 1, errcode);
}

static __inline void
vm_inject_ss(void *vm, int vcpuid, int errcode)
{
	vm_inject_fault(vm, vcpuid, IDT_SS, 1, errcode);
}

void vm_inject_pf(void *vm, int vcpuid, int error_code, uint64_t cr2);
#endif

#endif	/* _VMM_H_ */
