The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/amd64/vmm/intel/vmx_msr.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2011 NetApp, Inc.
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  *
   28  * $FreeBSD$
   29  */
   30 
   31 #include <sys/cdefs.h>
   32 __FBSDID("$FreeBSD$");
   33 
   34 #include <sys/param.h>
   35 #include <sys/systm.h>
   36 #include <sys/proc.h>
   37 
   38 #include <machine/clock.h>
   39 #include <machine/cpufunc.h>
   40 #include <machine/md_var.h>
   41 #include <machine/pcb.h>
   42 #include <machine/specialreg.h>
   43 #include <machine/vmm.h>
   44 
   45 #include "vmx.h"
   46 #include "vmx_msr.h"
   47 #include "x86.h"
   48 
   49 static bool
   50 vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
   51 {
   52 
   53         return ((msr_val & (1UL << (bitpos + 32))) != 0);
   54 }
   55 
   56 static bool
   57 vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
   58 {
   59 
   60         return ((msr_val & (1UL << bitpos)) == 0);
   61 }
   62 
   63 uint32_t
   64 vmx_revision(void)
   65 {
   66 
   67         return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
   68 }
   69 
/*
 * Generate a bitmask to be used for the VMCS execution control fields.
 *
 * The caller specifies what bits should be set to one in 'ones_mask'
 * and what bits should be set to zero in 'zeros_mask'. The don't-care
 * bits are set to the default value. The default values are obtained
 * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
 * VMX Capabilities".
 *
 * Returns zero on success and non-zero on error.
 */
int
vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
               uint32_t zeros_mask, uint32_t *retval)
{
        int i;
        uint64_t val, trueval;
        bool true_ctls_avail, one_allowed, zero_allowed;

        /* We cannot ask the same bit to be set to both '1' and '0' */
        if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
                return (EINVAL);

        /* Bit 55 of MSR_VMX_BASIC reports whether the "true" ctl MSRs exist */
        true_ctls_avail = (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) != 0;

        val = rdmsr(ctl_reg);
        if (true_ctls_avail)
                trueval = rdmsr(true_ctl_reg);          /* step c */
        else
                trueval = val;                          /* step a */

        for (i = 0; i < 32; i++) {
                /* Allowed-1 settings are in the high word, allowed-0 in low */
                one_allowed = vmx_ctl_allows_one_setting(trueval, i);
                zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);

                KASSERT(one_allowed || zero_allowed,
                        ("invalid zero/one setting for bit %d of ctl 0x%0x, "
                         "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));

                if (zero_allowed && !one_allowed) {             /* b(i),c(i) */
                        /* Bit is fixed to '0'; caller must not demand '1' */
                        if (ones_mask & (1 << i))
                                return (EINVAL);
                        *retval &= ~(1 << i);
                } else if (one_allowed && !zero_allowed) {      /* b(i),c(i) */
                        /* Bit is fixed to '1'; caller must not demand '0' */
                        if (zeros_mask & (1 << i))
                                return (EINVAL);
                        *retval |= 1 << i;
                } else {
                        /* Flexible bit: honor the caller, else use defaults */
                        if (zeros_mask & (1 << i))      /* b(ii),c(ii) */
                                *retval &= ~(1 << i);
                        else if (ones_mask & (1 << i)) /* b(ii), c(ii) */
                                *retval |= 1 << i;
                        else if (!true_ctls_avail)
                                *retval &= ~(1 << i);   /* b(iii) */
                        else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
                                *retval &= ~(1 << i);
                        else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
                                *retval |= 1 << i;
                        else {
                                panic("vmx_set_ctlreg: unable to determine "
                                      "correct value of ctl bit %d for msr "
                                      "0x%0x and true msr 0x%0x", i, ctl_reg,
                                      true_ctl_reg);
                        }
                }
        }

        return (0);
}
  139 
/*
 * Initialize the PAGE_SIZE MSR permission bitmap with every bit set, so
 * that all guest MSR accesses trap until individual MSRs are permitted
 * via msr_bitmap_change_access().
 */
void
msr_bitmap_initialize(char *bitmap)
{

        memset(bitmap, 0xff, PAGE_SIZE);
}
  146 
  147 int
  148 msr_bitmap_change_access(char *bitmap, u_int msr, int access)
  149 {
  150         int byte, bit;
  151 
  152         if (msr <= 0x00001FFF)
  153                 byte = msr / 8;
  154         else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
  155                 byte = 1024 + (msr - 0xC0000000) / 8;
  156         else
  157                 return (EINVAL);
  158 
  159         bit = msr & 0x7;
  160 
  161         if (access & MSR_BITMAP_ACCESS_READ)
  162                 bitmap[byte] &= ~(1 << bit);
  163         else
  164                 bitmap[byte] |= 1 << bit;
  165 
  166         byte += 2048;
  167         if (access & MSR_BITMAP_ACCESS_WRITE)
  168                 bitmap[byte] &= ~(1 << bit);
  169         else
  170                 bitmap[byte] |= 1 << bit;
  171 
  172         return (0);
  173 }
  174 
/* Values backing the emulated MISC_ENABLE, PLATFORM_INFO and
 * TURBO_RATIO_LIMIT* MSRs; computed once in vmx_msr_init(). */
static uint64_t misc_enable;
static uint64_t platform_info;
static uint64_t turbo_ratio_limit;
/* Host values of the guest-switched MSRs (LSTAR, CSTAR, STAR, SF_MASK);
 * cached in vmx_msr_init(), restored in vmx_msr_guest_exit(). */
static uint64_t host_msrs[GUEST_MSR_NUM];
  179 
  180 static bool
  181 nehalem_cpu(void)
  182 {
  183         u_int family, model;
  184 
  185         /*
  186          * The family:model numbers belonging to the Nehalem microarchitecture
  187          * are documented in Section 35.5, Intel SDM dated Feb 2014.
  188          */
  189         family = CPUID_TO_FAMILY(cpu_id);
  190         model = CPUID_TO_MODEL(cpu_id);
  191         if (family == 0x6) {
  192                 switch (model) {
  193                 case 0x1A:
  194                 case 0x1E:
  195                 case 0x1F:
  196                 case 0x2E:
  197                         return (true);
  198                 default:
  199                         break;
  200                 }
  201         }
  202         return (false);
  203 }
  204 
  205 static bool
  206 westmere_cpu(void)
  207 {
  208         u_int family, model;
  209 
  210         /*
  211          * The family:model numbers belonging to the Westmere microarchitecture
  212          * are documented in Section 35.6, Intel SDM dated Feb 2014.
  213          */
  214         family = CPUID_TO_FAMILY(cpu_id);
  215         model = CPUID_TO_MODEL(cpu_id);
  216         if (family == 0x6) {
  217                 switch (model) {
  218                 case 0x25:
  219                 case 0x2C:
  220                         return (true);
  221                 default:
  222                         break;
  223                 }
  224         }
  225         return (false);
  226 }
  227 
  228 static bool
  229 pat_valid(uint64_t val)
  230 {
  231         int i, pa;
  232 
  233         /*
  234          * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT"
  235          *
  236          * Extract PA0 through PA7 and validate that each one encodes a
  237          * valid memory type.
  238          */
  239         for (i = 0; i < 8; i++) {
  240                 pa = (val >> (i * 8)) & 0xff;
  241                 if (pa == 2 || pa == 3 || pa >= 8)
  242                         return (false);
  243         }
  244         return (true);
  245 }
  246 
/*
 * Cache the host values of the guest-switched MSRs and compute the
 * values backing the emulated MISC_ENABLE, PLATFORM_INFO and
 * TURBO_RATIO_LIMIT* MSRs.
 */
void
vmx_msr_init(void)
{
        uint64_t bus_freq, ratio;
        int i;

        /*
         * It is safe to cache the values of the following MSRs because
         * they don't change based on curcpu, curproc or curthread.
         */
        host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
        host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
        host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
        host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);

        /*
         * Initialize emulated MSRs
         */
        misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
        /*
         * Set mandatory bits
         *  11:   branch trace disabled
         *  12:   PEBS unavailable
         * Clear unsupported features
         *  16:   SpeedStep enable
         *  18:   enable MONITOR FSM
         */
        misc_enable |= (1 << 12) | (1 << 11);
        misc_enable &= ~((1 << 18) | (1 << 16));

        /* Nehalem/Westmere use a 133MHz bus clock; later parts use 100MHz */
        if (nehalem_cpu() || westmere_cpu())
                bus_freq = 133330000;           /* 133Mhz */
        else
                bus_freq = 100000000;           /* 100Mhz */

        /*
         * XXXtime
         * The ratio should really be based on the virtual TSC frequency as
         * opposed to the host TSC.
         */
        ratio = (tsc_freq / bus_freq) & 0xff;

        /*
         * The register definition is based on the micro-architecture
         * but the following bits are always the same:
         * [15:8]  Maximum Non-Turbo Ratio
         * [28]    Programmable Ratio Limit for Turbo Mode
         * [29]    Programmable TDC-TDP Limit for Turbo Mode
         * [47:40] Maximum Efficiency Ratio
         *
         * The other bits can be safely set to 0 on all
         * micro-architectures up to Haswell.
         */
        platform_info = (ratio << 8) | (ratio << 40);

        /*
         * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
         * dependent on the maximum cores per package supported by the micro-
         * architecture. For e.g., Westmere supports 6 cores per package and
         * uses the low 48 bits. Sandybridge support 8 cores per package and
         * uses up all 64 bits.
         *
         * However, the unused bits are reserved so we pretend that all bits
         * in this MSR are valid.
         */
        for (i = 0; i < 8; i++)
                turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
}
  315 
  316 void
  317 vmx_msr_guest_init(struct vmx *vmx, struct vmx_vcpu *vcpu)
  318 {
  319         /*
  320          * The permissions bitmap is shared between all vcpus so initialize it
  321          * once when initializing the vBSP.
  322          */
  323         if (vcpu->vcpuid == 0) {
  324                 guest_msr_rw(vmx, MSR_LSTAR);
  325                 guest_msr_rw(vmx, MSR_CSTAR);
  326                 guest_msr_rw(vmx, MSR_STAR);
  327                 guest_msr_rw(vmx, MSR_SF_MASK);
  328                 guest_msr_rw(vmx, MSR_KGSBASE);
  329         }
  330 
  331         /*
  332          * Initialize guest IA32_PAT MSR with default value after reset.
  333          */
  334         vcpu->guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) |
  335             PAT_VALUE(1, PAT_WRITE_THROUGH)     |
  336             PAT_VALUE(2, PAT_UNCACHED)          |
  337             PAT_VALUE(3, PAT_UNCACHEABLE)       |
  338             PAT_VALUE(4, PAT_WRITE_BACK)        |
  339             PAT_VALUE(5, PAT_WRITE_THROUGH)     |
  340             PAT_VALUE(6, PAT_UNCACHED)          |
  341             PAT_VALUE(7, PAT_UNCACHEABLE);
  342 
  343         return;
  344 }
  345 
/*
 * Load the guest's syscall-related MSRs immediately before entering the
 * guest.
 */
void
vmx_msr_guest_enter(struct vmx_vcpu *vcpu)
{

        /* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */
        update_pcb_bases(curpcb);
        wrmsr(MSR_LSTAR, vcpu->guest_msrs[IDX_MSR_LSTAR]);
        wrmsr(MSR_CSTAR, vcpu->guest_msrs[IDX_MSR_CSTAR]);
        wrmsr(MSR_STAR, vcpu->guest_msrs[IDX_MSR_STAR]);
        wrmsr(MSR_SF_MASK, vcpu->guest_msrs[IDX_MSR_SF_MASK]);
        wrmsr(MSR_KGSBASE, vcpu->guest_msrs[IDX_MSR_KGSBASE]);
}
  358 
  359 void
  360 vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, struct vmx_vcpu *vcpu)
  361 {
  362         uint64_t guest_tsc_aux = vcpu->guest_msrs[IDX_MSR_TSC_AUX];
  363         uint32_t host_aux = cpu_auxmsr();
  364 
  365         if (vmx_have_msr_tsc_aux && guest_tsc_aux != host_aux)
  366                 wrmsr(MSR_TSC_AUX, guest_tsc_aux);
  367 }
  368 
/*
 * Save the guest's syscall-related MSRs and restore the host values
 * cached in vmx_msr_init() after returning from the guest.
 */
void
vmx_msr_guest_exit(struct vmx_vcpu *vcpu)
{

        /* Save guest MSRs */
        vcpu->guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
        vcpu->guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
        vcpu->guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
        vcpu->guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
        vcpu->guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);

        /* Restore host MSRs */
        wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
        wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
        wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
        wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);

        /* MSR_KGSBASE will be restored on the way back to userspace */
}
  388 
/*
 * Restore the host's TSC_AUX after a guest exit if the guest was
 * running with a different value.
 */
void
vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, struct vmx_vcpu *vcpu)
{
        uint64_t guest_tsc_aux = vcpu->guest_msrs[IDX_MSR_TSC_AUX];
        uint32_t host_aux = cpu_auxmsr();

        if (vmx_have_msr_tsc_aux && guest_tsc_aux != host_aux)
                /*
                 * Note that it is not necessary to save the guest value
                 * here; vcpu->guest_msrs[IDX_MSR_TSC_AUX] always
                 * contains the current value since it is updated whenever
                 * the guest writes to it (which is expected to be very
                 * rare).
                 */
                wrmsr(MSR_TSC_AUX, host_aux);
}
  405 
/*
 * Emulate a guest RDMSR of one of the software-handled MSRs.
 *
 * On success '*val' holds the value to return to the guest.  A failed
 * MTRR read injects #GP into the guest (and still returns 0).  EINVAL
 * is returned for MSRs not handled here.  'retu' is not modified by
 * any of the cases below.
 */
int
vmx_rdmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t *val, bool *retu)
{
        int error;

        error = 0;

        switch (num) {
        case MSR_MCG_CAP:
        case MSR_MCG_STATUS:
                /* Machine-check MSRs read as zero */
                *val = 0;
                break;
        case MSR_MTRRcap:
        case MSR_MTRRdefType:
        case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
        case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
        case MSR_MTRR64kBase:
        case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1:
                if (vm_rdmtrr(&vcpu->mtrr, num, val) != 0) {
                        vm_inject_gp(vcpu->vcpu);
                }
                break;
        case MSR_IA32_MISC_ENABLE:
                *val = misc_enable;
                break;
        case MSR_PLATFORM_INFO:
                *val = platform_info;
                break;
        case MSR_TURBO_RATIO_LIMIT:
        case MSR_TURBO_RATIO_LIMIT1:
                *val = turbo_ratio_limit;
                break;
        case MSR_PAT:
                *val = vcpu->guest_msrs[IDX_MSR_PAT];
                break;
        default:
                error = EINVAL;
                break;
        }
        return (error);
}
  447 
/*
 * Emulate a guest WRMSR of one of the software-handled MSRs.
 *
 * Invalid PAT or unsupported TSC_AUX writes inject #GP into the guest
 * (and still return 0).  Writes to MSRs not handled here, or writes
 * that change unsupported MISC_ENABLE bits, return EINVAL.  'retu' is
 * not modified by any of the cases below.
 */
int
vmx_wrmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t val, bool *retu)
{
        uint64_t changed;
        int error;

        error = 0;

        switch (num) {
        case MSR_MCG_CAP:
        case MSR_MCG_STATUS:
                break;          /* ignore writes */
        case MSR_MTRRcap:
        case MSR_MTRRdefType:
        case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
        case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
        case MSR_MTRR64kBase:
        case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1:
                if (vm_wrmtrr(&vcpu->mtrr, num, val) != 0) {
                        vm_inject_gp(vcpu->vcpu);
                }
                break;
        case MSR_IA32_MISC_ENABLE:
                changed = val ^ misc_enable;
                /*
                 * If the host has disabled the NX feature then the guest
                 * also cannot use it. However, a Linux guest will try to
                 * enable the NX feature by writing to the MISC_ENABLE MSR.
                 *
                 * This can be safely ignored because the memory management
                 * code looks at CPUID.80000001H:EDX.NX to check if the
                 * functionality is actually enabled.
                 */
                changed &= ~(1UL << 34);

                /*
                 * Punt to userspace if any other bits are being modified.
                 */
                if (changed)
                        error = EINVAL;

                break;
        case MSR_PAT:
                /* Reject PAT values with reserved memory-type encodings */
                if (pat_valid(val))
                        vcpu->guest_msrs[IDX_MSR_PAT] = val;
                else
                        vm_inject_gp(vcpu->vcpu);
                break;
        case MSR_TSC:
                /* Express the guest's desired TSC as an offset from the host */
                error = vmx_set_tsc_offset(vcpu, val - rdtsc());
                break;
        case MSR_TSC_AUX:
                if (vmx_have_msr_tsc_aux)
                        /*
                         * vmx_msr_guest_enter_tsc_aux() will apply this
                         * value when it is called immediately before guest
                         * entry.
                         */
                        vcpu->guest_msrs[IDX_MSR_TSC_AUX] = val;
                else
                        vm_inject_gp(vcpu->vcpu);
                break;
        default:
                error = EINVAL;
                break;
        }

        return (error);
}

Cache object: 2ee31b4fc3b3e41a14f1e2ae85d576c0


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.