| 
     1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2014 Bryan Venteicher <bryanv@FreeBSD.org>
    5  * Copyright (c) 2021 Mathieu Chouquet-Stringer
    6  * Copyright (c) 2021 Juniper Networks, Inc.
    7  * Copyright (c) 2021 Klara, Inc.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   28  * SUCH DAMAGE.
   29  */
   30 
   31 /*
   32  * Linux KVM paravirtual clock support
   33  *
   34  * References:
   35  *     - [1] https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html
   36  *     - [2] https://www.kernel.org/doc/html/latest/virt/kvm/msr.html
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __FBSDID("$FreeBSD$");
   41 
   42 #include <sys/param.h>
   43 #include <sys/bus.h>
   44 #include <sys/domainset.h>
   45 #include <sys/kernel.h>
   46 #include <sys/malloc.h>
   47 #include <sys/module.h>
   48 #include <sys/smp.h>
   49 #include <sys/sysctl.h>
   50 
   51 #include <vm/vm.h>
   52 #include <vm/pmap.h>
   53 #include <vm/vm_extern.h>
   54 
   55 #include <machine/pvclock.h>
   56 #include <x86/kvm.h>
   57 
   58 #include "clock_if.h"
   59 
   60 #define KVM_CLOCK_DEVNAME               "kvmclock"
   61 /*
   62  * Note: Chosen to be (1) above HPET's value (always 950), (2) above the TSC's
   63  * default value of 800, and (3) below the TSC's value when it supports the
   64  * "Invariant TSC" feature and is believed to be synchronized across all CPUs.
   65  */
   66 #define KVM_CLOCK_TC_QUALITY            975
   67 
   68 struct kvm_clock_softc {
   69         struct pvclock                   pvc;
   70         struct pvclock_wall_clock        wc;
   71         struct pvclock_vcpu_time_info   *timeinfos;
   72         u_int                            msr_tc;
   73         u_int                            msr_wc;
   74 #ifndef EARLY_AP_STARTUP
   75         int                              firstcpu;
   76 #endif
   77 };
   78 
   79 static struct pvclock_wall_clock *kvm_clock_get_wallclock(void *arg);
   80 static void     kvm_clock_system_time_enable(struct kvm_clock_softc *sc,
   81                     const cpuset_t *cpus);
   82 static void     kvm_clock_system_time_enable_pcpu(void *arg);
   83 static void     kvm_clock_setup_sysctl(device_t);
   84 
   85 static struct pvclock_wall_clock *
   86 kvm_clock_get_wallclock(void *arg)
   87 {
   88         struct kvm_clock_softc *sc = arg;
   89 
   90         wrmsr(sc->msr_wc, vtophys(&sc->wc));
   91         return (&sc->wc);
   92 }
   93 
   94 static void
   95 kvm_clock_system_time_enable(struct kvm_clock_softc *sc, const cpuset_t *cpus)
   96 {
   97         smp_rendezvous_cpus(*cpus, NULL, kvm_clock_system_time_enable_pcpu,
   98             NULL, sc);
   99 }
  100 
  101 static void
  102 kvm_clock_system_time_enable_pcpu(void *arg)
  103 {
  104         struct kvm_clock_softc *sc = arg;
  105 
  106         /*
  107          * See [2]; the lsb of this MSR is the system time enable bit.
  108          */
  109         wrmsr(sc->msr_tc, vtophys(&(sc->timeinfos)[curcpu]) | 1);
  110 }
  111 
  112 #ifndef EARLY_AP_STARTUP
  113 static void
  114 kvm_clock_init_smp(void *arg __unused)
  115 {
  116         devclass_t kvm_clock_devclass;
  117         cpuset_t cpus;
  118         struct kvm_clock_softc *sc;
  119 
  120         kvm_clock_devclass = devclass_find(KVM_CLOCK_DEVNAME);
  121         sc = devclass_get_softc(kvm_clock_devclass, 0);
  122         if (sc == NULL || mp_ncpus == 1)
  123                 return;
  124 
  125         /*
  126          * Register with the hypervisor on all CPUs except the one that
  127          * registered in kvm_clock_attach().
  128          */
  129         cpus = all_cpus;
  130         KASSERT(CPU_ISSET(sc->firstcpu, &cpus),
  131             ("%s: invalid first CPU %d", __func__, sc->firstcpu));
  132         CPU_CLR(sc->firstcpu, &cpus);
  133         kvm_clock_system_time_enable(sc, &cpus);
  134 }
  135 SYSINIT(kvm_clock, SI_SUB_SMP, SI_ORDER_ANY, kvm_clock_init_smp, NULL);
  136 #endif
  137 
  138 static void
  139 kvm_clock_identify(driver_t *driver, device_t parent)
  140 {
  141         u_int regs[4];
  142 
  143         kvm_cpuid_get_features(regs);
  144         if ((regs[0] &
  145             (KVM_FEATURE_CLOCKSOURCE2 | KVM_FEATURE_CLOCKSOURCE)) == 0)
  146                 return;
  147         if (device_find_child(parent, KVM_CLOCK_DEVNAME, -1))
  148                 return;
  149         BUS_ADD_CHILD(parent, 0, KVM_CLOCK_DEVNAME, 0);
  150 }
  151 
  152 static int
  153 kvm_clock_probe(device_t dev)
  154 {
  155         device_set_desc(dev, "KVM paravirtual clock");
  156         return (BUS_PROBE_DEFAULT);
  157 }
  158 
  159 static int
  160 kvm_clock_attach(device_t dev)
  161 {
  162         u_int regs[4];
  163         struct kvm_clock_softc *sc = device_get_softc(dev);
  164         bool stable_flag_supported;
  165 
  166         /* Process KVM "features" CPUID leaf content: */
  167         kvm_cpuid_get_features(regs);
  168         if ((regs[0] & KVM_FEATURE_CLOCKSOURCE2) != 0) {
  169                 sc->msr_tc = KVM_MSR_SYSTEM_TIME_NEW;
  170                 sc->msr_wc = KVM_MSR_WALL_CLOCK_NEW;
  171         } else {
  172                 KASSERT((regs[0] & KVM_FEATURE_CLOCKSOURCE) != 0,
  173                     ("Clocksource feature flags disappeared since "
  174                     "kvm_clock_identify: regs[0] %#0x.", regs[0]));
  175                 sc->msr_tc = KVM_MSR_SYSTEM_TIME;
  176                 sc->msr_wc = KVM_MSR_WALL_CLOCK;
  177         }
  178         stable_flag_supported =
  179             (regs[0] & KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) != 0;
  180 
  181         /* Set up 'struct pvclock_vcpu_time_info' page(s): */
  182         sc->timeinfos = kmem_malloc(mp_ncpus *
  183             sizeof(struct pvclock_vcpu_time_info), M_WAITOK | M_ZERO);
  184 #ifdef EARLY_AP_STARTUP
  185         kvm_clock_system_time_enable(sc, &all_cpus);
  186 #else
  187         sc->firstcpu = curcpu;
  188         kvm_clock_system_time_enable_pcpu(sc);
  189 #endif
  190 
  191         /*
  192          * Init pvclock; register KVM clock wall clock, register KVM clock
  193          * timecounter, and set up the requisite infrastructure for vDSO access
  194          * to this timecounter.
  195          *     Regarding 'tc_flags': Since the KVM MSR documentation does not
  196          *     specifically discuss suspend/resume scenarios, conservatively
  197          *     leave 'TC_FLAGS_SUSPEND_SAFE' cleared and assume that the system
  198          *     time must be re-inited in such cases.
  199          */
  200         sc->pvc.get_wallclock = kvm_clock_get_wallclock;
  201         sc->pvc.get_wallclock_arg = sc;
  202         sc->pvc.timeinfos = sc->timeinfos;
  203         sc->pvc.stable_flag_supported = stable_flag_supported;
  204         pvclock_init(&sc->pvc, dev, KVM_CLOCK_DEVNAME, KVM_CLOCK_TC_QUALITY, 0);
  205         kvm_clock_setup_sysctl(dev);
  206         return (0);
  207 }
  208 
  209 static int
  210 kvm_clock_detach(device_t dev)
  211 {
  212         struct kvm_clock_softc *sc = device_get_softc(dev);
  213 
  214         return (pvclock_destroy(&sc->pvc));
  215 }
  216 
  217 static int
  218 kvm_clock_suspend(device_t dev)
  219 {
  220         return (0);
  221 }
  222 
  223 static int
  224 kvm_clock_resume(device_t dev)
  225 {
  226         /*
  227          * See note in 'kvm_clock_attach()' regarding 'TC_FLAGS_SUSPEND_SAFE';
  228          * conservatively assume that the system time must be re-inited in
  229          * suspend/resume scenarios.
  230          */
  231         kvm_clock_system_time_enable(device_get_softc(dev), &all_cpus);
  232         pvclock_resume();
  233         inittodr(time_second);
  234         return (0);
  235 }
  236 
  237 static int
  238 kvm_clock_gettime(device_t dev, struct timespec *ts)
  239 {
  240         struct kvm_clock_softc *sc = device_get_softc(dev);
  241 
  242         pvclock_gettime(&sc->pvc, ts);
  243         return (0);
  244 }
  245 
  246 static int
  247 kvm_clock_settime(device_t dev, struct timespec *ts)
  248 {
  249         /*
  250          * Even though it is not possible to set the KVM clock's wall clock, to
  251          * avoid the possibility of periodic benign error messages from
  252          * 'settime_task_func()', report success rather than, e.g., 'ENODEV'.
  253          */
  254         return (0);
  255 }
  256 
  257 static int
  258 kvm_clock_tsc_freq_sysctl(SYSCTL_HANDLER_ARGS)
  259 {
  260         struct kvm_clock_softc *sc = oidp->oid_arg1;
  261         uint64_t freq = pvclock_tsc_freq(sc->timeinfos);
  262 
  263         return (sysctl_handle_64(oidp, &freq, 0, req));
  264 }
  265 
  266 static void
  267 kvm_clock_setup_sysctl(device_t dev)
  268 {
  269         struct kvm_clock_softc *sc = device_get_softc(dev);
  270         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
  271         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
  272         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
  273 
  274         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tsc_freq",
  275             CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
  276             kvm_clock_tsc_freq_sysctl, "QU",
  277             "Time Stamp Counter frequency");
  278 }
  279 
  280 static device_method_t kvm_clock_methods[] = {
  281         DEVMETHOD(device_identify,      kvm_clock_identify),
  282         DEVMETHOD(device_probe,         kvm_clock_probe),
  283         DEVMETHOD(device_attach,        kvm_clock_attach),
  284         DEVMETHOD(device_detach,        kvm_clock_detach),
  285         DEVMETHOD(device_suspend,       kvm_clock_suspend),
  286         DEVMETHOD(device_resume,        kvm_clock_resume),
  287         /* clock interface */
  288         DEVMETHOD(clock_gettime,        kvm_clock_gettime),
  289         DEVMETHOD(clock_settime,        kvm_clock_settime),
  290 
  291         DEVMETHOD_END
  292 };
  293 
  294 static driver_t kvm_clock_driver = {
  295         KVM_CLOCK_DEVNAME,
  296         kvm_clock_methods,
  297         sizeof(struct kvm_clock_softc),
  298 };
  299 
  300 DRIVER_MODULE(kvm_clock, nexus, kvm_clock_driver, 0, 0);
Cache object: dd665d4548fb827c93a4cf16a421d560 
 
 |