The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/x86/x86/pvclock.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2009 Adrian Chadd
    3  * Copyright (c) 2012 Spectra Logic Corporation
    4  * Copyright (c) 2014 Bryan Venteicher
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 #include <sys/cdefs.h>
   30 __FBSDID("$FreeBSD$");
   31 
   32 #include <sys/param.h>
   33 #include <sys/systm.h>
   34 #include <sys/bus.h>
   35 #include <sys/clock.h>
   36 #include <sys/conf.h>
   37 #include <sys/fcntl.h>
   38 #include <sys/limits.h>
   39 #include <sys/mman.h>
   40 #include <sys/proc.h>
   41 #include <sys/smp.h>
   42 #include <sys/sysctl.h>
   43 #include <sys/vdso.h>
   44 
   45 #include <vm/vm.h>
   46 #include <vm/pmap.h>
   47 
   48 #include <machine/atomic.h>
   49 #include <machine/cpufunc.h>
   50 #include <machine/md_var.h>
   51 #include <machine/pvclock.h>
   52 
/*
 * Last system time. This is used to guarantee a monotonically non-decreasing
 * clock for the kernel codepath and approximate the same for the vDSO codepath.
 * In theory, this should be unnecessary absent hypervisor bug(s) and/or what
 * should be rare cases where TSC jitter may still be visible despite the
 * hypervisor's best efforts.
 *
 * Within this file the value is only advanced through compare-and-set loops
 * in the time readers, is reset to zero by pvclock_resume(), and is copied
 * into the vDSO timehands by the timecounter's vDSO fill callbacks.
 */
static volatile uint64_t pvclock_last_systime;
   61 
/* Forward declarations of the file-local time readers and timecounter hooks. */
static uint64_t          pvclock_getsystime(struct pvclock *pvc);
static void              pvclock_read_time_info(
    struct pvclock_vcpu_time_info *ti, uint64_t *ns, uint8_t *flags);
static void              pvclock_read_wall_clock(struct pvclock_wall_clock *wc,
    struct timespec *ts);
static u_int             pvclock_tc_get_timecount(struct timecounter *tc);
static uint32_t          pvclock_tc_vdso_timehands(
    struct vdso_timehands *vdso_th, struct timecounter *tc);
#ifdef COMPAT_FREEBSD32
static uint32_t          pvclock_tc_vdso_timehands32(
    struct vdso_timehands32 *vdso_th, struct timecounter *tc);
#endif

/* Character device entry points (see pvclock_cdev_cdevsw). */
static d_open_t          pvclock_cdev_open;
static d_mmap_t          pvclock_cdev_mmap;
   77 
/*
 * Character device switch for the pvclock device node.  Only open and mmap
 * are provided; the node is created in pvclock_init() with its si_drv1
 * pointing at the time info page(s).
 */
static struct cdevsw     pvclock_cdev_cdevsw = {
        .d_version =    D_VERSION,
        .d_name =       PVCLOCK_CDEVNAME,
        .d_open =       pvclock_cdev_open,
        .d_mmap =       pvclock_cdev_mmap,
};
   84 
/*
 * Reset the recorded last system time to zero with release semantics.
 *
 * NOTE(review): presumably called on resume from suspend so that a system
 * time that restarted from a lower value is not clamped to the stale
 * pre-suspend maximum -- confirm against callers.
 */
void
pvclock_resume(void)
{
        atomic_store_rel_64(&pvclock_last_systime, 0);
}
   90 
   91 uint64_t
   92 pvclock_tsc_freq(struct pvclock_vcpu_time_info *ti)
   93 {
   94         uint64_t freq;
   95 
   96         freq = (1000000000ULL << 32) / ti->tsc_to_system_mul;
   97         if (ti->tsc_shift < 0)
   98                 freq <<= -ti->tsc_shift;
   99         else
  100                 freq >>= ti->tsc_shift;
  101         return (freq);
  102 }
  103 
/*
 * Read a consistent snapshot of a vCPU time info record.
 *
 * On return, *ns holds ti->system_time plus the scaled TSC delta since
 * ti->tsc_timestamp, and *flags holds ti->flags.  The read is repeated until
 * the version seen before and after the payload reads is identical and even.
 */
static void
pvclock_read_time_info(struct pvclock_vcpu_time_info *ti,
    uint64_t *ns, uint8_t *flags)
{
        uint64_t delta;
        uint32_t version;

        do {
                /* Acquire-load the version before reading the payload. */
                version = atomic_load_acq_32(&ti->version);
                delta = rdtsc_ordered() - ti->tsc_timestamp;
                *ns = ti->system_time + pvclock_scale_delta(delta,
                    ti->tsc_to_system_mul, ti->tsc_shift);
                *flags = ti->flags;
                /* Keep the payload reads ahead of the version re-check. */
                atomic_thread_fence_acq();
                /*
                 * Retry if the version is odd (presumably an update is in
                 * progress) or if it changed while the payload was read.
                 */
        } while ((ti->version & 1) != 0 || ti->version != version);
}
  120 
/*
 * Read a consistent snapshot of the wall clock record into *ts.
 *
 * Copies wc->sec and wc->nsec, repeating the read until the version seen
 * before and after the copy is identical and even.
 */
static void
pvclock_read_wall_clock(struct pvclock_wall_clock *wc, struct timespec *ts)
{
        uint32_t version;

        do {
                /* Acquire-load the version before reading the payload. */
                version = atomic_load_acq_32(&wc->version);
                ts->tv_sec = wc->sec;
                ts->tv_nsec = wc->nsec;
                /* Keep the payload reads ahead of the version re-check. */
                atomic_thread_fence_acq();
                /*
                 * Retry if the version is odd (presumably an update is in
                 * progress) or if it changed while the payload was read.
                 */
        } while ((wc->version & 1) != 0 || wc->version != version);
}
  133 
/*
 * Return the current system time in nanoseconds, read from the time info
 * record of the current CPU.
 *
 * If the record does not carry PVCLOCK_FLAG_TSC_STABLE, the result is clamped
 * so that it never goes below the globally recorded last system time, and
 * that global is advanced to the new reading when it is not older.
 */
static uint64_t
pvclock_getsystime(struct pvclock *pvc)
{
        uint64_t now, last, ret;
        uint8_t flags;

        /*
         * NOTE(review): the critical section presumably keeps this thread on
         * the CPU whose time info record is indexed by curcpu -- confirm.
         */
        critical_enter();
        pvclock_read_time_info(&pvc->timeinfos[curcpu], &now, &flags);
        ret = now;
        if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) {
                last = atomic_load_acq_64(&pvclock_last_systime);
                do {
                        /* A newer time was already recorded; report it. */
                        if (last > now) {
                                ret = last;
                                break;
                        }
                        /*
                         * Otherwise try to publish 'now'.  On failure 'last'
                         * is refreshed with the current value and the
                         * comparison above is repeated.
                         */
                } while (!atomic_fcmpset_rel_64(&pvclock_last_systime, &last,
                    now));
        }
        critical_exit();
        return (ret);
}
  156 
/*
 * Return the system time in nanoseconds computed from the caller-supplied
 * time info record 'ti'.
 *
 * Applies the same last-system-time clamping as pvclock_getsystime() when the
 * record lacks PVCLOCK_FLAG_TSC_STABLE, but takes the record explicitly and
 * does not enter a critical section itself.
 *
 * NOTE: Transitional-only; this should be removed after 'dev/xen/timer/timer.c'
 * has been migrated to the 'struct pvclock' API.
 */
uint64_t
pvclock_get_timecount(struct pvclock_vcpu_time_info *ti)
{
        uint64_t now, last, ret;
        uint8_t flags;

        pvclock_read_time_info(ti, &now, &flags);
        ret = now;
        if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) {
                last = atomic_load_acq_64(&pvclock_last_systime);
                do {
                        /* A newer time was already recorded; report it. */
                        if (last > now) {
                                ret = last;
                                break;
                        }
                        /*
                         * Otherwise try to publish 'now'.  On failure 'last'
                         * is refreshed with the current value and the
                         * comparison above is repeated.
                         */
                } while (!atomic_fcmpset_rel_64(&pvclock_last_systime, &last,
                    now));
        }
        return (ret);
}
  181 
/*
 * Public wrapper that reads a consistent wall clock snapshot from 'wc' into
 * 'ts' by delegating to pvclock_read_wall_clock().
 *
 * NOTE: Transitional-only; this should be removed after 'dev/xen/timer/timer.c'
 * has been migrated to the 'struct pvclock' API.
 */
void
pvclock_get_wallclock(struct pvclock_wall_clock *wc, struct timespec *ts)
{
        pvclock_read_wall_clock(wc, ts);
}
  191 
  192 static int
  193 pvclock_cdev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
  194 {
  195         if (oflags & FWRITE)
  196                 return (EPERM);
  197         return (0);
  198 }
  199 
  200 static int
  201 pvclock_cdev_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
  202     int nprot, vm_memattr_t *memattr)
  203 {
  204         if (offset >= mp_ncpus * sizeof(struct pvclock_vcpu_time_info))
  205                 return (EINVAL);
  206         if (PROT_EXTRACT(nprot) != PROT_READ)
  207                 return (EACCES);
  208         *paddr = vtophys((uintptr_t)dev->si_drv1 + offset);
  209         *memattr = VM_MEMATTR_DEFAULT;
  210         return (0);
  211 }
  212 
  213 static u_int
  214 pvclock_tc_get_timecount(struct timecounter *tc)
  215 {
  216         struct pvclock *pvc = tc->tc_priv;
  217 
  218         return (pvclock_getsystime(pvc) & UINT_MAX);
  219 }
  220 
  221 static uint32_t
  222 pvclock_tc_vdso_timehands(struct vdso_timehands *vdso_th,
  223     struct timecounter *tc)
  224 {
  225         struct pvclock *pvc = tc->tc_priv;
  226 
  227         vdso_th->th_algo = VDSO_TH_ALGO_X86_PVCLK;
  228         vdso_th->th_x86_shift = 0;
  229         vdso_th->th_x86_hpet_idx = 0;
  230         vdso_th->th_x86_pvc_last_systime =
  231             atomic_load_acq_64(&pvclock_last_systime);
  232         vdso_th->th_x86_pvc_stable_mask = !pvc->vdso_force_unstable &&
  233             pvc->stable_flag_supported ? PVCLOCK_FLAG_TSC_STABLE : 0;
  234         bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
  235         return (pvc->cdev != NULL && amd_feature & AMDID_RDTSCP);
  236 }
  237 
  238 #ifdef COMPAT_FREEBSD32
  239 static uint32_t
  240 pvclock_tc_vdso_timehands32(struct vdso_timehands32 *vdso_th,
  241     struct timecounter *tc)
  242 {
  243         struct pvclock *pvc = tc->tc_priv;
  244 
  245         vdso_th->th_algo = VDSO_TH_ALGO_X86_PVCLK;
  246         vdso_th->th_x86_shift = 0;
  247         vdso_th->th_x86_hpet_idx = 0;
  248         vdso_th->th_x86_pvc_last_systime =
  249             atomic_load_acq_64(&pvclock_last_systime);
  250         vdso_th->th_x86_pvc_stable_mask = !pvc->vdso_force_unstable &&
  251             pvc->stable_flag_supported ? PVCLOCK_FLAG_TSC_STABLE : 0;
  252         bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
  253         return (pvc->cdev != NULL && amd_feature & AMDID_RDTSCP);
  254 }
  255 #endif
  256 
  257 void
  258 pvclock_gettime(struct pvclock *pvc, struct timespec *ts)
  259 {
  260         struct timespec system_ts;
  261         uint64_t system_ns;
  262 
  263         pvclock_read_wall_clock(pvc->get_wallclock(pvc->get_wallclock_arg), ts);
  264         system_ns = pvclock_getsystime(pvc);
  265         system_ts.tv_sec = system_ns / 1000000000ULL;
  266         system_ts.tv_nsec = system_ns % 1000000000ULL;
  267         timespecadd(ts, &system_ts, ts);
  268 }
  269 
  270 void
  271 pvclock_init(struct pvclock *pvc, device_t dev, const char *tc_name,
  272     int tc_quality, u_int tc_flags)
  273 {
  274         struct make_dev_args mda;
  275         int err;
  276 
  277         KASSERT(((uintptr_t)pvc->timeinfos & PAGE_MASK) == 0,
  278             ("Specified time info page(s) address is not page-aligned."));
  279 
  280         /* Set up vDSO stable-flag suppression test facility: */
  281         pvc->vdso_force_unstable = false;
  282         SYSCTL_ADD_BOOL(device_get_sysctl_ctx(dev),
  283             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
  284             "vdso_force_unstable", CTLFLAG_RW, &pvc->vdso_force_unstable, 0,
  285             "Forcibly deassert stable flag in vDSO codepath");
  286 
  287         /* Set up timecounter and timecounter-supporting members: */
  288         pvc->tc.tc_get_timecount = pvclock_tc_get_timecount;
  289         pvc->tc.tc_poll_pps = NULL;
  290         pvc->tc.tc_counter_mask = ~0U;
  291         pvc->tc.tc_frequency = 1000000000ULL;
  292         pvc->tc.tc_name = tc_name;
  293         pvc->tc.tc_quality = tc_quality;
  294         pvc->tc.tc_flags = tc_flags;
  295         pvc->tc.tc_priv = pvc;
  296         pvc->tc.tc_fill_vdso_timehands = pvclock_tc_vdso_timehands;
  297 #ifdef COMPAT_FREEBSD32
  298         pvc->tc.tc_fill_vdso_timehands32 = pvclock_tc_vdso_timehands32;
  299 #endif
  300 
  301         /* Set up cdev for userspace mmapping of vCPU 0 time info page: */
  302         make_dev_args_init(&mda);
  303         mda.mda_devsw = &pvclock_cdev_cdevsw;
  304         mda.mda_uid = UID_ROOT;
  305         mda.mda_gid = GID_WHEEL;
  306         mda.mda_mode = 0444;
  307         mda.mda_si_drv1 = pvc->timeinfos;
  308         err = make_dev_s(&mda, &pvc->cdev, PVCLOCK_CDEVNAME);
  309         if (err != 0) {
  310                 device_printf(dev, "Could not create /dev/%s, error %d. Fast "
  311                     "time of day will be unavailable for this timecounter.\n",
  312                     PVCLOCK_CDEVNAME, err);
  313                 KASSERT(pvc->cdev == NULL,
  314                     ("Failed make_dev_s() unexpectedly inited cdev."));
  315         }
  316 
  317         /* Register timecounter: */
  318         tc_init(&pvc->tc);
  319 
  320         /*
  321          * Register wallclock:
  322          *     The RTC registration API expects a resolution in microseconds;
  323          *     pvclock's 1ns resolution is rounded up to 1us.
  324          */
  325         clock_register(dev, 1);
  326 }
  327 
/*
 * Tear down a pvclock instance.
 *
 * Always fails with EBUSY and leaves 'pvc' untouched; see the comment in the
 * body for the reason.
 */
int
pvclock_destroy(struct pvclock *pvc)
{
        /*
         * Not currently possible since there is no teardown counterpart of
         * 'tc_init()'.
         */
        return (EBUSY);
}

Cache object: eaaa1afa60341358fd767c8d345d6306


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.