FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_clock.c


    1 /*
    2  * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
    3  * 
    4  * This code is derived from software contributed to The DragonFly Project
    5  * by Matthew Dillon <dillon@backplane.com>
    6  * 
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in
   15  *    the documentation and/or other materials provided with the
   16  *    distribution.
   17  * 3. Neither the name of The DragonFly Project nor the names of its
   18  *    contributors may be used to endorse or promote products derived
   19  *    from this software without specific, prior written permission.
   20  * 
   21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
   25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
   29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  * 
   34  * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org>
   35  * Copyright (c) 1982, 1986, 1991, 1993
   36  *      The Regents of the University of California.  All rights reserved.
   37  * (c) UNIX System Laboratories, Inc.
   38  * All or some portions of this file are derived from material licensed
   39  * to the University of California by American Telephone and Telegraph
   40  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   41  * the permission of UNIX System Laboratories, Inc.
   42  *
   43  * Redistribution and use in source and binary forms, with or without
   44  * modification, are permitted provided that the following conditions
   45  * are met:
   46  * 1. Redistributions of source code must retain the above copyright
   47  *    notice, this list of conditions and the following disclaimer.
   48  * 2. Redistributions in binary form must reproduce the above copyright
   49  *    notice, this list of conditions and the following disclaimer in the
   50  *    documentation and/or other materials provided with the distribution.
   51  * 3. Neither the name of the University nor the names of its contributors
   52  *    may be used to endorse or promote products derived from this software
   53  *    without specific prior written permission.
   54  *
   55  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   56  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   57  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   58  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   59  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   60  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   61  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   62  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   63  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   64  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   65  * SUCH DAMAGE.
   66  *
   67  *      @(#)kern_clock.c        8.5 (Berkeley) 1/21/94
   68  * $FreeBSD: src/sys/kern/kern_clock.c,v 1.105.2.10 2002/10/17 13:19:40 maxim Exp $
   69  */
   70 
   71 #include "opt_ntp.h"
   72 #include "opt_ifpoll.h"
   73 #include "opt_pctrack.h"
   74 
   75 #include <sys/param.h>
   76 #include <sys/systm.h>
   77 #include <sys/callout.h>
   78 #include <sys/kernel.h>
   79 #include <sys/kinfo.h>
   80 #include <sys/proc.h>
   81 #include <sys/malloc.h>
   82 #include <sys/resource.h>
   83 #include <sys/resourcevar.h>
   84 #include <sys/signalvar.h>
   85 #include <sys/timex.h>
   86 #include <sys/timepps.h>
   87 #include <vm/vm.h>
   88 #include <sys/lock.h>
   89 #include <vm/pmap.h>
   90 #include <vm/vm_map.h>
   91 #include <vm/vm_extern.h>
   92 #include <sys/sysctl.h>
   93 
   94 #include <sys/thread2.h>
   95 
   96 #include <machine/cpu.h>
   97 #include <machine/limits.h>
   98 #include <machine/smp.h>
   99 #include <machine/cpufunc.h>
  100 #include <machine/specialreg.h>
  101 #include <machine/clock.h>
  102 
  103 #ifdef GPROF
  104 #include <sys/gmon.h>
  105 #endif
  106 
  107 #ifdef IFPOLL_ENABLE
  108 extern void ifpoll_init_pcpu(int);
  109 #endif
  110 
  111 #ifdef DEBUG_PCTRACK
  112 static void do_pctrack(struct intrframe *frame, int which);
  113 #endif
  114 
  115 static void initclocks (void *dummy);
  116 SYSINIT(clocks, SI_BOOT2_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)
  117 
  118 /*
  119  * Some of these don't belong here, but it's easiest to concentrate them.
  120  * Note that cpu_time counts in microseconds, but most userland programs
  121  * just compare relative times against the total by delta.
  122  */
  123 struct kinfo_cputime cputime_percpu[MAXCPU];
  124 #ifdef DEBUG_PCTRACK
  125 struct kinfo_pcheader cputime_pcheader = { PCTRACK_SIZE, PCTRACK_ARYSIZE };
  126 struct kinfo_pctrack cputime_pctrack[MAXCPU][PCTRACK_SIZE];
  127 #endif
  128 
  129 static int
  130 sysctl_cputime(SYSCTL_HANDLER_ARGS)
  131 {
  132         int cpu, error = 0;
  133         size_t size = sizeof(struct kinfo_cputime);
  134 
  135         for (cpu = 0; cpu < ncpus; ++cpu) {
  136                 if ((error = SYSCTL_OUT(req, &cputime_percpu[cpu], size)))
  137                         break;
  138         }
  139 
  140         return (error);
  141 }
  142 SYSCTL_PROC(_kern, OID_AUTO, cputime, (CTLTYPE_OPAQUE|CTLFLAG_RD), 0, 0,
  143         sysctl_cputime, "S,kinfo_cputime", "CPU time statistics");
  144 
  145 static int
  146 sysctl_cp_time(SYSCTL_HANDLER_ARGS)
  147 {
  148         long cpu_states[5] = {0};
  149         int cpu, error = 0;
  150         size_t size = sizeof(cpu_states);
  151 
  152         for (cpu = 0; cpu < ncpus; ++cpu) {
  153                 cpu_states[CP_USER] += cputime_percpu[cpu].cp_user;
  154                 cpu_states[CP_NICE] += cputime_percpu[cpu].cp_nice;
  155                 cpu_states[CP_SYS] += cputime_percpu[cpu].cp_sys;
  156                 cpu_states[CP_INTR] += cputime_percpu[cpu].cp_intr;
  157                 cpu_states[CP_IDLE] += cputime_percpu[cpu].cp_idle;
  158         }
  159 
  160         error = SYSCTL_OUT(req, cpu_states, size);
  161 
  162         return (error);
  163 }
  164 
  165 SYSCTL_PROC(_kern, OID_AUTO, cp_time, (CTLTYPE_LONG|CTLFLAG_RD), 0, 0,
  166         sysctl_cp_time, "LU", "CPU time statistics");
  167 
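/*
 * Illustrative userland consumer (not part of the original file): the
 * aggregated kern.cp_time array exported just above can be fetched with
 * sysctlbyname().  The five slots are CP_USER, CP_NICE, CP_SYS, CP_INTR
 * and CP_IDLE, in that order.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	long cp_time[5];
	size_t len = sizeof(cp_time);

	if (sysctlbyname("kern.cp_time", cp_time, &len, NULL, 0) == 0) {
		printf("user %ld nice %ld sys %ld intr %ld idle %ld\n",
		    cp_time[0], cp_time[1], cp_time[2],
		    cp_time[3], cp_time[4]);
	}
	return (0);
}
#endif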
  168 /*
  169  * boottime is used to calculate the 'real' uptime.  Do not confuse this with
  170  * microuptime().  microtime() is not drift compensated.  The real uptime
   171  * with compensation is nanotime() - boottime.  boottime is recalculated
  172  * whenever the real time is set based on the compensated elapsed time
  173  * in seconds (gd->gd_time_seconds).
  174  *
  175  * The gd_time_seconds and gd_cpuclock_base fields remain fairly monotonic.
  176  * Slight adjustments to gd_cpuclock_base are made to phase-lock it to
  177  * the real time.
  178  */
  179 struct timespec boottime;       /* boot time (realtime) for reference only */
  180 time_t time_second;             /* read-only 'passive' uptime in seconds */
  181 time_t time_uptime;             /* read-only 'passive' uptime in seconds */
  182 
  183 /*
  184  * basetime is used to calculate the compensated real time of day.  The
  185  * basetime can be modified on a per-tick basis by the adjtime(), 
  186  * ntp_adjtime(), and sysctl-based time correction APIs.
  187  *
  188  * Note that frequency corrections can also be made by adjusting
  189  * gd_cpuclock_base.
  190  *
  191  * basetime is a tail-chasing FIFO, updated only by cpu #0.  The FIFO is
  192  * used on both SMP and UP systems to avoid MP races between cpu's and
  193  * interrupt races on UP systems.
  194  */
  195 #define BASETIME_ARYSIZE        16
  196 #define BASETIME_ARYMASK        (BASETIME_ARYSIZE - 1)
  197 static struct timespec basetime[BASETIME_ARYSIZE];
  198 static volatile int basetime_index;
  199 
  200 static int
  201 sysctl_get_basetime(SYSCTL_HANDLER_ARGS)
  202 {
  203         struct timespec *bt;
  204         int error;
  205         int index;
  206 
  207         /*
  208          * Because basetime data and index may be updated by another cpu,
  209          * a load fence is required to ensure that the data we read has
  210          * not been speculatively read relative to a possibly updated index.
  211          */
  212         index = basetime_index;
  213         cpu_lfence();
  214         bt = &basetime[index];
  215         error = SYSCTL_OUT(req, bt, sizeof(*bt));
  216         return (error);
  217 }
  218 
  219 SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime, CTLFLAG_RD,
  220     &boottime, timespec, "System boottime");
  221 SYSCTL_PROC(_kern, OID_AUTO, basetime, CTLTYPE_STRUCT|CTLFLAG_RD, 0, 0,
  222     sysctl_get_basetime, "S,timespec", "System basetime");
  223 
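/*
 * Illustrative sketch (not part of the original file) of the tail-chasing
 * FIFO protocol described above: the writer (cpu #0 only) fills the next
 * slot, issues a store fence, and only then publishes the new index, so a
 * reader that loads the index and then fences never sees a slot that is
 * still being modified.
 */
#if 0
static void
basetime_publish_sketch(const struct timespec *newbt)
{
	int ni = (basetime_index + 1) & BASETIME_ARYMASK;

	basetime[ni] = *newbt;		/* fill the currently unused slot */
	cpu_sfence();			/* data must be visible before the index */
	basetime_index = ni;		/* publish */
}

static void
basetime_read_sketch(struct timespec *out)
{
	int i = basetime_index;		/* snapshot the published index */

	cpu_lfence();			/* do not read the slot speculatively */
	*out = basetime[i];
}
#endif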
  224 static void hardclock(systimer_t info, int, struct intrframe *frame);
  225 static void statclock(systimer_t info, int, struct intrframe *frame);
  226 static void schedclock(systimer_t info, int, struct intrframe *frame);
  227 static void getnanotime_nbt(struct timespec *nbt, struct timespec *tsp);
  228 
  229 int     ticks;                  /* system master ticks at hz */
  230 int     clocks_running;         /* tsleep/timeout clocks operational */
  231 int64_t nsec_adj;               /* ntpd per-tick adjustment in nsec << 32 */
  232 int64_t nsec_acc;               /* accumulator */
  233 int     sched_ticks;            /* global schedule clock ticks */
  234 
  235 /* NTPD time correction fields */
  236 int64_t ntp_tick_permanent;     /* per-tick adjustment in nsec << 32 */
  237 int64_t ntp_tick_acc;           /* accumulator for per-tick adjustment */
  238 int64_t ntp_delta;              /* one-time correction in nsec */
  239 int64_t ntp_big_delta = 1000000000;
  240 int32_t ntp_tick_delta;         /* current adjustment rate */
  241 int32_t ntp_default_tick_delta; /* adjustment rate for ntp_delta */
  242 time_t  ntp_leap_second;        /* time of next leap second */
  243 int     ntp_leap_insert;        /* whether to insert or remove a second */
  244 
  245 /*
  246  * Finish initializing clock frequencies and start all clocks running.
  247  */
  248 /* ARGSUSED*/
  249 static void
  250 initclocks(void *dummy)
  251 {
  252         /*psratio = profhz / stathz;*/
  253         initclocks_pcpu();
  254         clocks_running = 1;
  255 }
  256 
  257 /*
  258  * Called on a per-cpu basis
  259  */
  260 void
  261 initclocks_pcpu(void)
  262 {
  263         struct globaldata *gd = mycpu;
  264 
  265         crit_enter();
  266         if (gd->gd_cpuid == 0) {
  267             gd->gd_time_seconds = 1;
  268             gd->gd_cpuclock_base = sys_cputimer->count();
  269         } else {
  270             /* XXX */
  271             gd->gd_time_seconds = globaldata_find(0)->gd_time_seconds;
  272             gd->gd_cpuclock_base = globaldata_find(0)->gd_cpuclock_base;
  273         }
  274 
  275         systimer_intr_enable();
  276 
  277 #ifdef IFPOLL_ENABLE
  278         ifpoll_init_pcpu(gd->gd_cpuid);
  279 #endif
  280 
  281         /*
  282          * Use a non-queued periodic systimer to prevent multiple ticks from
  283          * building up if the sysclock jumps forward (8254 gets reset).  The
  284          * sysclock will never jump backwards.  Our time sync is based on
  285          * the actual sysclock, not the ticks count.
  286          */
  287         systimer_init_periodic_nq(&gd->gd_hardclock, hardclock, NULL, hz);
  288         systimer_init_periodic_nq(&gd->gd_statclock, statclock, NULL, stathz);
  289         /* XXX correct the frequency for scheduler / estcpu tests */
  290         systimer_init_periodic_nq(&gd->gd_schedclock, schedclock, 
  291                                 NULL, ESTCPUFREQ); 
  292         crit_exit();
  293 }
  294 
  295 /*
  296  * This sets the current real time of day.  Timespecs are in seconds and
   297  * nanoseconds.  We do not mess with gd_time_seconds and gd_cpuclock_base;
  298  * instead we adjust basetime so basetime + gd_* results in the current
   299  * time of day.  This way the gd_* fields are guaranteed to represent
  300  * a monotonically increasing 'uptime' value.
  301  *
  302  * When set_timeofday() is called from userland, the system call forces it
  303  * onto cpu #0 since only cpu #0 can update basetime_index.
  304  */
  305 void
  306 set_timeofday(struct timespec *ts)
  307 {
  308         struct timespec *nbt;
  309         int ni;
  310 
  311         /*
  312          * XXX SMP / non-atomic basetime updates
  313          */
  314         crit_enter();
  315         ni = (basetime_index + 1) & BASETIME_ARYMASK;
  316         nbt = &basetime[ni];
  317         nanouptime(nbt);
  318         nbt->tv_sec = ts->tv_sec - nbt->tv_sec;
  319         nbt->tv_nsec = ts->tv_nsec - nbt->tv_nsec;
  320         if (nbt->tv_nsec < 0) {
  321             nbt->tv_nsec += 1000000000;
  322             --nbt->tv_sec;
  323         }
  324 
  325         /*
  326          * Note that basetime diverges from boottime as the clock drift is
  327          * compensated for, so we cannot do away with boottime.  When setting
  328          * the absolute time of day the drift is 0 (for an instant) and we
  329          * can simply assign boottime to basetime.  
  330          *
  331          * Note that nanouptime() is based on gd_time_seconds which is drift
   332  * compensated up to a point (it is guaranteed to remain monotonically
  333          * increasing).  gd_time_seconds is thus our best uptime guess and
  334          * suitable for use in the boottime calculation.  It is already taken
  335          * into account in the basetime calculation above.
  336          */
  337         boottime.tv_sec = nbt->tv_sec;
  338         ntp_delta = 0;
  339 
  340         /*
  341          * We now have a new basetime, make sure all other cpus have it,
  342          * then update the index.
  343          */
  344         cpu_sfence();
  345         basetime_index = ni;
  346 
  347         crit_exit();
  348 }
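/*
 * Worked example (illustrative, not part of the original file) of the
 * subtraction performed above.  With a requested time of day of
 * { 1000, 100000000 } and a current uptime of { 10, 400000000 } the new
 * basetime entry becomes { 989, 700000000 } after the nanosecond borrow,
 * so that basetime + uptime again yields the requested time of day.
 */
#if 0
static void
set_timeofday_example_check(void)
{
	struct timespec want = { 1000, 100000000 };
	struct timespec up = { 10, 400000000 };
	struct timespec bt;

	bt.tv_sec = want.tv_sec - up.tv_sec;
	bt.tv_nsec = want.tv_nsec - up.tv_nsec;
	if (bt.tv_nsec < 0) {
		bt.tv_nsec += 1000000000;
		--bt.tv_sec;
	}
	KKASSERT(bt.tv_sec == 989 && bt.tv_nsec == 700000000);
}
#endif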
  349         
  350 /*
   351  * Each cpu has its own hardclock, but we only increment ticks and softticks
  352  * on cpu #0.
  353  *
  354  * NOTE! systimer! the MP lock might not be held here.  We can only safely
  355  * manipulate objects owned by the current cpu.
  356  */
  357 static void
  358 hardclock(systimer_t info, int in_ipi __unused, struct intrframe *frame)
  359 {
  360         sysclock_t cputicks;
  361         struct proc *p;
  362         struct globaldata *gd = mycpu;
  363 
  364         /*
  365          * Realtime updates are per-cpu.  Note that timer corrections as
  366          * returned by microtime() and friends make an additional adjustment
   367          * using a system-wide 'basetime', but the running time is always
  368          * taken from the per-cpu globaldata area.  Since the same clock
   369          * is distributed (XXX SMP) to all cpus, the per-cpu timebases
  370          * stay in synch.
  371          *
  372          * Note that we never allow info->time (aka gd->gd_hardclock.time)
  373          * to reverse index gd_cpuclock_base, but that it is possible for
  374          * it to temporarily get behind in the seconds if something in the
  375          * system locks interrupts for a long period of time.  Since periodic
   376          * timers count events, though, everything should resynch again
  377          * immediately.
  378          */
  379         cputicks = info->time - gd->gd_cpuclock_base;
  380         if (cputicks >= sys_cputimer->freq) {
  381                 ++gd->gd_time_seconds;
  382                 gd->gd_cpuclock_base += sys_cputimer->freq;
  383                 if (gd->gd_cpuid == 0)
  384                         ++time_uptime;  /* uncorrected monotonic 1-sec gran */
  385         }
  386 
  387         /*
  388          * The system-wide ticks counter and NTP related timedelta/tickdelta
  389          * adjustments only occur on cpu #0.  NTP adjustments are accomplished
  390          * by updating basetime.
  391          */
  392         if (gd->gd_cpuid == 0) {
  393             struct timespec *nbt;
  394             struct timespec nts;
  395             int leap;
  396             int ni;
  397 
  398             ++ticks;
  399 
  400 #if 0
  401             if (tco->tc_poll_pps) 
  402                 tco->tc_poll_pps(tco);
  403 #endif
  404 
  405             /*
  406              * Calculate the new basetime index.  We are in a critical section
  407              * on cpu #0 and can safely play with basetime_index.  Start
  408              * with the current basetime and then make adjustments.
  409              */
  410             ni = (basetime_index + 1) & BASETIME_ARYMASK;
  411             nbt = &basetime[ni];
  412             *nbt = basetime[basetime_index];
  413 
  414             /*
  415              * Apply adjtime corrections.  (adjtime() API)
  416              *
  417              * adjtime() only runs on cpu #0 so our critical section is
  418              * sufficient to access these variables.
  419              */
  420             if (ntp_delta != 0) {
  421                 nbt->tv_nsec += ntp_tick_delta;
  422                 ntp_delta -= ntp_tick_delta;
  423                 if ((ntp_delta > 0 && ntp_delta < ntp_tick_delta) ||
  424                     (ntp_delta < 0 && ntp_delta > ntp_tick_delta)) {
  425                         ntp_tick_delta = ntp_delta;
  426                 }
  427             }
  428 
  429             /*
  430              * Apply permanent frequency corrections.  (sysctl API)
  431              */
  432             if (ntp_tick_permanent != 0) {
  433                 ntp_tick_acc += ntp_tick_permanent;
  434                 if (ntp_tick_acc >= (1LL << 32)) {
  435                     nbt->tv_nsec += ntp_tick_acc >> 32;
  436                     ntp_tick_acc -= (ntp_tick_acc >> 32) << 32;
  437                 } else if (ntp_tick_acc <= -(1LL << 32)) {
  438                     /* Negate ntp_tick_acc to avoid shifting the sign bit. */
  439                     nbt->tv_nsec -= (-ntp_tick_acc) >> 32;
  440                     ntp_tick_acc += ((-ntp_tick_acc) >> 32) << 32;
  441                 }
  442             }
  443 
  444             if (nbt->tv_nsec >= 1000000000) {
  445                     nbt->tv_sec++;
  446                     nbt->tv_nsec -= 1000000000;
  447             } else if (nbt->tv_nsec < 0) {
  448                     nbt->tv_sec--;
  449                     nbt->tv_nsec += 1000000000;
  450             }
  451 
  452             /*
  453              * Another per-tick compensation.  (for ntp_adjtime() API)
  454              */
  455             if (nsec_adj != 0) {
  456                 nsec_acc += nsec_adj;
  457                 if (nsec_acc >= 0x100000000LL) {
  458                     nbt->tv_nsec += nsec_acc >> 32;
  459                     nsec_acc = (nsec_acc & 0xFFFFFFFFLL);
  460                 } else if (nsec_acc <= -0x100000000LL) {
  461                     nbt->tv_nsec -= -nsec_acc >> 32;
  462                     nsec_acc = -(-nsec_acc & 0xFFFFFFFFLL);
  463                 }
  464                 if (nbt->tv_nsec >= 1000000000) {
  465                     nbt->tv_nsec -= 1000000000;
  466                     ++nbt->tv_sec;
  467                 } else if (nbt->tv_nsec < 0) {
  468                     nbt->tv_nsec += 1000000000;
  469                     --nbt->tv_sec;
  470                 }
  471             }
  472 
  473             /************************************************************
  474              *                  LEAP SECOND CORRECTION                  *
  475              ************************************************************
  476              *
  477              * Taking into account all the corrections made above, figure
  478              * out the new real time.  If the seconds field has changed
  479              * then apply any pending leap-second corrections.
  480              */
  481             getnanotime_nbt(nbt, &nts);
  482 
  483             if (time_second != nts.tv_sec) {
  484                 /*
  485                  * Apply leap second (sysctl API).  Adjust nts for changes
  486                  * so we do not have to call getnanotime_nbt again.
  487                  */
  488                 if (ntp_leap_second) {
  489                     if (ntp_leap_second == nts.tv_sec) {
  490                         if (ntp_leap_insert) {
  491                             nbt->tv_sec++;
  492                             nts.tv_sec++;
  493                         } else {
  494                             nbt->tv_sec--;
  495                             nts.tv_sec--;
  496                         }
  497                         ntp_leap_second--;
  498                     }
  499                 }
  500 
  501                 /*
  502                  * Apply leap second (ntp_adjtime() API), calculate a new
  503                  * nsec_adj field.  ntp_update_second() returns nsec_adj
  504                  * as a per-second value but we need it as a per-tick value.
  505                  */
  506                 leap = ntp_update_second(time_second, &nsec_adj);
  507                 nsec_adj /= hz;
  508                 nbt->tv_sec += leap;
  509                 nts.tv_sec += leap;
  510 
  511                 /*
  512                  * Update the time_second 'approximate time' global.
  513                  */
  514                 time_second = nts.tv_sec;
  515             }
  516 
  517             /*
  518              * Finally, our new basetime is ready to go live!
  519              */
  520             cpu_sfence();
  521             basetime_index = ni;
  522         }
  523 
  524         /*
  525          * lwkt thread scheduler fair queueing
  526          */
  527         lwkt_schedulerclock(curthread);
  528 
  529         /*
  530          * softticks are handled for all cpus
  531          */
  532         hardclock_softtick(gd);
  533 
  534         /*
  535          * ITimer handling is per-tick, per-cpu.
  536          *
  537          * We must acquire the per-process token in order for ksignal()
   538          * to be non-blocking.  For the moment this requires an AST fault;
  539          * the ksignal() cannot be safely issued from this hard interrupt.
  540          *
  541          * XXX Even the trytoken here isn't right, and itimer operation in
  542          *     a multi threaded environment is going to be weird at the
  543          *     very least.
  544          */
  545         if ((p = curproc) != NULL && lwkt_trytoken(&p->p_token)) {
  546                 crit_enter_hard();
  547                 if (frame && CLKF_USERMODE(frame) &&
  548                     timevalisset(&p->p_timer[ITIMER_VIRTUAL].it_value) &&
  549                     itimerdecr(&p->p_timer[ITIMER_VIRTUAL], ustick) == 0) {
  550                         p->p_flags |= P_SIGVTALRM;
  551                         need_user_resched();
  552                 }
  553                 if (timevalisset(&p->p_timer[ITIMER_PROF].it_value) &&
  554                     itimerdecr(&p->p_timer[ITIMER_PROF], ustick) == 0) {
  555                         p->p_flags |= P_SIGPROF;
  556                         need_user_resched();
  557                 }
  558                 crit_exit_hard();
  559                 lwkt_reltoken(&p->p_token);
  560         }
  561         setdelayed();
  562 }
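/*
 * Illustrative sketch (not part of the original file) of the 32.32
 * fixed-point accumulation used by ntp_tick_permanent and nsec_adj above:
 * the value added each tick is "nanoseconds << 32", the low 32 bits of the
 * accumulator carry the fractional nanoseconds from tick to tick, and only
 * whole nanoseconds (the high part) are folded into basetime.  For example,
 * a per-tick value of (1LL << 31) slews basetime by one nanosecond every
 * second tick.  Positive direction only; the real code above also handles
 * negative slew.
 */
#if 0
static long
fixed32_accumulate_sketch(int64_t *acc, int64_t per_tick)
{
	long whole_nsec = 0;

	*acc += per_tick;
	if (*acc >= (1LL << 32)) {
		whole_nsec = (long)(*acc >> 32);
		*acc -= (int64_t)whole_nsec << 32;
	}
	return (whole_nsec);	/* nanoseconds to add to nbt->tv_nsec */
}
#endif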
  563 
  564 /*
  565  * The statistics clock typically runs at a 125Hz rate, and is intended
  566  * to be frequency offset from the hardclock (typ 100Hz).  It is per-cpu.
  567  *
  568  * NOTE! systimer! the MP lock might not be held here.  We can only safely
  569  * manipulate objects owned by the current cpu.
  570  *
  571  * The stats clock is responsible for grabbing a profiling sample.
  572  * Most of the statistics are only used by user-level statistics programs.
  573  * The main exceptions are p->p_uticks, p->p_sticks, p->p_iticks, and
  574  * p->p_estcpu.
  575  *
  576  * Like the other clocks, the stat clock is called from what is effectively
  577  * a fast interrupt, so the context should be the thread/process that got
  578  * interrupted.
  579  */
  580 static void
  581 statclock(systimer_t info, int in_ipi, struct intrframe *frame)
  582 {
  583 #ifdef GPROF
  584         struct gmonparam *g;
  585         int i;
  586 #endif
  587         thread_t td;
  588         struct proc *p;
  589         int bump;
  590         struct timeval tv;
  591         struct timeval *stv;
  592 
  593         /*
  594          * How big was our timeslice relative to the last time?
  595          */
  596         microuptime(&tv);       /* mpsafe */
  597         stv = &mycpu->gd_stattv;
  598         if (stv->tv_sec == 0) {
  599             bump = 1;
  600         } else {
  601             bump = tv.tv_usec - stv->tv_usec +
  602                 (tv.tv_sec - stv->tv_sec) * 1000000;
  603             if (bump < 0)
  604                 bump = 0;
  605             if (bump > 1000000)
  606                 bump = 1000000;
  607         }
  608         *stv = tv;
  609 
  610         td = curthread;
  611         p = td->td_proc;
  612 
  613         if (frame && CLKF_USERMODE(frame)) {
  614                 /*
  615                  * Came from userland, handle user time and deal with
  616                  * possible process.
  617                  */
  618                 if (p && (p->p_flags & P_PROFIL))
  619                         addupc_intr(p, CLKF_PC(frame), 1);
  620                 td->td_uticks += bump;
  621 
  622                 /*
  623                  * Charge the time as appropriate
  624                  */
  625                 if (p && p->p_nice > NZERO)
  626                         cpu_time.cp_nice += bump;
  627                 else
  628                         cpu_time.cp_user += bump;
  629         } else {
  630                 int intr_nest = mycpu->gd_intr_nesting_level;
  631 
  632                 if (in_ipi) {
  633                         /*
  634                          * IPI processing code will bump gd_intr_nesting_level
   635                          * up by one, which breaks the CLKF_INTR testing below,
   636                          * so we subtract one from it here.
  637                          */
  638                         --intr_nest;
  639                 }
  640 #ifdef GPROF
  641                 /*
  642                  * Kernel statistics are just like addupc_intr, only easier.
  643                  */
  644                 g = &_gmonparam;
  645                 if (g->state == GMON_PROF_ON && frame) {
  646                         i = CLKF_PC(frame) - g->lowpc;
  647                         if (i < g->textsize) {
  648                                 i /= HISTFRACTION * sizeof(*g->kcount);
  649                                 g->kcount[i]++;
  650                         }
  651                 }
  652 #endif
  653 
  654 #define IS_INTR_RUNNING ((frame && CLKF_INTR(intr_nest)) || CLKF_INTR_TD(td))
  655 
  656                 /*
  657                  * Came from kernel mode, so we were:
  658                  * - handling an interrupt,
  659                  * - doing syscall or trap work on behalf of the current
  660                  *   user process, or
  661                  * - spinning in the idle loop.
  662                  * Whichever it is, charge the time as appropriate.
  663                  * Note that we charge interrupts to the current process,
  664                  * regardless of whether they are ``for'' that process,
  665                  * so that we know how much of its real time was spent
  666                  * in ``non-process'' (i.e., interrupt) work.
  667                  *
  668                  * XXX assume system if frame is NULL.  A NULL frame 
  669                  * can occur if ipi processing is done from a crit_exit().
  670                  */
  671                 if (IS_INTR_RUNNING)
  672                         td->td_iticks += bump;
  673                 else
  674                         td->td_sticks += bump;
  675 
  676                 if (IS_INTR_RUNNING) {
  677                         /*
  678                          * If we interrupted an interrupt thread, well,
  679                          * count it as interrupt time.
  680                          */
  681 #ifdef DEBUG_PCTRACK
  682                         if (frame)
  683                                 do_pctrack(frame, PCTRACK_INT);
  684 #endif
  685                         cpu_time.cp_intr += bump;
  686                 } else {
  687                         if (td == &mycpu->gd_idlethread) {
  688                                 /*
  689                                  * Even if the current thread is the idle
  690                                  * thread it could be due to token contention
  691                                  * in the LWKT scheduler.  Count such as
  692                                  * system time.
  693                                  */
  694                                 if (mycpu->gd_reqflags & RQF_AST_LWKT_RESCHED)
  695                                         cpu_time.cp_sys += bump;
  696                                 else
  697                                         cpu_time.cp_idle += bump;
  698                         } else {
  699                                 /*
  700                                  * System thread was running.
  701                                  */
  702 #ifdef DEBUG_PCTRACK
  703                                 if (frame)
  704                                         do_pctrack(frame, PCTRACK_SYS);
  705 #endif
  706                                 cpu_time.cp_sys += bump;
  707                         }
  708                 }
  709 
  710 #undef IS_INTR_RUNNING
  711         }
  712 }
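/*
 * Example (illustrative, not part of the original file): if the previous
 * statclock on this cpu fired 7800 usec ago, bump is 7800 and that whole
 * 7.8ms slice is charged to exactly one of cp_user, cp_nice, cp_sys,
 * cp_intr or cp_idle depending on what was interrupted, so the per-cpu
 * buckets always sum to (approximately) the elapsed time.
 */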
  713 
  714 #ifdef DEBUG_PCTRACK
  715 /*
  716  * Sample the PC when in the kernel or in an interrupt.  User code can
  717  * retrieve the information and generate a histogram or other output.
  718  */
  719 
  720 static void
  721 do_pctrack(struct intrframe *frame, int which)
  722 {
  723         struct kinfo_pctrack *pctrack;
  724 
  725         pctrack = &cputime_pctrack[mycpu->gd_cpuid][which];
  726         pctrack->pc_array[pctrack->pc_index & PCTRACK_ARYMASK] = 
  727                 (void *)CLKF_PC(frame);
  728         ++pctrack->pc_index;
  729 }
  730 
  731 static int
  732 sysctl_pctrack(SYSCTL_HANDLER_ARGS)
  733 {
  734         struct kinfo_pcheader head;
  735         int error;
  736         int cpu;
  737         int ntrack;
  738 
  739         head.pc_ntrack = PCTRACK_SIZE;
  740         head.pc_arysize = PCTRACK_ARYSIZE;
  741 
  742         if ((error = SYSCTL_OUT(req, &head, sizeof(head))) != 0)
  743                 return (error);
  744 
  745         for (cpu = 0; cpu < ncpus; ++cpu) {
  746                 for (ntrack = 0; ntrack < PCTRACK_SIZE; ++ntrack) {
  747                         error = SYSCTL_OUT(req, &cputime_pctrack[cpu][ntrack],
  748                                            sizeof(struct kinfo_pctrack));
  749                         if (error)
  750                                 break;
  751                 }
  752                 if (error)
  753                         break;
  754         }
  755         return (error);
  756 }
  757 SYSCTL_PROC(_kern, OID_AUTO, pctrack, (CTLTYPE_OPAQUE|CTLFLAG_RD), 0, 0,
  758         sysctl_pctrack, "S,kinfo_pcheader", "CPU PC tracking");
  759 
  760 #endif
  761 
  762 /*
  763  * The scheduler clock typically runs at a 50Hz rate.  NOTE! systimer,
  764  * the MP lock might not be held.  We can safely manipulate parts of curproc
  765  * but that's about it.
  766  *
  767  * Each cpu has its own scheduler clock.
  768  */
  769 static void
  770 schedclock(systimer_t info, int in_ipi __unused, struct intrframe *frame)
  771 {
  772         struct lwp *lp;
  773         struct rusage *ru;
  774         struct vmspace *vm;
  775         long rss;
  776 
  777         if ((lp = lwkt_preempted_proc()) != NULL) {
  778                 /*
  779                  * Account for cpu time used and hit the scheduler.  Note
  780                  * that this call MUST BE MP SAFE, and the BGL IS NOT HELD
  781                  * HERE.
  782                  */
  783                 ++lp->lwp_cpticks;
  784                 usched_schedulerclock(lp, info->periodic, info->time);
  785         } else {
  786                 usched_schedulerclock(NULL, info->periodic, info->time);
  787         }
  788         if ((lp = curthread->td_lwp) != NULL) {
  789                 /*
  790                  * Update resource usage integrals and maximums.
  791                  */
  792                 if ((ru = &lp->lwp_proc->p_ru) &&
  793                     (vm = lp->lwp_proc->p_vmspace) != NULL) {
  794                         ru->ru_ixrss += pgtok(vm->vm_tsize);
  795                         ru->ru_idrss += pgtok(vm->vm_dsize);
  796                         ru->ru_isrss += pgtok(vm->vm_ssize);
  797                         if (lwkt_trytoken(&vm->vm_map.token)) {
  798                                 rss = pgtok(vmspace_resident_count(vm));
  799                                 if (ru->ru_maxrss < rss)
  800                                         ru->ru_maxrss = rss;
  801                                 lwkt_reltoken(&vm->vm_map.token);
  802                         }
  803                 }
  804         }
  805         /* Increment the global sched_ticks */
  806         if (mycpu->gd_cpuid == 0)
  807                 ++sched_ticks;
  808 }
  809 
  810 /*
  811  * Compute number of ticks for the specified amount of time.  The 
  812  * return value is intended to be used in a clock interrupt timed
   813  * operation and guaranteed to meet or exceed the requested time.
   814  * If the representation overflows, return INT_MAX.  The minimum return
   815  * value is 1 tick and the function will round the calculation up.
   816  * If any value greater than 0 microseconds is supplied, a value
  817  * of at least 2 will be returned to ensure that a near-term clock
  818  * interrupt does not cause the timeout to occur (degenerately) early.
  819  *
  820  * Note that limit checks must take into account microseconds, which is
  821  * done simply by using the smaller signed long maximum instead of
  822  * the unsigned long maximum.
  823  *
  824  * If ints have 32 bits, then the maximum value for any timeout in
  825  * 10ms ticks is 248 days.
  826  */
  827 int
  828 tvtohz_high(struct timeval *tv)
  829 {
  830         int ticks;
  831         long sec, usec;
  832 
  833         sec = tv->tv_sec;
  834         usec = tv->tv_usec;
  835         if (usec < 0) {
  836                 sec--;
  837                 usec += 1000000;
  838         }
  839         if (sec < 0) {
  840 #ifdef DIAGNOSTIC
  841                 if (usec > 0) {
  842                         sec++;
  843                         usec -= 1000000;
  844                 }
  845                 kprintf("tvtohz_high: negative time difference "
  846                         "%ld sec %ld usec\n",
  847                         sec, usec);
  848 #endif
  849                 ticks = 1;
  850         } else if (sec <= INT_MAX / hz) {
  851                 ticks = (int)(sec * hz + 
  852                             ((u_long)usec + (ustick - 1)) / ustick) + 1;
  853         } else {
  854                 ticks = INT_MAX;
  855         }
  856         return (ticks);
  857 }
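/*
 * Worked example (illustrative, not part of the original file), assuming
 * hz = 100 so ustick = 10000 usec per tick: a request of 25000 usec gives
 * (25000 + 9999) / 10000 = 3 ticks plus the extra +1 tick of slop, i.e.
 * 4 ticks (40ms), which meets or exceeds the requested 25ms as promised.
 */
#if 0
static void
tvtohz_high_example_check(void)
{
	struct timeval tv = { 0, 25000 };

	KKASSERT(tvtohz_high(&tv) == 4);	/* only if hz == 100 */
}
#endif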
  858 
  859 int
  860 tstohz_high(struct timespec *ts)
  861 {
  862         int ticks;
  863         long sec, nsec;
  864 
  865         sec = ts->tv_sec;
  866         nsec = ts->tv_nsec;
  867         if (nsec < 0) {
  868                 sec--;
  869                 nsec += 1000000000;
  870         }
  871         if (sec < 0) {
  872 #ifdef DIAGNOSTIC
  873                 if (nsec > 0) {
  874                         sec++;
  875                         nsec -= 1000000000;
  876                 }
  877                 kprintf("tstohz_high: negative time difference "
  878                         "%ld sec %ld nsec\n",
  879                         sec, nsec);
  880 #endif
  881                 ticks = 1;
  882         } else if (sec <= INT_MAX / hz) {
  883                 ticks = (int)(sec * hz +
  884                             ((u_long)nsec + (nstick - 1)) / nstick) + 1;
  885         } else {
  886                 ticks = INT_MAX;
  887         }
  888         return (ticks);
  889 }
  890 
  891 
  892 /*
  893  * Compute number of ticks for the specified amount of time, erroring on
  894  * the side of it being too low to ensure that sleeping the returned number
  895  * of ticks will not result in a late return.
  896  *
  897  * The supplied timeval may not be negative and should be normalized.  A
   898  * return value of 0 is possible if the timeval converts to less than
  899  * 1 tick.
  900  *
  901  * If ints have 32 bits, then the maximum value for any timeout in
  902  * 10ms ticks is 248 days.
  903  */
  904 int
  905 tvtohz_low(struct timeval *tv)
  906 {
  907         int ticks;
  908         long sec;
  909 
  910         sec = tv->tv_sec;
  911         if (sec <= INT_MAX / hz)
  912                 ticks = (int)(sec * hz + (u_long)tv->tv_usec / ustick);
  913         else
  914                 ticks = INT_MAX;
  915         return (ticks);
  916 }
  917 
  918 int
  919 tstohz_low(struct timespec *ts)
  920 {
  921         int ticks;
  922         long sec;
  923 
  924         sec = ts->tv_sec;
  925         if (sec <= INT_MAX / hz)
  926                 ticks = (int)(sec * hz + (u_long)ts->tv_nsec / nstick);
  927         else
  928                 ticks = INT_MAX;
  929         return (ticks);
  930 }
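/*
 * Illustrative contrast (not part of the original file), again assuming
 * hz = 100: the *_low() conversions round down and add no slop, so the
 * same 25ms request that tvtohz_high() turned into 4 ticks becomes only
 * 2 ticks here, which is what you want when sleeping for the result must
 * not return late.
 */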
  931 
  932 /*
  933  * Start profiling on a process.
  934  *
  935  * Kernel profiling passes proc0 which never exits and hence
  936  * keeps the profile clock running constantly.
  937  */
  938 void
  939 startprofclock(struct proc *p)
  940 {
  941         if ((p->p_flags & P_PROFIL) == 0) {
  942                 p->p_flags |= P_PROFIL;
  943 #if 0   /* XXX */
  944                 if (++profprocs == 1 && stathz != 0) {
  945                         crit_enter();
  946                         psdiv = psratio;
  947                         setstatclockrate(profhz);
  948                         crit_exit();
  949                 }
  950 #endif
  951         }
  952 }
  953 
  954 /*
  955  * Stop profiling on a process.
  956  *
  957  * caller must hold p->p_token
  958  */
  959 void
  960 stopprofclock(struct proc *p)
  961 {
  962         if (p->p_flags & P_PROFIL) {
  963                 p->p_flags &= ~P_PROFIL;
  964 #if 0   /* XXX */
  965                 if (--profprocs == 0 && stathz != 0) {
  966                         crit_enter();
  967                         psdiv = 1;
  968                         setstatclockrate(stathz);
  969                         crit_exit();
  970                 }
  971 #endif
  972         }
  973 }
  974 
  975 /*
  976  * Return information about system clocks.
  977  */
  978 static int
  979 sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
  980 {
  981         struct kinfo_clockinfo clkinfo;
  982         /*
  983          * Construct clockinfo structure.
  984          */
  985         clkinfo.ci_hz = hz;
  986         clkinfo.ci_tick = ustick;
  987         clkinfo.ci_tickadj = ntp_default_tick_delta / 1000;
  988         clkinfo.ci_profhz = profhz;
  989         clkinfo.ci_stathz = stathz ? stathz : hz;
  990         return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
  991 }
  992 
  993 SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
  994         0, 0, sysctl_kern_clockrate, "S,clockinfo","");
  995 
  996 /*
  997  * We have eight functions for looking at the clock, four for
   998  * microseconds and four for nanoseconds.  For each there is a fast
  999  * but less precise version "get{nano|micro}[up]time" which will
 1000  * return a time which is up to 1/HZ previous to the call, whereas
 1001  * the raw version "{nano|micro}[up]time" will return a timestamp
 1002  * which is as precise as possible.  The "up" variants return the
  1003  * time relative to system boot; these are well suited for time
 1004  * interval measurements.
 1005  *
  1006  * Each cpu independently maintains the current time of day, so all
 1007  * we need to do to protect ourselves from changes is to do a loop
 1008  * check on the seconds field changing out from under us.
 1009  *
 1010  * The system timer maintains a 32 bit count and due to various issues
  1011  * it is possible for the calculated delta to occasionally exceed
 1012  * sys_cputimer->freq.  If this occurs the sys_cputimer->freq64_nsec
 1013  * multiplication can easily overflow, so we deal with the case.  For
 1014  * uniformity we deal with the case in the usec case too.
 1015  *
 1016  * All the [get][micro,nano][time,uptime]() routines are MPSAFE.
 1017  */
 1018 void
 1019 getmicrouptime(struct timeval *tvp)
 1020 {
 1021         struct globaldata *gd = mycpu;
 1022         sysclock_t delta;
 1023 
 1024         do {
 1025                 tvp->tv_sec = gd->gd_time_seconds;
 1026                 delta = gd->gd_hardclock.time - gd->gd_cpuclock_base;
 1027         } while (tvp->tv_sec != gd->gd_time_seconds);
 1028 
 1029         if (delta >= sys_cputimer->freq) {
 1030                 tvp->tv_sec += delta / sys_cputimer->freq;
 1031                 delta %= sys_cputimer->freq;
 1032         }
 1033         tvp->tv_usec = (sys_cputimer->freq64_usec * delta) >> 32;
 1034         if (tvp->tv_usec >= 1000000) {
 1035                 tvp->tv_usec -= 1000000;
 1036                 ++tvp->tv_sec;
 1037         }
 1038 }
 1039 
 1040 void
 1041 getnanouptime(struct timespec *tsp)
 1042 {
 1043         struct globaldata *gd = mycpu;
 1044         sysclock_t delta;
 1045 
 1046         do {
 1047                 tsp->tv_sec = gd->gd_time_seconds;
 1048                 delta = gd->gd_hardclock.time - gd->gd_cpuclock_base;
 1049         } while (tsp->tv_sec != gd->gd_time_seconds);
 1050 
 1051         if (delta >= sys_cputimer->freq) {
 1052                 tsp->tv_sec += delta / sys_cputimer->freq;
 1053                 delta %= sys_cputimer->freq;
 1054         }
 1055         tsp->tv_nsec = (sys_cputimer->freq64_nsec * delta) >> 32;
 1056 }
 1057 
 1058 void
 1059 microuptime(struct timeval *tvp)
 1060 {
 1061         struct globaldata *gd = mycpu;
 1062         sysclock_t delta;
 1063 
 1064         do {
 1065                 tvp->tv_sec = gd->gd_time_seconds;
 1066                 delta = sys_cputimer->count() - gd->gd_cpuclock_base;
 1067         } while (tvp->tv_sec != gd->gd_time_seconds);
 1068 
 1069         if (delta >= sys_cputimer->freq) {
 1070                 tvp->tv_sec += delta / sys_cputimer->freq;
 1071                 delta %= sys_cputimer->freq;
 1072         }
 1073         tvp->tv_usec = (sys_cputimer->freq64_usec * delta) >> 32;
 1074 }
 1075 
 1076 void
 1077 nanouptime(struct timespec *tsp)
 1078 {
 1079         struct globaldata *gd = mycpu;
 1080         sysclock_t delta;
 1081 
 1082         do {
 1083                 tsp->tv_sec = gd->gd_time_seconds;
 1084                 delta = sys_cputimer->count() - gd->gd_cpuclock_base;
 1085         } while (tsp->tv_sec != gd->gd_time_seconds);
 1086 
 1087         if (delta >= sys_cputimer->freq) {
 1088                 tsp->tv_sec += delta / sys_cputimer->freq;
 1089                 delta %= sys_cputimer->freq;
 1090         }
 1091         tsp->tv_nsec = (sys_cputimer->freq64_nsec * delta) >> 32;
 1092 }
 1093 
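/*
 * Illustrative usage (not part of the original file): timing an interval
 * with the routines above.  The "up" variants are monotonic, so they are
 * the right choice here; the get*() forms are cheaper but may lag the raw
 * forms by up to 1/hz.
 */
#if 0
static int64_t
measure_interval_nsec_sketch(void (*work)(void))
{
	struct timespec t1, t2;

	nanouptime(&t1);
	work();
	nanouptime(&t2);
	return ((int64_t)(t2.tv_sec - t1.tv_sec) * 1000000000LL +
	    (t2.tv_nsec - t1.tv_nsec));
}
#endif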
 1094 /*
 1095  * realtime routines
 1096  */
 1097 void
 1098 getmicrotime(struct timeval *tvp)
 1099 {
 1100         struct globaldata *gd = mycpu;
 1101         struct timespec *bt;
 1102         sysclock_t delta;
 1103 
 1104         do {
 1105                 tvp->tv_sec = gd->gd_time_seconds;
 1106                 delta = gd->gd_hardclock.time - gd->gd_cpuclock_base;
 1107         } while (tvp->tv_sec != gd->gd_time_seconds);
 1108 
 1109         if (delta >= sys_cputimer->freq) {
 1110                 tvp->tv_sec += delta / sys_cputimer->freq;
 1111                 delta %= sys_cputimer->freq;
 1112         }
 1113         tvp->tv_usec = (sys_cputimer->freq64_usec * delta) >> 32;
 1114 
 1115         bt = &basetime[basetime_index];
 1116         tvp->tv_sec += bt->tv_sec;
 1117         tvp->tv_usec += bt->tv_nsec / 1000;
 1118         while (tvp->tv_usec >= 1000000) {
 1119                 tvp->tv_usec -= 1000000;
 1120                 ++tvp->tv_sec;
 1121         }
 1122 }
 1123 
 1124 void
 1125 getnanotime(struct timespec *tsp)
 1126 {
 1127         struct globaldata *gd = mycpu;
 1128         struct timespec *bt;
 1129         sysclock_t delta;
 1130 
 1131         do {
 1132                 tsp->tv_sec = gd->gd_time_seconds;
 1133                 delta = gd->gd_hardclock.time - gd->gd_cpuclock_base;
 1134         } while (tsp->tv_sec != gd->gd_time_seconds);
 1135 
 1136         if (delta >= sys_cputimer->freq) {
 1137                 tsp->tv_sec += delta / sys_cputimer->freq;
 1138                 delta %= sys_cputimer->freq;
 1139         }
 1140         tsp->tv_nsec = (sys_cputimer->freq64_nsec * delta) >> 32;
 1141 
 1142         bt = &basetime[basetime_index];
 1143         tsp->tv_sec += bt->tv_sec;
 1144         tsp->tv_nsec += bt->tv_nsec;
 1145         while (tsp->tv_nsec >= 1000000000) {
 1146                 tsp->tv_nsec -= 1000000000;
 1147                 ++tsp->tv_sec;
 1148         }
 1149 }
 1150 
 1151 static void
 1152 getnanotime_nbt(struct timespec *nbt, struct timespec *tsp)
 1153 {
 1154         struct globaldata *gd = mycpu;
 1155         sysclock_t delta;
 1156 
 1157         do {
 1158                 tsp->tv_sec = gd->gd_time_seconds;
 1159                 delta = gd->gd_hardclock.time - gd->gd_cpuclock_base;
 1160         } while (tsp->tv_sec != gd->gd_time_seconds);
 1161 
 1162         if (delta >= sys_cputimer->freq) {
 1163                 tsp->tv_sec += delta / sys_cputimer->freq;
 1164                 delta %= sys_cputimer->freq;
 1165         }
 1166         tsp->tv_nsec = (sys_cputimer->freq64_nsec * delta) >> 32;
 1167 
 1168         tsp->tv_sec += nbt->tv_sec;
 1169         tsp->tv_nsec += nbt->tv_nsec;
 1170         while (tsp->tv_nsec >= 1000000000) {
 1171                 tsp->tv_nsec -= 1000000000;
 1172                 ++tsp->tv_sec;
 1173         }
 1174 }
 1175 
 1176 
 1177 void
 1178 microtime(struct timeval *tvp)
 1179 {
 1180         struct globaldata *gd = mycpu;
 1181         struct timespec *bt;
 1182         sysclock_t delta;
 1183 
 1184         do {
 1185                 tvp->tv_sec = gd->gd_time_seconds;
 1186                 delta = sys_cputimer->count() - gd->gd_cpuclock_base;
 1187         } while (tvp->tv_sec != gd->gd_time_seconds);
 1188 
 1189         if (delta >= sys_cputimer->freq) {
 1190                 tvp->tv_sec += delta / sys_cputimer->freq;
 1191                 delta %= sys_cputimer->freq;
 1192         }
 1193         tvp->tv_usec = (sys_cputimer->freq64_usec * delta) >> 32;
 1194 
 1195         bt = &basetime[basetime_index];
 1196         tvp->tv_sec += bt->tv_sec;
 1197         tvp->tv_usec += bt->tv_nsec / 1000;
 1198         while (tvp->tv_usec >= 1000000) {
 1199                 tvp->tv_usec -= 1000000;
 1200                 ++tvp->tv_sec;
 1201         }
 1202 }
 1203 
 1204 void
 1205 nanotime(struct timespec *tsp)
 1206 {
 1207         struct globaldata *gd = mycpu;
 1208         struct timespec *bt;
 1209         sysclock_t delta;
 1210 
 1211         do {
 1212                 tsp->tv_sec = gd->gd_time_seconds;
 1213                 delta = sys_cputimer->count() - gd->gd_cpuclock_base;
 1214         } while (tsp->tv_sec != gd->gd_time_seconds);
 1215 
 1216         if (delta >= sys_cputimer->freq) {
 1217                 tsp->tv_sec += delta / sys_cputimer->freq;
 1218                 delta %= sys_cputimer->freq;
 1219         }
 1220         tsp->tv_nsec = (sys_cputimer->freq64_nsec * delta) >> 32;
 1221 
 1222         bt = &basetime[basetime_index];
 1223         tsp->tv_sec += bt->tv_sec;
 1224         tsp->tv_nsec += bt->tv_nsec;
 1225         while (tsp->tv_nsec >= 1000000000) {
 1226                 tsp->tv_nsec -= 1000000000;
 1227                 ++tsp->tv_sec;
 1228         }
 1229 }
 1230 
 1231 /*
 1232  * note: this is not exactly synchronized with real time.  To do that we
 1233  * would have to do what microtime does and check for a nanoseconds overflow.
 1234  */
 1235 time_t
 1236 get_approximate_time_t(void)
 1237 {
 1238         struct globaldata *gd = mycpu;
 1239         struct timespec *bt;
 1240 
 1241         bt = &basetime[basetime_index];
 1242         return(gd->gd_time_seconds + bt->tv_sec);
 1243 }
 1244 
 1245 int
 1246 pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps)
 1247 {
 1248         pps_params_t *app;
 1249         struct pps_fetch_args *fapi;
 1250 #ifdef PPS_SYNC
 1251         struct pps_kcbind_args *kapi;
 1252 #endif
 1253 
 1254         switch (cmd) {
 1255         case PPS_IOC_CREATE:
 1256                 return (0);
 1257         case PPS_IOC_DESTROY:
 1258                 return (0);
 1259         case PPS_IOC_SETPARAMS:
 1260                 app = (pps_params_t *)data;
 1261                 if (app->mode & ~pps->ppscap)
 1262                         return (EINVAL);
 1263                 pps->ppsparam = *app;         
 1264                 return (0);
 1265         case PPS_IOC_GETPARAMS:
 1266                 app = (pps_params_t *)data;
 1267                 *app = pps->ppsparam;
 1268                 app->api_version = PPS_API_VERS_1;
 1269                 return (0);
 1270         case PPS_IOC_GETCAP:
 1271                 *(int*)data = pps->ppscap;
 1272                 return (0);
 1273         case PPS_IOC_FETCH:
 1274                 fapi = (struct pps_fetch_args *)data;
 1275                 if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC)
 1276                         return (EINVAL);
 1277                 if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec)
 1278                         return (EOPNOTSUPP);
 1279                 pps->ppsinfo.current_mode = pps->ppsparam.mode;         
 1280                 fapi->pps_info_buf = pps->ppsinfo;
 1281                 return (0);
 1282         case PPS_IOC_KCBIND:
 1283 #ifdef PPS_SYNC
 1284                 kapi = (struct pps_kcbind_args *)data;
 1285                 /* XXX Only root should be able to do this */
 1286                 if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC)
 1287                         return (EINVAL);
 1288                 if (kapi->kernel_consumer != PPS_KC_HARDPPS)
 1289                         return (EINVAL);
 1290                 if (kapi->edge & ~pps->ppscap)
 1291                         return (EINVAL);
 1292                 pps->kcmode = kapi->edge;
 1293                 return (0);
 1294 #else
 1295                 return (EOPNOTSUPP);
 1296 #endif
 1297         default:
 1298                 return (ENOTTY);
 1299         }
 1300 }
 1301 
 1302 void
 1303 pps_init(struct pps_state *pps)
 1304 {
 1305         pps->ppscap |= PPS_TSFMT_TSPEC;
 1306         if (pps->ppscap & PPS_CAPTUREASSERT)
 1307                 pps->ppscap |= PPS_OFFSETASSERT;
 1308         if (pps->ppscap & PPS_CAPTURECLEAR)
 1309                 pps->ppscap |= PPS_OFFSETCLEAR;
 1310 }
 1311 
 1312 void
 1313 pps_event(struct pps_state *pps, sysclock_t count, int event)
 1314 {
 1315         struct globaldata *gd;
 1316         struct timespec *tsp;
 1317         struct timespec *osp;
 1318         struct timespec *bt;
 1319         struct timespec ts;
 1320         sysclock_t *pcount;
 1321 #ifdef PPS_SYNC
 1322         sysclock_t tcount;
 1323 #endif
 1324         sysclock_t delta;
 1325         pps_seq_t *pseq;
 1326         int foff;
 1327         int fhard;
 1328 
 1329         gd = mycpu;
 1330 
 1331         /* Things would be easier with arrays... */
 1332         if (event == PPS_CAPTUREASSERT) {
 1333                 tsp = &pps->ppsinfo.assert_timestamp;
 1334                 osp = &pps->ppsparam.assert_offset;
 1335                 foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
 1336                 fhard = pps->kcmode & PPS_CAPTUREASSERT;
 1337                 pcount = &pps->ppscount[0];
 1338                 pseq = &pps->ppsinfo.assert_sequence;
 1339         } else {
 1340                 tsp = &pps->ppsinfo.clear_timestamp;
 1341                 osp = &pps->ppsparam.clear_offset;
 1342                 foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
 1343                 fhard = pps->kcmode & PPS_CAPTURECLEAR;
 1344                 pcount = &pps->ppscount[1];
 1345                 pseq = &pps->ppsinfo.clear_sequence;
 1346         }
 1347 
 1348         /* Nothing really happened */
 1349         if (*pcount == count)
 1350                 return;
 1351 
 1352         *pcount = count;
 1353 
 1354         do {
 1355                 ts.tv_sec = gd->gd_time_seconds;
 1356                 delta = count - gd->gd_cpuclock_base;
 1357         } while (ts.tv_sec != gd->gd_time_seconds);
 1358 
 1359         if (delta >= sys_cputimer->freq) {
 1360                 ts.tv_sec += delta / sys_cputimer->freq;
 1361                 delta %= sys_cputimer->freq;
 1362         }
 1363         ts.tv_nsec = (sys_cputimer->freq64_nsec * delta) >> 32;
 1364         bt = &basetime[basetime_index];
 1365         ts.tv_sec += bt->tv_sec;
 1366         ts.tv_nsec += bt->tv_nsec;
 1367         while (ts.tv_nsec >= 1000000000) {
 1368                 ts.tv_nsec -= 1000000000;
 1369                 ++ts.tv_sec;
 1370         }
 1371 
 1372         (*pseq)++;
 1373         *tsp = ts;
 1374 
 1375         if (foff) {
 1376                 timespecadd(tsp, osp);
 1377                 if (tsp->tv_nsec < 0) {
 1378                         tsp->tv_nsec += 1000000000;
 1379                         tsp->tv_sec -= 1;
 1380                 }
 1381         }
 1382 #ifdef PPS_SYNC
 1383         if (fhard) {
 1384                 /* magic, at its best... */
 1385                 tcount = count - pps->ppscount[2];
 1386                 pps->ppscount[2] = count;
 1387                 if (tcount >= sys_cputimer->freq) {
 1388                         delta = (1000000000 * (tcount / sys_cputimer->freq) +
 1389                                  sys_cputimer->freq64_nsec * 
 1390                                  (tcount % sys_cputimer->freq)) >> 32;
 1391                 } else {
 1392                         delta = (sys_cputimer->freq64_nsec * tcount) >> 32;
 1393                 }
 1394                 hardpps(tsp, delta);
 1395         }
 1396 #endif
 1397 }
 1398 
 1399 /*
 1400  * Return the tsc target value for a delay of (ns).
 1401  *
 1402  * Returns -1 if the TSC is not supported.
 1403  */
 1404 int64_t
 1405 tsc_get_target(int ns)
 1406 {
 1407 #if defined(_RDTSC_SUPPORTED_)
 1408         if (cpu_feature & CPUID_TSC) {
 1409                 return (rdtsc() + tsc_frequency * ns / (int64_t)1000000000);
 1410         }
 1411 #endif
 1412         return(-1);
 1413 }
 1414 
 1415 /*
 1416  * Compare the tsc against the passed target
 1417  *
 1418  * Returns +1 if the target has been reached
 1419  * Returns  0 if the target has not yet been reached
 1420  * Returns -1 if the TSC is not supported.
 1421  *
 1422  * Typical use:         while (tsc_test_target(target) == 0) { ...poll... }
 1423  */
 1424 int
 1425 tsc_test_target(int64_t target)
 1426 {
 1427 #if defined(_RDTSC_SUPPORTED_)
 1428         if (cpu_feature & CPUID_TSC) {
 1429                 if ((int64_t)(target - rdtsc()) <= 0)
 1430                         return(1);
 1431                 return(0);
 1432         }
 1433 #endif
 1434         return(-1);
 1435 }
 1436 
 1437 /*
 1438  * Delay the specified number of nanoseconds using the tsc.  This function
 1439  * returns immediately if the TSC is not supported.  At least one cpu_pause()
 1440  * will be issued.
 1441  */
 1442 void
 1443 tsc_delay(int ns)
 1444 {
 1445         int64_t clk;
 1446 
 1447         clk = tsc_get_target(ns);
 1448         cpu_pause();
 1449         while (tsc_test_target(clk) == 0)
 1450                 cpu_pause();
 1451 }
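/*
 * Illustrative usage (not part of the original file): a bounded busy-wait
 * built from the two helpers above.  The ready_flag pointer is a
 * hypothetical device-ready indicator; the loop gives up after roughly
 * 10 microseconds (or immediately if the TSC is not supported, since
 * tsc_test_target() then returns -1).
 */
#if 0
static int
wait_ready_sketch(volatile int *ready_flag)
{
	int64_t target = tsc_get_target(10000);		/* ~10us in ns */

	while (*ready_flag == 0) {
		if (tsc_test_target(target) != 0)
			return (0);			/* timed out / no TSC */
		cpu_pause();
	}
	return (1);					/* became ready in time */
}
#endif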



This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.