kern_clock.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org>
    3  * Copyright (c) 1982, 1986, 1991, 1993
    4  *      The Regents of the University of California.  All rights reserved.
    5  * (c) UNIX System Laboratories, Inc.
    6  * All or some portions of this file are derived from material licensed
    7  * to the University of California by American Telephone and Telegraph
    8  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    9  * the permission of UNIX System Laboratories, Inc.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  * 3. All advertising materials mentioning features or use of this software
   20  *    must display the following acknowledgement:
   21  *      This product includes software developed by the University of
   22  *      California, Berkeley and its contributors.
   23  * 4. Neither the name of the University nor the names of its contributors
   24  *    may be used to endorse or promote products derived from this software
   25  *    without specific prior written permission.
   26  *
   27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   37  * SUCH DAMAGE.
   38  *
   39  *      @(#)kern_clock.c        8.5 (Berkeley) 1/21/94
   40  * $FreeBSD$
   41  */
   42 
   43 #include "opt_ntp.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/dkstat.h>
   48 #include <sys/callout.h>
   49 #include <sys/kernel.h>
   50 #include <sys/proc.h>
   51 #include <sys/malloc.h>
   52 #include <sys/resourcevar.h>
   53 #include <sys/signalvar.h>
   54 #include <sys/timex.h>
   55 #include <sys/timepps.h>
   56 #include <vm/vm.h>
   57 #include <sys/lock.h>
   58 #include <vm/pmap.h>
   59 #include <vm/vm_map.h>
   60 #include <sys/sysctl.h>
   61 
   62 #include <machine/cpu.h>
   63 #include <machine/limits.h>
   64 #include <machine/smp.h>
   65 
   66 #ifdef GPROF
   67 #include <sys/gmon.h>
   68 #endif
   69 
#ifdef DEVICE_POLLING
/*
 * Device polling hooks, defined outside this file.  init_device_poll() is
 * called once from initclocks() and hardclock_device_poll() on every
 * hardclock() tick.
 */
extern void init_device_poll(void);
extern void hardclock_device_poll(void);
#endif /* DEVICE_POLLING */
   74 
/*
 * A large time step typically happens at boot.  This constant is the
 * threshold, in seconds, above which tco_forward() treats a step as
 * large.  It is relatively small so that ntp_update_second() gets called
 * enough in the typical 'missed a couple of seconds' case, but doesn't
 * loop forever when the time step is large.
 */
#define LARGE_STEP      200

/*
 * Number of timecounters used to implement stable storage.
 * init_timecounter() allocates this many copies of each registered
 * timecounter and links them into a ring.
 */
#ifndef NTIMECOUNTER
#define NTIMECOUNTER    5
#endif
   89 
/* Malloc type used for the timecounter copies made in init_timecounter(). */
static MALLOC_DEFINE(M_TIMECOUNTER, "timecounter", 
        "Timecounter stable storage");

/* Hook initclocks() into SYSINIT at SI_SUB_CLOCKS / SI_ORDER_FIRST. */
static void initclocks __P((void *dummy));
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)

/* Forward declarations of the file-local timecounter helpers defined below. */
static void tco_forward __P((int force));
static void tco_setscales __P((struct timecounter *tc));
static __inline unsigned tco_delta __P((struct timecounter *tc));
   99 
/* Some of these don't belong here, but it's easiest to concentrate them. */

/*
 * Per-CPU-state tick counters.  statclock() increments the entry that
 * matches what the CPU was doing when the statistics tick was charged.
 */
long cp_time[CPUSTATES];

SYSCTL_OPAQUE(_kern, OID_AUTO, cp_time, CTLFLAG_RD, &cp_time, sizeof(cp_time),
    "LU", "CPU time statistics");

/*
 * Terminal character counters.  They are only defined here; nothing in
 * the visible part of this file updates them (presumably the tty code
 * does -- confirm before relying on that).
 */
long tk_cancc;
long tk_nin;
long tk_nout;
long tk_rawcc;

/* Wall-clock time in whole seconds, refreshed by tco_forward(). */
time_t time_second;

/*
 * Offset that is added to the uptime to obtain wall-clock time.  It is
 * set by set_timecounter() and slewed by tickdelta in tco_forward().
 */
struct  timeval boottime;
SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime, CTLFLAG_RD,
    &boottime, timeval, "System boottime");
  116 
/*
 * Which update policy to use.
 *   0 - every tick, bad hardware may fail with "calcru negative..."
 *   1 - more resistant to the above hardware, but less efficient.
 *
 * When nonzero, the get{micro|nano}[up]time() functions call their
 * precise counterparts instead of returning the cached snapshot, and
 * tco_forward() skips refreshing that snapshot unless it is forced or a
 * second boundary has been crossed.
 */
static int tco_method;
  123 
  124 /*
  125  * Implement a dummy timecounter which we can use until we get a real one
  126  * in the air.  This allows the console and other early stuff to use
  127  * timeservices.
  128  */
  129 
  130 static unsigned 
  131 dummy_get_timecount(struct timecounter *tc)
  132 {
  133         static unsigned now;
  134         return (++now);
  135 }
  136 
/*
 * Positional initializer.  The member meanings noted below are inferred
 * from how struct timecounter is used elsewhere in this file; confirm them
 * against the structure declaration before reordering anything.
 */
static struct timecounter dummy_timecounter = {
        dummy_get_timecount,    /* counter read function */
        0,                      /* presumably tc_poll_pps: none */
        ~0u,                    /* presumably tc_counter_mask: all bits valid */
        1000000,                /* presumably tc_frequency, in Hz */
        "dummy"                 /* presumably tc_name */
};

/*
 * The timecounter currently in use.  It starts out as the dummy and is
 * replaced by init_timecounter(), switch_timecounter() and tco_forward().
 */
struct timecounter *timecounter = &dummy_timecounter;
  146 
  147 /*
  148  * Clock handling routines.
  149  *
  150  * This code is written to operate with two timers that run independently of
  151  * each other.
  152  *
  153  * The main timer, running hz times per second, is used to trigger interval
  154  * timers, timeouts and rescheduling as needed.
  155  *
  156  * The second timer handles kernel and user profiling,
  157  * and does resource use estimation.  If the second timer is programmable,
  158  * it is randomized to avoid aliasing between the two clocks.  For example,
  159  * the randomization prevents an adversary from always giving up the cpu
  160  * just before its quantum expires.  Otherwise, it would never accumulate
  161  * cpu ticks.  The mean frequency of the second timer is stathz.
  162  *
  163  * If no second timer exists, stathz will be zero; in this case we drive
  164  * profiling and statistics off the main clock.  This WILL NOT be accurate;
  165  * do not do it unless absolutely necessary.
  166  *
  167  * The statistics clock may (or may not) be run at a higher rate while
  168  * profiling.  This profile clock runs at profhz.  We require that profhz
  169  * be an integral multiple of stathz.
  170  *
  171  * If the statistics clock is running fast, it must be divided by the ratio
  172  * profhz/stathz for statistics.  (For profiling, every tick counts.)
  173  *
  174  * Time-of-day is maintained using a "timecounter", which may or may
  175  * not be related to the hardware generating the above mentioned
  176  * interrupts.
  177  */
  178 
/*
 * stathz    - statistics clock frequency; 0 means there is no separate
 *             statistics clock and hardclock() calls statclock() itself.
 * profhz    - profiling clock frequency; initclocks() defaults it to the
 *             statistics rate when it is still 0.
 * profprocs - number of processes that currently have P_PROFIL set
 *             (maintained by startprofclock()/stopprofclock()).
 * ticks     - incremented once per hardclock() call.
 * psdiv     - reload value for pscnt; pscnt counts statclock() calls down
 *             so that statistics are charged once every psdiv calls.
 */
int     stathz;
int     profhz;
static int profprocs;
int     ticks;
static int psdiv, pscnt;                /* prof => stat divider */
int     psratio;                        /* ratio: prof / stat */
  185 
  186 /*
  187  * Initialize clock frequencies and start both clocks running.
  188  */
  189 /* ARGSUSED*/
  190 static void
  191 initclocks(dummy)
  192         void *dummy;
  193 {
  194         register int i;
  195 
  196         /*
  197          * Set divisors to 1 (normal case) and let the machine-specific
  198          * code do its bit.
  199          */
  200         psdiv = pscnt = 1;
  201         cpu_initclocks();
  202 
  203 #ifdef DEVICE_POLLING
  204         init_device_poll();
  205 #endif
  206 
  207         /*
  208          * Compute profhz/stathz, and fix profhz if needed.
  209          */
  210         i = stathz ? stathz : hz;
  211         if (profhz == 0)
  212                 profhz = i;
  213         psratio = profhz / i;
  214 }
  215 
  216 /*
  217  * The real-time timer, interrupting hz times per second.
  218  */
  219 void
  220 hardclock(frame)
  221         register struct clockframe *frame;
  222 {
  223         register struct proc *p;
  224 
  225         p = curproc;
  226         if (p) {
  227                 register struct pstats *pstats;
  228 
  229                 /*
  230                  * Run current process's virtual and profile time, as needed.
  231                  */
  232                 pstats = p->p_stats;
  233                 if (CLKF_USERMODE(frame) &&
  234                     timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
  235                     itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
  236                         psignal(p, SIGVTALRM);
  237                 if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
  238                     itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
  239                         psignal(p, SIGPROF);
  240         }
  241 
  242 #if defined(SMP) && defined(BETTER_CLOCK)
  243         forward_hardclock(pscnt);
  244 #endif
  245 
  246         /*
  247          * If no separate statistics clock is available, run it from here.
  248          */
  249         if (stathz == 0)
  250                 statclock(frame);
  251 
  252         tco_forward(0);
  253         ticks++;
  254 
  255 #ifdef DEVICE_POLLING
  256         hardclock_device_poll();        /* this is very short and quick */
  257 #endif /* DEVICE_POLLING */
  258 
  259         /*
  260          * Process callouts at a very low cpu priority, so we don't keep the
  261          * relatively high clock interrupt priority any longer than necessary.
  262          */
  263         if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) {
  264                 if (CLKF_BASEPRI(frame)) {
  265                         /*
  266                          * Save the overhead of a software interrupt;
  267                          * it will happen as soon as we return, so do it now.
  268                          */
  269                         (void)splsoftclock();
  270                         softclock();
  271                 } else
  272                         setsoftclock();
  273         } else if (softticks + 1 == ticks)
  274                 ++softticks;
  275 }
  276 
  277 /*
  278  * Compute number of ticks in the specified amount of time.
  279  */
  280 int
  281 tvtohz(tv)
  282         struct timeval *tv;
  283 {
  284         register unsigned long ticks;
  285         register long sec, usec;
  286 
  287         /*
  288          * If the number of usecs in the whole seconds part of the time
  289          * difference fits in a long, then the total number of usecs will
  290          * fit in an unsigned long.  Compute the total and convert it to
  291          * ticks, rounding up and adding 1 to allow for the current tick
  292          * to expire.  Rounding also depends on unsigned long arithmetic
  293          * to avoid overflow.
  294          *
  295          * Otherwise, if the number of ticks in the whole seconds part of
  296          * the time difference fits in a long, then convert the parts to
  297          * ticks separately and add, using similar rounding methods and
  298          * overflow avoidance.  This method would work in the previous
  299          * case but it is slightly slower and assumes that hz is integral.
  300          *
  301          * Otherwise, round the time difference down to the maximum
  302          * representable value.
  303          *
  304          * If ints have 32 bits, then the maximum value for any timeout in
  305          * 10ms ticks is 248 days.
  306          */
  307         sec = tv->tv_sec;
  308         usec = tv->tv_usec;
  309         if (usec < 0) {
  310                 sec--;
  311                 usec += 1000000;
  312         }
  313         if (sec < 0) {
  314 #ifdef DIAGNOSTIC
  315                 if (usec > 0) {
  316                         sec++;
  317                         usec -= 1000000;
  318                 }
  319                 printf("tvotohz: negative time difference %ld sec %ld usec\n",
  320                        sec, usec);
  321 #endif
  322                 ticks = 1;
  323         } else if (sec <= LONG_MAX / 1000000)
  324                 ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
  325                         / tick + 1;
  326         else if (sec <= LONG_MAX / hz)
  327                 ticks = sec * hz
  328                         + ((unsigned long)usec + (tick - 1)) / tick + 1;
  329         else
  330                 ticks = LONG_MAX;
  331         if (ticks > INT_MAX)
  332                 ticks = INT_MAX;
  333         return ((int)ticks);
  334 }
  335 
  336 /*
  337  * Start profiling on a process.
  338  *
  339  * Kernel profiling passes proc0 which never exits and hence
  340  * keeps the profile clock running constantly.
  341  */
  342 void
  343 startprofclock(p)
  344         register struct proc *p;
  345 {
  346         int s;
  347 
  348         if ((p->p_flag & P_PROFIL) == 0) {
  349                 p->p_flag |= P_PROFIL;
  350                 if (++profprocs == 1 && stathz != 0) {
  351                         s = splstatclock();
  352                         psdiv = pscnt = psratio;
  353                         setstatclockrate(profhz);
  354                         splx(s);
  355                 }
  356         }
  357 }
  358 
  359 /*
  360  * Stop profiling on a process.
  361  */
  362 void
  363 stopprofclock(p)
  364         register struct proc *p;
  365 {
  366         int s;
  367 
  368         if (p->p_flag & P_PROFIL) {
  369                 p->p_flag &= ~P_PROFIL;
  370                 if (--profprocs == 0 && stathz != 0) {
  371                         s = splstatclock();
  372                         psdiv = pscnt = 1;
  373                         setstatclockrate(stathz);
  374                         splx(s);
  375                 }
  376         }
  377 }
  378 
  379 /*
  380  * Statistics clock.  Grab profile sample, and if divider reaches 0,
  381  * do process and kernel statistics.  Most of the statistics are only
  382  * used by user-level statistics programs.  The main exceptions are
  383  * p->p_uticks, p->p_sticks, p->p_iticks, and p->p_estcpu.
  384  */
  385 void
  386 statclock(frame)
  387         register struct clockframe *frame;
  388 {
  389 #ifdef GPROF
  390         register struct gmonparam *g;
  391         int i;
  392 #endif
  393         register struct proc *p;
  394         struct pstats *pstats;
  395         long rss;
  396         struct rusage *ru;
  397         struct vmspace *vm;
  398 
  399         if (curproc != NULL && CLKF_USERMODE(frame)) {
  400                 /*
  401                  * Came from user mode; CPU was in user state.
  402                  * If this process is being profiled, record the tick.
  403                  */
  404                 p = curproc;
  405                 if (p->p_flag & P_PROFIL)
  406                         addupc_intr(p, CLKF_PC(frame), 1);
  407 #if defined(SMP) && defined(BETTER_CLOCK)
  408                 if (stathz != 0)
  409                         forward_statclock(pscnt);
  410 #endif
  411                 if (--pscnt > 0)
  412                         return;
  413                 /*
  414                  * Charge the time as appropriate.
  415                  */
  416                 p->p_uticks++;
  417                 if (p->p_nice > NZERO)
  418                         cp_time[CP_NICE]++;
  419                 else
  420                         cp_time[CP_USER]++;
  421         } else {
  422 #ifdef GPROF
  423                 /*
  424                  * Kernel statistics are just like addupc_intr, only easier.
  425                  */
  426                 g = &_gmonparam;
  427                 if (g->state == GMON_PROF_ON) {
  428                         i = CLKF_PC(frame) - g->lowpc;
  429                         if (i < g->textsize) {
  430                                 i /= HISTFRACTION * sizeof(*g->kcount);
  431                                 g->kcount[i]++;
  432                         }
  433                 }
  434 #endif
  435 #if defined(SMP) && defined(BETTER_CLOCK)
  436                 if (stathz != 0)
  437                         forward_statclock(pscnt);
  438 #endif
  439                 if (--pscnt > 0)
  440                         return;
  441                 /*
  442                  * Came from kernel mode, so we were:
  443                  * - handling an interrupt,
  444                  * - doing syscall or trap work on behalf of the current
  445                  *   user process, or
  446                  * - spinning in the idle loop.
  447                  * Whichever it is, charge the time as appropriate.
  448                  * Note that we charge interrupts to the current process,
  449                  * regardless of whether they are ``for'' that process,
  450                  * so that we know how much of its real time was spent
  451                  * in ``non-process'' (i.e., interrupt) work.
  452                  */
  453                 p = curproc;
  454                 if (CLKF_INTR(frame)) {
  455                         if (p != NULL)
  456                                 p->p_iticks++;
  457                         cp_time[CP_INTR]++;
  458                 } else if (p != NULL) {
  459                         p->p_sticks++;
  460                         cp_time[CP_SYS]++;
  461                 } else
  462                         cp_time[CP_IDLE]++;
  463         }
  464         pscnt = psdiv;
  465 
  466         if (p != NULL) {
  467                 schedclock(p);
  468 
  469                 /* Update resource usage integrals and maximums. */
  470                 if ((pstats = p->p_stats) != NULL &&
  471                     (ru = &pstats->p_ru) != NULL &&
  472                     (vm = p->p_vmspace) != NULL) {
  473                         ru->ru_ixrss += pgtok(vm->vm_tsize);
  474                         ru->ru_idrss += pgtok(vm->vm_dsize);
  475                         ru->ru_isrss += pgtok(vm->vm_ssize);
  476                         rss = pgtok(vmspace_resident_count(vm));
  477                         if (ru->ru_maxrss < rss)
  478                                 ru->ru_maxrss = rss;
  479                 }
  480         }
  481 }
  482 
  483 /*
  484  * Return information about system clocks.
  485  */
  486 static int
  487 sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
  488 {
  489         struct clockinfo clkinfo;
  490         /*
  491          * Construct clockinfo structure.
  492          */
  493         clkinfo.hz = hz;
  494         clkinfo.tick = tick;
  495         clkinfo.tickadj = tickadj;
  496         clkinfo.profhz = profhz;
  497         clkinfo.stathz = stathz ? stathz : hz;
  498         return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
  499 }
  500 
  501 SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
  502         0, 0, sysctl_kern_clockrate, "S,clockinfo","");
  503 
  504 static __inline unsigned
  505 tco_delta(struct timecounter *tc)
  506 {
  507 
  508         return ((tc->tc_get_timecount(tc) - tc->tc_offset_count) & 
  509             tc->tc_counter_mask);
  510 }
  511 
  512 /*
  513  * We have eight functions for looking at the clock, four for
  514  * microseconds and four for nanoseconds.  For each there is a fast
  515  * but less precise version "get{nano|micro}[up]time" which will
  516  * return a time which is up to 1/HZ previous to the call, whereas
  517  * the raw version "{nano|micro}[up]time" will return a timestamp
  518  * which is as precise as possible.  The "up" variants return the
  519  * time relative to system boot; these are well suited for time
  520  * interval measurements.
  521  */
  522 
  523 void
  524 getmicrotime(struct timeval *tvp)
  525 {
  526         struct timecounter *tc;
  527 
  528         if (!tco_method) {
  529                 tc = timecounter;
  530                 *tvp = tc->tc_microtime;
  531         } else {
  532                 microtime(tvp);
  533         }
  534 }
  535 
  536 void
  537 getnanotime(struct timespec *tsp)
  538 {
  539         struct timecounter *tc;
  540 
  541         if (!tco_method) {
  542                 tc = timecounter;
  543                 *tsp = tc->tc_nanotime;
  544         } else {
  545                 nanotime(tsp);
  546         }
  547 }
  548 
  549 void
  550 microtime(struct timeval *tv)
  551 {
  552         struct timecounter *tc;
  553 
  554         tc = timecounter;
  555         tv->tv_sec = tc->tc_offset_sec;
  556         tv->tv_usec = tc->tc_offset_micro;
  557         tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
  558         tv->tv_usec += boottime.tv_usec;
  559         tv->tv_sec += boottime.tv_sec;
  560         while (tv->tv_usec < 0) {
  561                 tv->tv_usec += 1000000;
  562                 if (tv->tv_sec > 0)
  563                         tv->tv_sec--;
  564         }
  565         while (tv->tv_usec >= 1000000) {
  566                 tv->tv_usec -= 1000000;
  567                 tv->tv_sec++;
  568         }
  569 }
  570 
  571 void
  572 nanotime(struct timespec *ts)
  573 {
  574         unsigned count;
  575         u_int64_t delta;
  576         struct timecounter *tc;
  577 
  578         tc = timecounter;
  579         ts->tv_sec = tc->tc_offset_sec;
  580         count = tco_delta(tc);
  581         delta = tc->tc_offset_nano;
  582         delta += ((u_int64_t)count * tc->tc_scale_nano_f);
  583         delta >>= 32;
  584         delta += ((u_int64_t)count * tc->tc_scale_nano_i);
  585         delta += boottime.tv_usec * 1000;
  586         ts->tv_sec += boottime.tv_sec;
  587         while (delta < 0) {
  588                 delta += 1000000000;
  589                 if (ts->tv_sec > 0)
  590                         ts->tv_sec--;
  591         }
  592         while (delta >= 1000000000) {
  593                 delta -= 1000000000;
  594                 ts->tv_sec++;
  595         }
  596         ts->tv_nsec = delta;
  597 }
  598 
  599 void
  600 getmicrouptime(struct timeval *tvp)
  601 {
  602         struct timecounter *tc;
  603 
  604         if (!tco_method) {
  605                 tc = timecounter;
  606                 tvp->tv_sec = tc->tc_offset_sec;
  607                 tvp->tv_usec = tc->tc_offset_micro;
  608         } else {
  609                 microuptime(tvp);
  610         }
  611 }
  612 
  613 void
  614 getnanouptime(struct timespec *tsp)
  615 {
  616         struct timecounter *tc;
  617 
  618         if (!tco_method) {
  619                 tc = timecounter;
  620                 tsp->tv_sec = tc->tc_offset_sec;
  621                 tsp->tv_nsec = tc->tc_offset_nano >> 32;
  622         } else {
  623                 nanouptime(tsp);
  624         }
  625 }
  626 
  627 void
  628 microuptime(struct timeval *tv)
  629 {
  630         struct timecounter *tc;
  631 
  632         tc = timecounter;
  633         tv->tv_sec = tc->tc_offset_sec;
  634         tv->tv_usec = tc->tc_offset_micro;
  635         tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
  636         while (tv->tv_usec < 0) {
  637                 tv->tv_usec += 1000000;
  638                 if (tv->tv_sec > 0)
  639                         tv->tv_sec--;
  640         }
  641         while (tv->tv_usec >= 1000000) {
  642                 tv->tv_usec -= 1000000;
  643                 tv->tv_sec++;
  644         }
  645 }
  646 
  647 void
  648 nanouptime(struct timespec *ts)
  649 {
  650         unsigned count;
  651         u_int64_t delta;
  652         struct timecounter *tc;
  653 
  654         tc = timecounter;
  655         ts->tv_sec = tc->tc_offset_sec;
  656         count = tco_delta(tc);
  657         delta = tc->tc_offset_nano;
  658         delta += ((u_int64_t)count * tc->tc_scale_nano_f);
  659         delta >>= 32;
  660         delta += ((u_int64_t)count * tc->tc_scale_nano_i);
  661         while (delta < 0) {
  662                 delta += 1000000000;
  663                 if (ts->tv_sec > 0)
  664                         ts->tv_sec--;
  665         }
  666         while (delta >= 1000000000) {
  667                 delta -= 1000000000;
  668                 ts->tv_sec++;
  669         }
  670         ts->tv_nsec = delta;
  671 }
  672 
  673 static void
  674 tco_setscales(struct timecounter *tc)
  675 {
  676         u_int64_t scale;
  677 
  678         scale = 1000000000LL << 32;
  679         scale += tc->tc_adjustment;
  680         scale /= tc->tc_tweak->tc_frequency;
  681         tc->tc_scale_micro = scale / 1000;
  682         tc->tc_scale_nano_f = scale & 0xffffffff;
  683         tc->tc_scale_nano_i = scale >> 32;
  684 }
  685 
  686 void
  687 update_timecounter(struct timecounter *tc)
  688 {
  689         tco_setscales(tc);
  690 }
  691 
  692 void
  693 init_timecounter(struct timecounter *tc)
  694 {
  695         struct timespec ts1;
  696         struct timecounter *t1, *t2, *t3;
  697         unsigned u;
  698         int i;
  699 
  700         u = tc->tc_frequency / tc->tc_counter_mask;
  701         if (u > hz) {
  702                 printf("Timecounter \"%s\" frequency %lu Hz"
  703                        " -- Insufficient hz, needs at least %u\n",
  704                        tc->tc_name, (u_long) tc->tc_frequency, u);
  705                 return;
  706         }
  707 
  708         tc->tc_adjustment = 0;
  709         tc->tc_tweak = tc;
  710         tco_setscales(tc);
  711         tc->tc_offset_count = tc->tc_get_timecount(tc);
  712         if (timecounter == &dummy_timecounter)
  713                 tc->tc_avail = tc;
  714         else {
  715                 tc->tc_avail = timecounter->tc_tweak->tc_avail;
  716                 timecounter->tc_tweak->tc_avail = tc;
  717         }
  718         MALLOC(t1, struct timecounter *, sizeof *t1, M_TIMECOUNTER, M_WAITOK);
  719         tc->tc_other = t1;
  720         *t1 = *tc;
  721         t2 = t1;
  722         for (i = 1; i < NTIMECOUNTER; i++) {
  723                 MALLOC(t3, struct timecounter *, sizeof *t3,
  724                     M_TIMECOUNTER, M_WAITOK);
  725                 *t3 = *tc;
  726                 t3->tc_other = t2;
  727                 t2 = t3;
  728         }
  729         t1->tc_other = t3;
  730         tc = t1;
  731 
  732         printf("Timecounter \"%s\"  frequency %lu Hz\n", 
  733             tc->tc_name, (u_long)tc->tc_frequency);
  734 
  735         /* XXX: For now always start using the counter. */
  736         tc->tc_offset_count = tc->tc_get_timecount(tc);
  737         nanouptime(&ts1);
  738         tc->tc_offset_nano = (u_int64_t)ts1.tv_nsec << 32;
  739         tc->tc_offset_micro = ts1.tv_nsec / 1000;
  740         tc->tc_offset_sec = ts1.tv_sec;
  741         timecounter = tc;
  742 }
  743 
  744 void
  745 set_timecounter(struct timespec *ts)
  746 {
  747         struct timespec ts2;
  748 
  749         nanouptime(&ts2);
  750         boottime.tv_sec = ts->tv_sec - ts2.tv_sec;
  751         boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000;
  752         if (boottime.tv_usec < 0) {
  753                 boottime.tv_usec += 1000000;
  754                 boottime.tv_sec--;
  755         }
  756         /* fiddle all the little crinkly bits around the fiords... */
  757         tco_forward(1);
  758 }
  759 
  760 static void
  761 switch_timecounter(struct timecounter *newtc)
  762 {
  763         int s;
  764         struct timecounter *tc;
  765         struct timespec ts;
  766 
  767         s = splclock();
  768         tc = timecounter;
  769         if (newtc->tc_tweak == tc->tc_tweak) {
  770                 splx(s);
  771                 return;
  772         }
  773         newtc = newtc->tc_tweak->tc_other;
  774         nanouptime(&ts);
  775         newtc->tc_offset_sec = ts.tv_sec;
  776         newtc->tc_offset_nano = (u_int64_t)ts.tv_nsec << 32;
  777         newtc->tc_offset_micro = ts.tv_nsec / 1000;
  778         newtc->tc_offset_count = newtc->tc_get_timecount(newtc);
  779         tco_setscales(newtc);
  780         timecounter = newtc;
  781         splx(s);
  782 }
  783 
  784 static struct timecounter *
  785 sync_other_counter(void)
  786 {
  787         struct timecounter *tc, *tcn, *tco;
  788         unsigned delta;
  789 
  790         tco = timecounter;
  791         tc = tco->tc_other;
  792         tcn = tc->tc_other;
  793         *tc = *tco;
  794         tc->tc_other = tcn;
  795         delta = tco_delta(tc);
  796         tc->tc_offset_count += delta;
  797         tc->tc_offset_count &= tc->tc_counter_mask;
  798         tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_f;
  799         tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_i << 32;
  800         return (tc);
  801 }
  802 
  803 static void
  804 tco_forward(int force)
  805 {
  806         struct timecounter *tc, *tco;
  807         struct timeval tvt;
  808         time_t t;
  809 
  810         tco = timecounter;
  811         tc = sync_other_counter();
  812         /*
  813          * We may be inducing a tiny error here, the tc_poll_pps() may
  814          * process a latched count which happens after the tco_delta()
  815          * in sync_other_counter(), which would extend the previous
  816          * counters parameters into the domain of this new one.
  817          * Since the timewindow is very small for this, the error is
  818          * going to be only a few weenieseconds (as Dave Mills would
  819          * say), so lets just not talk more about it, OK ?
  820          */
  821         if (tco->tc_poll_pps) 
  822                 tco->tc_poll_pps(tco);
  823         if (timedelta != 0) {
  824                 tvt = boottime;
  825                 tvt.tv_usec += tickdelta;
  826                 if (tvt.tv_usec >= 1000000) {
  827                         tvt.tv_sec++;
  828                         tvt.tv_usec -= 1000000;
  829                 } else if (tvt.tv_usec < 0) {
  830                         tvt.tv_sec--;
  831                         tvt.tv_usec += 1000000;
  832                 }
  833                 boottime = tvt;
  834                 timedelta -= tickdelta;
  835         }
  836 
  837         while (tc->tc_offset_nano >= 1000000000ULL << 32) {
  838                 tc->tc_offset_nano -= 1000000000ULL << 32;
  839                 tc->tc_offset_sec++;
  840                 force++;
  841         }
  842 
  843         if (tco_method && !force)
  844                 return;
  845 
  846         tc->tc_offset_micro = (tc->tc_offset_nano / 1000) >> 32;
  847 
  848         /* Figure out the wall-clock time */
  849         tc->tc_nanotime.tv_sec = tc->tc_offset_sec + boottime.tv_sec;
  850         tc->tc_nanotime.tv_nsec = 
  851             (tc->tc_offset_nano >> 32) + boottime.tv_usec * 1000;
  852         tc->tc_microtime.tv_usec = tc->tc_offset_micro + boottime.tv_usec;
  853         while (tc->tc_nanotime.tv_nsec >= 1000000000) {
  854                 tc->tc_nanotime.tv_nsec -= 1000000000;
  855                 tc->tc_microtime.tv_usec -= 1000000;
  856                 tc->tc_nanotime.tv_sec++;
  857         }
  858         t = tc->tc_nanotime.tv_sec - time_second;
  859         if (t > LARGE_STEP)
  860             t = 2;
  861         while (t-- > 0) {
  862                 time_second = tc->tc_nanotime.tv_sec;
  863                 ntp_update_second(tc);
  864                 tc->tc_offset_sec += tc->tc_nanotime.tv_sec - time_second;
  865                 tco_setscales(tc);
  866         }
  867         time_second = tc->tc_microtime.tv_sec = tc->tc_nanotime.tv_sec;
  868 
  869         timecounter = tc;
  870 }
  871 
  872 SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");
  873 
  874 SYSCTL_INT(_kern_timecounter, OID_AUTO, method, CTLFLAG_RW, &tco_method, 0,
  875     "This variable determines the method used for updating timecounters. "
  876     "If the default algorithm (0) fails with \"calcru negative...\" messages "
  877     "try the alternate algorithm (1) which handles bad hardware better."
  878 
  879 );
  880 
  881 static int
  882 sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS)
  883 {
  884         char newname[32];
  885         struct timecounter *newtc, *tc;
  886         int error;
  887 
  888         tc = timecounter->tc_tweak;
  889         strncpy(newname, tc->tc_name, sizeof(newname));
  890         error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req);
  891         if (error == 0 && req->newptr != NULL &&
  892             strcmp(newname, tc->tc_name) != 0) {
  893                 for (newtc = tc->tc_avail; newtc != tc;
  894                     newtc = newtc->tc_avail) {
  895                         if (strcmp(newname, newtc->tc_name) == 0) {
  896                                 /* Warm up new timecounter. */
  897                                 (void)newtc->tc_get_timecount(newtc);
  898 
  899                                 switch_timecounter(newtc);
  900                                 return (0);
  901                         }
  902                 }
  903                 return (EINVAL);
  904         }
  905         return (error);
  906 }
  907 
  908 SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, CTLTYPE_STRING | CTLFLAG_RW,
  909     0, 0, sysctl_kern_timecounter_hardware, "A", "");
  910 
  911 
  912 int
  913 pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps)
  914 {
  915         pps_params_t *app;
  916         struct pps_fetch_args *fapi;
  917 #ifdef PPS_SYNC
  918         struct pps_kcbind_args *kapi;
  919 #endif
  920 
  921         switch (cmd) {
  922         case PPS_IOC_CREATE:
  923                 return (0);
  924         case PPS_IOC_DESTROY:
  925                 return (0);
  926         case PPS_IOC_SETPARAMS:
  927                 app = (pps_params_t *)data;
  928                 if (app->mode & ~pps->ppscap)
  929                         return (EINVAL);
  930                 pps->ppsparam = *app;         
  931                 return (0);
  932         case PPS_IOC_GETPARAMS:
  933                 app = (pps_params_t *)data;
  934                 *app = pps->ppsparam;
  935                 app->api_version = PPS_API_VERS_1;
  936                 return (0);
  937         case PPS_IOC_GETCAP:
  938                 *(int*)data = pps->ppscap;
  939                 return (0);
  940         case PPS_IOC_FETCH:
  941                 fapi = (struct pps_fetch_args *)data;
  942                 if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC)
  943                         return (EINVAL);
  944                 if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec)
  945                         return (EOPNOTSUPP);
  946                 pps->ppsinfo.current_mode = pps->ppsparam.mode;         
  947                 fapi->pps_info_buf = pps->ppsinfo;
  948                 return (0);
  949         case PPS_IOC_KCBIND:
  950 #ifdef PPS_SYNC
  951                 kapi = (struct pps_kcbind_args *)data;
  952                 /* XXX Only root should be able to do this */
  953                 if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC)
  954                         return (EINVAL);
  955                 if (kapi->kernel_consumer != PPS_KC_HARDPPS)
  956                         return (EINVAL);
  957                 if (kapi->edge & ~pps->ppscap)
  958                         return (EINVAL);
  959                 pps->kcmode = kapi->edge;
  960                 return (0);
  961 #else
  962                 return (EOPNOTSUPP);
  963 #endif
  964         default:
  965                 return (ENOTTY);
  966         }
  967 }
  968 
  969 void
  970 pps_init(struct pps_state *pps)
  971 {
  972         pps->ppscap |= PPS_TSFMT_TSPEC;
  973         if (pps->ppscap & PPS_CAPTUREASSERT)
  974                 pps->ppscap |= PPS_OFFSETASSERT;
  975         if (pps->ppscap & PPS_CAPTURECLEAR)
  976                 pps->ppscap |= PPS_OFFSETCLEAR;
  977 }
  978 
  979 void
  980 pps_event(struct pps_state *pps, struct timecounter *tc, unsigned count, int event)
  981 {
  982         struct timespec ts, *tsp, *osp;
  983         u_int64_t delta;
  984         unsigned tcount, *pcount;
  985         int foff, fhard;
  986         pps_seq_t       *pseq;
  987 
  988         /* Things would be easier with arrays... */
  989         if (event == PPS_CAPTUREASSERT) {
  990                 tsp = &pps->ppsinfo.assert_timestamp;
  991                 osp = &pps->ppsparam.assert_offset;
  992                 foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
  993                 fhard = pps->kcmode & PPS_CAPTUREASSERT;
  994                 pcount = &pps->ppscount[0];
  995                 pseq = &pps->ppsinfo.assert_sequence;
  996         } else {
  997                 tsp = &pps->ppsinfo.clear_timestamp;
  998                 osp = &pps->ppsparam.clear_offset;
  999                 foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
 1000                 fhard = pps->kcmode & PPS_CAPTURECLEAR;
 1001                 pcount = &pps->ppscount[1];
 1002                 pseq = &pps->ppsinfo.clear_sequence;
 1003         }
 1004 
 1005         /* The timecounter changed: bail */
 1006         if (!pps->ppstc || 
 1007             pps->ppstc->tc_name != tc->tc_name || 
 1008             tc->tc_name != timecounter->tc_name) {
 1009                 pps->ppstc = tc;
 1010                 *pcount = count;
 1011                 return;
 1012         }
 1013 
 1014         /* Nothing really happened */
 1015         if (*pcount == count)
 1016                 return;
 1017 
 1018         *pcount = count;
 1019 
 1020         /* Convert the count to timespec */
 1021         ts.tv_sec = tc->tc_offset_sec;
 1022         tcount = count - tc->tc_offset_count;
 1023         tcount &= tc->tc_counter_mask;
 1024         delta = tc->tc_offset_nano;
 1025         delta += ((u_int64_t)tcount * tc->tc_scale_nano_f);
 1026         delta >>= 32;
 1027         delta += ((u_int64_t)tcount * tc->tc_scale_nano_i);
 1028         delta += boottime.tv_usec * 1000;
 1029         ts.tv_sec += boottime.tv_sec;
 1030         while (delta >= 1000000000) {
 1031                 delta -= 1000000000;
 1032                 ts.tv_sec++;
 1033         }
 1034         ts.tv_nsec = delta;
 1035 
 1036         (*pseq)++;
 1037         *tsp = ts;
 1038 
 1039         if (foff) {
 1040                 timespecadd(tsp, osp);
 1041                 if (tsp->tv_nsec < 0) {
 1042                         tsp->tv_nsec += 1000000000;
 1043                         tsp->tv_sec -= 1;
 1044                 }
 1045         }
 1046 #ifdef PPS_SYNC
 1047         if (fhard) {
 1048                 /* magic, at its best... */
 1049                 tcount = count - pps->ppscount[2];
 1050                 pps->ppscount[2] = count;
 1051                 tcount &= tc->tc_counter_mask;
 1052                 delta = ((u_int64_t)tcount * tc->tc_tweak->tc_scale_nano_f);
 1053                 delta >>= 32;
 1054                 delta += ((u_int64_t)tcount * tc->tc_tweak->tc_scale_nano_i);
 1055                 hardpps(tsp, delta);
 1056         }
 1057 #endif
 1058 }
Cache object: c2fb343fcb282413ba87d3bc89394497
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/kern_clock.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_clock.c