The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_clock.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)kern_clock.c        8.5 (Berkeley) 1/21/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/6.2/sys/kern/kern_clock.c 164286 2006-11-14 20:42:41Z cvs2svn $");
   39 
   40 #include "opt_device_polling.h"
   41 #include "opt_hwpmc_hooks.h"
   42 #include "opt_ntp.h"
   43 #include "opt_watchdog.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/callout.h>
   48 #include <sys/kdb.h>
   49 #include <sys/kernel.h>
   50 #include <sys/lock.h>
   51 #include <sys/ktr.h>
   52 #include <sys/mutex.h>
   53 #include <sys/proc.h>
   54 #include <sys/resource.h>
   55 #include <sys/resourcevar.h>
   56 #include <sys/sched.h>
   57 #include <sys/signalvar.h>
   58 #include <sys/smp.h>
   59 #include <vm/vm.h>
   60 #include <vm/pmap.h>
   61 #include <vm/vm_map.h>
   62 #include <sys/sysctl.h>
   63 #include <sys/bus.h>
   64 #include <sys/interrupt.h>
   65 #include <sys/limits.h>
   66 #include <sys/timetc.h>
   67 
   68 #include <machine/cpu.h>
   69 
   70 #ifdef GPROF
   71 #include <sys/gmon.h>
   72 #endif
   73 
   74 #ifdef HWPMC_HOOKS
   75 #include <sys/pmckern.h>
   76 #endif
   77 
   78 #ifdef DEVICE_POLLING
   79 extern void hardclock_device_poll(void);
   80 #endif /* DEVICE_POLLING */
   81 
   82 static void initclocks(void *dummy);
   83 SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL) /* hook initclocks() into the SI_SUB_CLOCKS init stage, ordered first */
   84 
   85 /* Some of these globals don't belong in this file, but it is easiest to keep them together. */
   86 long cp_time[CPUSTATES]; /* per-CPU-state tick counts (CP_USER, CP_NICE, CP_SYS, CP_INTR, CP_IDLE); bumped by statclock(), exported by sysctl_kern_cp_time() */
   87 
   88 static int
   89 sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS)
   90 {
   91         int error;
   92 #ifdef SCTL_MASK32
   93         int i;
   94         unsigned int cp_time32[CPUSTATES];
   95         
   96         if (req->flags & SCTL_MASK32) {
   97                 if (!req->oldptr)
   98                         return SYSCTL_OUT(req, 0, sizeof(cp_time32));
   99                 for (i = 0; i < CPUSTATES; i++)
  100                         cp_time32[i] = (unsigned int)cp_time[i];
  101                 error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
  102         } else
  103 #endif
  104         {
  105                 if (!req->oldptr)
  106                         return SYSCTL_OUT(req, 0, sizeof(cp_time));
  107                 error = SYSCTL_OUT(req, cp_time, sizeof(cp_time));
  108         }
  109         return error;
  110 }
  111 
  112 SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD, 
  113     0,0, sysctl_kern_cp_time, "LU", "CPU time statistics");
  114 
  115 #ifdef SW_WATCHDOG
  116 #include <sys/watchdog.h>
  117 
  118 static int watchdog_ticks; /* countdown set by watchdog_config(), decremented in hardclock(); watchdog_fire() runs when it reaches <= 0 */
  119 static int watchdog_enabled; /* set to 1/0 by watchdog_config(); hardclock() only counts down while this is > 0 */
  120 static void watchdog_fire(void);
  121 static void watchdog_config(void *, u_int, int *);
  122 #endif /* SW_WATCHDOG */
  123 
  124 /*
  125  * Clock handling routines.
  126  *
  127  * This code is written to operate with two timers that run independently of
  128  * each other.
  129  *
  130  * The main timer, running hz times per second, is used to trigger interval
  131  * timers, timeouts and rescheduling as needed.
  132  *
  133  * The second timer handles kernel and user profiling,
  134  * and does resource use estimation.  If the second timer is programmable,
  135  * it is randomized to avoid aliasing between the two clocks.  For example,
  136  * the randomization prevents an adversary from always giving up the cpu
  137  * just before its quantum expires.  Otherwise, it would never accumulate
  138  * cpu ticks.  The mean frequency of the second timer is stathz.
  139  *
  140  * If no second timer exists, stathz will be zero; in this case we drive
  141  * profiling and statistics off the main clock.  This WILL NOT be accurate;
  142  * do not do it unless absolutely necessary.
  143  *
  144  * The statistics clock may (or may not) be run at a higher rate while
  145  * profiling.  This profile clock runs at profhz.  We require that profhz
  146  * be an integral multiple of stathz.
  147  *
  148  * If the statistics clock is running fast, it must be divided by the ratio
  149  * profhz/stathz for statistics.  (For profiling, every tick counts.)
  150  *
  151  * Time-of-day is maintained using a "timecounter", which may or may
  152  * not be related to the hardware generating the above mentioned
  153  * interrupts.
  154  */
  155 
  156 int     stathz; /* mean rate of the statistics clock; 0 means no separate timer, so hardclock() drives statclock()/profclock() */
  157 int     profhz; /* rate of the profiling clock; initclocks() defaults it to the statistics rate when left at 0 */
  158 int     profprocs; /* number of processes with P_PROFIL set; maintained by startprofclock()/stopprofclock() */
  159 int     ticks; /* incremented once per hardclock() call, under callout_lock */
  160 int     psratio; /* profhz / (stathz ? stathz : hz), computed in initclocks() */
  161 
  162 /*
  163  * Initialize clock frequencies and start both clocks running.
  164  */
  165 /* ARGSUSED*/
  166 static void
  167 initclocks(dummy)
  168         void *dummy;
  169 {
  170         register int i;
  171 
  172         /*
  173          * Set divisors to 1 (normal case) and let the machine-specific
  174          * code do its bit.
  175          */
  176         cpu_initclocks();
  177 
  178         /*
  179          * Compute profhz/stathz, and fix profhz if needed.
  180          */
  181         i = stathz ? stathz : hz;
  182         if (profhz == 0)
  183                 profhz = i;
  184         psratio = profhz / i;
  185 #ifdef SW_WATCHDOG
  186         EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0);
  187 #endif
  188 }
  189 
  190 /*
  191  * Each time the real-time timer fires, this function is called on all CPUs.
  192  * Note that hardclock() calls hardclock_process() for the boot CPU, so only
  193  * the other CPUs in the system need to call this function.
  194  */
  195 void
  196 hardclock_process(frame)
  197         register struct clockframe *frame;
  198 {
  199         struct pstats *pstats;
  200         struct thread *td = curthread;
  201         struct proc *p = td->td_proc;
  202 
  203         /*
  204          * Run current process's virtual and profile time, as needed.
  205          */
  206         mtx_lock_spin_flags(&sched_lock, MTX_QUIET);
  207         if (p->p_flag & P_SA) {
  208                 /* XXXKSE What to do? */
  209         } else {
  210                 pstats = p->p_stats;
  211                 if (CLKF_USERMODE(frame) &&
  212                     timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
  213                     itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) {
  214                         p->p_sflag |= PS_ALRMPEND;
  215                         td->td_flags |= TDF_ASTPENDING;
  216                 }
  217                 if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
  218                     itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) {
  219                         p->p_sflag |= PS_PROFPEND;
  220                         td->td_flags |= TDF_ASTPENDING;
  221                 }
  222         }
  223         mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
  224 
  225 #ifdef  HWPMC_HOOKS
  226         if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
  227                 PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
  228 #endif
  229 }
  230 
  231 /*
  232  * The real-time timer, interrupting hz times per second.
  233  */
  234 void
  235 hardclock(frame)
  236         register struct clockframe *frame;
  237 {
  238         int need_softclock = 0;
  239 
  240         CTR0(KTR_CLK, "hardclock fired");
  241         hardclock_process(frame);
  242 
  243         tc_ticktock();
  244         /*
  245          * If no separate statistics clock is available, run it from here.
  246          *
  247          * XXX: this only works for UP
  248          */
  249         if (stathz == 0) {
  250                 profclock(frame);
  251                 statclock(frame);
  252         }
  253 
  254 #ifdef DEVICE_POLLING
  255         hardclock_device_poll();        /* this is very short and quick */
  256 #endif /* DEVICE_POLLING */
  257 
  258         /*
  259          * Process callouts at a very low cpu priority, so we don't keep the
  260          * relatively high clock interrupt priority any longer than necessary.
  261          */
  262         mtx_lock_spin_flags(&callout_lock, MTX_QUIET);
  263         ticks++;
  264         if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) {
  265                 need_softclock = 1;
  266         } else if (softticks + 1 == ticks)
  267                 ++softticks;
  268         mtx_unlock_spin_flags(&callout_lock, MTX_QUIET);
  269 
  270         /*
  271          * swi_sched acquires sched_lock, so we don't want to call it with
  272          * callout_lock held; incorrect locking order.
  273          */
  274         if (need_softclock)
  275                 swi_sched(softclock_ih, 0);
  276 
  277 #ifdef SW_WATCHDOG
  278         if (watchdog_enabled > 0 && --watchdog_ticks <= 0)
  279                 watchdog_fire();
  280 #endif /* SW_WATCHDOG */
  281 }
  282 
  283 /*
  284  * Compute number of ticks in the specified amount of time.
  285  */
  286 int
  287 tvtohz(tv)
  288         struct timeval *tv;
  289 {
  290         register unsigned long ticks;
  291         register long sec, usec;
  292 
  293         /*
  294          * If the number of usecs in the whole seconds part of the time
  295          * difference fits in a long, then the total number of usecs will
  296          * fit in an unsigned long.  Compute the total and convert it to
  297          * ticks, rounding up and adding 1 to allow for the current tick
  298          * to expire.  Rounding also depends on unsigned long arithmetic
  299          * to avoid overflow.
  300          *
  301          * Otherwise, if the number of ticks in the whole seconds part of
  302          * the time difference fits in a long, then convert the parts to
  303          * ticks separately and add, using similar rounding methods and
  304          * overflow avoidance.  This method would work in the previous
  305          * case but it is slightly slower and assumes that hz is integral.
  306          *
  307          * Otherwise, round the time difference down to the maximum
  308          * representable value.
  309          *
  310          * If ints have 32 bits, then the maximum value for any timeout in
  311          * 10ms ticks is 248 days.
  312          */
  313         sec = tv->tv_sec;
  314         usec = tv->tv_usec;
  315         if (usec < 0) {
  316                 sec--;
  317                 usec += 1000000;
  318         }
  319         if (sec < 0) {
  320 #ifdef DIAGNOSTIC
  321                 if (usec > 0) {
  322                         sec++;
  323                         usec -= 1000000;
  324                 }
  325                 printf("tvotohz: negative time difference %ld sec %ld usec\n",
  326                        sec, usec);
  327 #endif
  328                 ticks = 1;
  329         } else if (sec <= LONG_MAX / 1000000)
  330                 ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
  331                         / tick + 1;
  332         else if (sec <= LONG_MAX / hz)
  333                 ticks = sec * hz
  334                         + ((unsigned long)usec + (tick - 1)) / tick + 1;
  335         else
  336                 ticks = LONG_MAX;
  337         if (ticks > INT_MAX)
  338                 ticks = INT_MAX;
  339         return ((int)ticks);
  340 }
  341 
  342 /*
  343  * Start profiling on a process.
  344  *
  345  * Kernel profiling passes proc0 which never exits and hence
  346  * keeps the profile clock running constantly.
  347  */
  348 void
  349 startprofclock(p)
  350         register struct proc *p;
  351 {
  352 
  353         /*
  354          * XXX; Right now sched_lock protects statclock(), but perhaps
  355          * it should be protected later on by a time_lock, which would
  356          * cover psdiv, etc. as well.
  357          */
  358         PROC_LOCK_ASSERT(p, MA_OWNED);
  359         if (p->p_flag & P_STOPPROF)
  360                 return;
  361         if ((p->p_flag & P_PROFIL) == 0) {
  362                 mtx_lock_spin(&sched_lock);
  363                 p->p_flag |= P_PROFIL;
  364                 if (++profprocs == 1)
  365                         cpu_startprofclock();
  366                 mtx_unlock_spin(&sched_lock);
  367         }
  368 }
  369 
  370 /*
  371  * Stop profiling on a process.
  372  */
  373 void
  374 stopprofclock(p)
  375         register struct proc *p;
  376 {
  377 
  378         PROC_LOCK_ASSERT(p, MA_OWNED);
  379         if (p->p_flag & P_PROFIL) {
  380                 if (p->p_profthreads != 0) {
  381                         p->p_flag |= P_STOPPROF;
  382                         while (p->p_profthreads != 0)
  383                                 msleep(&p->p_profthreads, &p->p_mtx, PPAUSE,
  384                                     "stopprof", 0);
  385                         p->p_flag &= ~P_STOPPROF;
  386                 }
  387                 if ((p->p_flag & P_PROFIL) == 0)
  388                         return;
  389                 mtx_lock_spin(&sched_lock);
  390                 p->p_flag &= ~P_PROFIL;
  391                 if (--profprocs == 0)
  392                         cpu_stopprofclock();
  393                 mtx_unlock_spin(&sched_lock);
  394         }
  395 }
  396 
  397 /*
  398  * Statistics clock.  Grab profile sample, and if divider reaches 0,
  399  * do process and kernel statistics.  Most of the statistics are only
  400  * used by user-level statistics programs.  The main exceptions are
  401  * ke->ke_uticks, p->p_rux.rux_sticks, p->p_rux.rux_iticks, and p->p_estcpu.
  402  * This should be called by all active processors.
  403  */
  404 void
  405 statclock(frame)
  406         register struct clockframe *frame;
  407 {
  408         struct rusage *ru;
  409         struct vmspace *vm;
  410         struct thread *td;
  411         struct proc *p;
  412         long rss;
  413 
  414         td = curthread;
  415         p = td->td_proc;
  416 
  417         mtx_lock_spin_flags(&sched_lock, MTX_QUIET);
  418         if (CLKF_USERMODE(frame)) {
  419                 /*
  420                  * Charge the time as appropriate.
  421                  */
  422                 if (p->p_flag & P_SA)
  423                         thread_statclock(1);
  424                 p->p_rux.rux_uticks++;
  425                 if (p->p_nice > NZERO)
  426                         cp_time[CP_NICE]++;
  427                 else
  428                         cp_time[CP_USER]++;
  429         } else {
  430                 /*
  431                  * Came from kernel mode, so we were:
  432                  * - handling an interrupt,
  433                  * - doing syscall or trap work on behalf of the current
  434                  *   user process, or
  435                  * - spinning in the idle loop.
  436                  * Whichever it is, charge the time as appropriate.
  437                  * Note that we charge interrupts to the current process,
  438                  * regardless of whether they are ``for'' that process,
  439                  * so that we know how much of its real time was spent
  440                  * in ``non-process'' (i.e., interrupt) work.
  441                  */
  442                 if ((td->td_pflags & TDP_ITHREAD) ||
  443                     td->td_intr_nesting_level >= 2) {
  444                         p->p_rux.rux_iticks++;
  445                         cp_time[CP_INTR]++;
  446                 } else {
  447                         if (p->p_flag & P_SA)
  448                                 thread_statclock(0);
  449                         td->td_sticks++;
  450                         p->p_rux.rux_sticks++;
  451                         if (td != PCPU_GET(idlethread))
  452                                 cp_time[CP_SYS]++;
  453                         else
  454                                 cp_time[CP_IDLE]++;
  455                 }
  456         }
  457         CTR4(KTR_SCHED, "statclock: %p(%s) prio %d stathz %d",
  458             td, td->td_proc->p_comm, td->td_priority, (stathz)?stathz:hz);
  459 
  460         sched_clock(td);
  461 
  462         /* Update resource usage integrals and maximums. */
  463         MPASS(p->p_stats != NULL);
  464         MPASS(p->p_vmspace != NULL);
  465         vm = p->p_vmspace;
  466         ru = &p->p_stats->p_ru;
  467         ru->ru_ixrss += pgtok(vm->vm_tsize);
  468         ru->ru_idrss += pgtok(vm->vm_dsize);
  469         ru->ru_isrss += pgtok(vm->vm_ssize);
  470         rss = pgtok(vmspace_resident_count(vm));
  471         if (ru->ru_maxrss < rss)
  472                 ru->ru_maxrss = rss;
  473         mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
  474 }
  475 
  476 void
  477 profclock(frame)
  478         register struct clockframe *frame;
  479 {
  480         struct thread *td;
  481 #ifdef GPROF
  482         struct gmonparam *g;
  483         int i;
  484 #endif
  485 
  486         td = curthread;
  487         if (CLKF_USERMODE(frame)) {
  488                 /*
  489                  * Came from user mode; CPU was in user state.
  490                  * If this process is being profiled, record the tick.
  491                  * if there is no related user location yet, don't
  492                  * bother trying to count it.
  493                  */
  494                 if (td->td_proc->p_flag & P_PROFIL)
  495                         addupc_intr(td, CLKF_PC(frame), 1);
  496         }
  497 #ifdef GPROF
  498         else {
  499                 /*
  500                  * Kernel statistics are just like addupc_intr, only easier.
  501                  */
  502                 g = &_gmonparam;
  503                 if (g->state == GMON_PROF_ON) {
  504                         i = CLKF_PC(frame) - g->lowpc;
  505                         if (i < g->textsize) {
  506                                 i /= HISTFRACTION * sizeof(*g->kcount);
  507                                 g->kcount[i]++;
  508                         }
  509                 }
  510         }
  511 #endif
  512 }
  513 
  514 /*
  515  * Return information about system clocks.
  516  */
  517 static int
  518 sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
  519 {
  520         struct clockinfo clkinfo;
  521         /*
  522          * Construct clockinfo structure.
  523          */
  524         bzero(&clkinfo, sizeof(clkinfo));
  525         clkinfo.hz = hz;
  526         clkinfo.tick = tick;
  527         clkinfo.profhz = profhz;
  528         clkinfo.stathz = stathz ? stathz : hz;
  529         return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
  530 }
  531 
  532 SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
  533         0, 0, sysctl_kern_clockrate, "S,clockinfo",
  534         "Rate and period of various kernel clocks");
  535 
  536 #ifdef SW_WATCHDOG
  537 
  538 static void
  539 watchdog_config(void *unused __unused, u_int cmd, int *err)
  540 {
  541         u_int u;
  542 
  543         u = cmd & WD_INTERVAL;
  544         if ((cmd & WD_ACTIVE) && u >= WD_TO_1SEC) {
  545                 watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz;
  546                 watchdog_enabled = 1;
  547                 *err = 0;
  548         } else {
  549                 watchdog_enabled = 0;
  550         }
  551 }
  552 
  553 /*
  554  * Handle a watchdog timeout by dumping interrupt information and
  555  * then either dropping to DDB or panicing.
  556  */
  557 static void
  558 watchdog_fire(void)
  559 {
  560         int nintr;
  561         u_int64_t inttotal;
  562         u_long *curintr;
  563         char *curname;
  564 
  565         curintr = intrcnt;
  566         curname = intrnames;
  567         inttotal = 0;
  568         nintr = eintrcnt - intrcnt;
  569         
  570         printf("interrupt                   total\n");
  571         while (--nintr >= 0) {
  572                 if (*curintr)
  573                         printf("%-12s %20lu\n", curname, *curintr);
  574                 curname += strlen(curname) + 1;
  575                 inttotal += *curintr++;
  576         }
  577         printf("Total        %20ju\n", (uintmax_t)inttotal);
  578 
  579 #ifdef KDB
  580         kdb_backtrace();
  581         kdb_enter("watchdog timeout");
  582 #else
  583         panic("watchdog timeout");
  584 #endif /* KDB */
  585 }
  586 
  587 #endif /* SW_WATCHDOG */

Cache object: cdfc23b396c2395ba6cd9bb7ed275fdc


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.