The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_clock.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. All advertising materials mentioning features or use of this software
   19  *    must display the following acknowledgement:
   20  *      This product includes software developed by the University of
   21  *      California, Berkeley and its contributors.
   22  * 4. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  *
   38  *      @(#)kern_clock.c        8.5 (Berkeley) 1/21/94
   39  * $FreeBSD: releng/5.1/sys/kern/kern_clock.c 114216 2003-04-29 13:36:06Z kan $
   40  */
   41 
   42 #include "opt_ntp.h"
   43 
   44 #include <sys/param.h>
   45 #include <sys/systm.h>
   46 #include <sys/callout.h>
   47 #include <sys/kernel.h>
   48 #include <sys/lock.h>
   49 #include <sys/ktr.h>
   50 #include <sys/mutex.h>
   51 #include <sys/proc.h>
   52 #include <sys/resource.h>
   53 #include <sys/resourcevar.h>
   54 #include <sys/sched.h>
   55 #include <sys/signalvar.h>
   56 #include <sys/smp.h>
   57 #include <vm/vm.h>
   58 #include <vm/pmap.h>
   59 #include <vm/vm_map.h>
   60 #include <sys/sysctl.h>
   61 #include <sys/bus.h>
   62 #include <sys/interrupt.h>
   63 #include <sys/limits.h>
   64 #include <sys/timetc.h>
   65 
   66 #include <machine/cpu.h>
   67 
   68 #ifdef GPROF
   69 #include <sys/gmon.h>
   70 #endif
   71 
   72 #ifdef DEVICE_POLLING
   73 extern void hardclock_device_poll(void);
   74 #endif /* DEVICE_POLLING */
   75 
   76 static void initclocks(void *dummy);
   77 SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)
   78 
   79 /* Some of these don't belong here, but it's easiest to concentrate them. */
   80 long cp_time[CPUSTATES];
   81 
   82 SYSCTL_OPAQUE(_kern, OID_AUTO, cp_time, CTLFLAG_RD, &cp_time, sizeof(cp_time),
   83     "LU", "CPU time statistics");
   84 
   85 /*
   86  * Clock handling routines.
   87  *
   88  * This code is written to operate with two timers that run independently of
   89  * each other.
   90  *
   91  * The main timer, running hz times per second, is used to trigger interval
   92  * timers, timeouts and rescheduling as needed.
   93  *
   94  * The second timer handles kernel and user profiling,
   95  * and does resource use estimation.  If the second timer is programmable,
   96  * it is randomized to avoid aliasing between the two clocks.  For example,
   97  * the randomization prevents an adversary from always giving up the cpu
   98  * just before its quantum expires.  Otherwise, it would never accumulate
   99  * cpu ticks.  The mean frequency of the second timer is stathz.
  100  *
  101  * If no second timer exists, stathz will be zero; in this case we drive
  102  * profiling and statistics off the main clock.  This WILL NOT be accurate;
  103  * do not do it unless absolutely necessary.
  104  *
  105  * The statistics clock may (or may not) be run at a higher rate while
  106  * profiling.  This profile clock runs at profhz.  We require that profhz
  107  * be an integral multiple of stathz.
  108  *
  109  * If the statistics clock is running fast, it must be divided by the ratio
  110  * profhz/stathz for statistics.  (For profiling, every tick counts.)
  111  *
  112  * Time-of-day is maintained using a "timecounter", which may or may
  113  * not be related to the hardware generating the above mentioned
  114  * interrupts.
  115  */
  116 
  117 int     stathz;
  118 int     profhz;
  119 int     profprocs;
  120 int     ticks;
  121 int     psratio;
  122 
  123 /*
  124  * Initialize clock frequencies and start both clocks running.
  125  */
  126 /* ARGSUSED*/
  127 static void
  128 initclocks(dummy)
  129         void *dummy;
  130 {
  131         register int i;
  132 
  133         /*
  134          * Set divisors to 1 (normal case) and let the machine-specific
  135          * code do its bit.
  136          */
  137         cpu_initclocks();
  138 
  139         /*
  140          * Compute profhz/stathz, and fix profhz if needed.
  141          */
  142         i = stathz ? stathz : hz;
  143         if (profhz == 0)
  144                 profhz = i;
  145         psratio = profhz / i;
  146 }
  147 
  148 /*
  149  * Each time the real-time timer fires, this function is called on all CPUs.
  150  * Note that hardclock() calls hardclock_process() for the boot CPU, so only
  151  * the other CPUs in the system need to call this function.
  152  */
  153 void
  154 hardclock_process(frame)
  155         register struct clockframe *frame;
  156 {
  157         struct pstats *pstats;
  158         struct thread *td = curthread;
  159         struct proc *p = td->td_proc;
  160 
  161         /*
  162          * Run current process's virtual and profile time, as needed.
  163          */
  164         mtx_lock_spin_flags(&sched_lock, MTX_QUIET);
  165         if (p->p_flag & P_THREADED) {
  166                 /* XXXKSE What to do? */
  167         } else {
  168                 pstats = p->p_stats;
  169                 if (CLKF_USERMODE(frame) &&
  170                     timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
  171                     itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) {
  172                         p->p_sflag |= PS_ALRMPEND;
  173                         td->td_flags |= TDF_ASTPENDING;
  174                 }
  175                 if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
  176                     itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) {
  177                         p->p_sflag |= PS_PROFPEND;
  178                         td->td_flags |= TDF_ASTPENDING;
  179                 }
  180         }
  181         mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
  182 }
  183 
  184 /*
  185  * The real-time timer, interrupting hz times per second.
  186  */
  187 void
  188 hardclock(frame)
  189         register struct clockframe *frame;
  190 {
  191         int need_softclock = 0;
  192 
  193         CTR0(KTR_CLK, "hardclock fired");
  194         hardclock_process(frame);
  195 
  196         tc_ticktock();
  197         /*
  198          * If no separate statistics clock is available, run it from here.
  199          *
  200          * XXX: this only works for UP
  201          */
  202         if (stathz == 0) {
  203                 profclock(frame);
  204                 statclock(frame);
  205         }
  206 
  207 #ifdef DEVICE_POLLING
  208         hardclock_device_poll();        /* this is very short and quick */
  209 #endif /* DEVICE_POLLING */
  210 
  211         /*
  212          * Process callouts at a very low cpu priority, so we don't keep the
  213          * relatively high clock interrupt priority any longer than necessary.
  214          */
  215         mtx_lock_spin_flags(&callout_lock, MTX_QUIET);
  216         ticks++;
  217         if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) {
  218                 need_softclock = 1;
  219         } else if (softticks + 1 == ticks)
  220                 ++softticks;
  221         mtx_unlock_spin_flags(&callout_lock, MTX_QUIET);
  222 
  223         /*
  224          * swi_sched acquires sched_lock, so we don't want to call it with
  225          * callout_lock held; incorrect locking order.
  226          */
  227         if (need_softclock)
  228                 swi_sched(softclock_ih, 0);
  229 }
  230 
  231 /*
  232  * Compute number of ticks in the specified amount of time.
  233  */
  234 int
  235 tvtohz(tv)
  236         struct timeval *tv;
  237 {
  238         register unsigned long ticks;
  239         register long sec, usec;
  240 
  241         /*
  242          * If the number of usecs in the whole seconds part of the time
  243          * difference fits in a long, then the total number of usecs will
  244          * fit in an unsigned long.  Compute the total and convert it to
  245          * ticks, rounding up and adding 1 to allow for the current tick
  246          * to expire.  Rounding also depends on unsigned long arithmetic
  247          * to avoid overflow.
  248          *
  249          * Otherwise, if the number of ticks in the whole seconds part of
  250          * the time difference fits in a long, then convert the parts to
  251          * ticks separately and add, using similar rounding methods and
  252          * overflow avoidance.  This method would work in the previous
  253          * case but it is slightly slower and assumes that hz is integral.
  254          *
  255          * Otherwise, round the time difference down to the maximum
  256          * representable value.
  257          *
  258          * If ints have 32 bits, then the maximum value for any timeout in
  259          * 10ms ticks is 248 days.
  260          */
  261         sec = tv->tv_sec;
  262         usec = tv->tv_usec;
  263         if (usec < 0) {
  264                 sec--;
  265                 usec += 1000000;
  266         }
  267         if (sec < 0) {
  268 #ifdef DIAGNOSTIC
  269                 if (usec > 0) {
  270                         sec++;
  271                         usec -= 1000000;
  272                 }
  273                 printf("tvotohz: negative time difference %ld sec %ld usec\n",
  274                        sec, usec);
  275 #endif
  276                 ticks = 1;
  277         } else if (sec <= LONG_MAX / 1000000)
  278                 ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
  279                         / tick + 1;
  280         else if (sec <= LONG_MAX / hz)
  281                 ticks = sec * hz
  282                         + ((unsigned long)usec + (tick - 1)) / tick + 1;
  283         else
  284                 ticks = LONG_MAX;
  285         if (ticks > INT_MAX)
  286                 ticks = INT_MAX;
  287         return ((int)ticks);
  288 }
  289 
  290 /*
  291  * Start profiling on a process.
  292  *
  293  * Kernel profiling passes proc0 which never exits and hence
  294  * keeps the profile clock running constantly.
  295  */
  296 void
  297 startprofclock(p)
  298         register struct proc *p;
  299 {
  300 
  301         /*
  302          * XXX; Right now sched_lock protects statclock(), but perhaps
  303          * it should be protected later on by a time_lock, which would
  304          * cover psdiv, etc. as well.
  305          */
  306         PROC_LOCK_ASSERT(p, MA_OWNED);
  307         if (p->p_flag & P_STOPPROF)
  308                 return;
  309         if ((p->p_flag & P_PROFIL) == 0) {
  310                 mtx_lock_spin(&sched_lock);
  311                 p->p_flag |= P_PROFIL;
  312                 if (++profprocs == 1)
  313                         cpu_startprofclock();
  314                 mtx_unlock_spin(&sched_lock);
  315         }
  316 }
  317 
  318 /*
  319  * Stop profiling on a process.
  320  */
  321 void
  322 stopprofclock(p)
  323         register struct proc *p;
  324 {
  325 
  326         PROC_LOCK_ASSERT(p, MA_OWNED);
  327         if (p->p_flag & P_PROFIL) {
  328                 if (p->p_profthreads != 0) {
  329                         p->p_flag |= P_STOPPROF;
  330                         while (p->p_profthreads != 0)
  331                                 msleep(&p->p_profthreads, &p->p_mtx, PPAUSE,
  332                                     "stopprof", NULL);
  333                         p->p_flag &= ~P_STOPPROF;
  334                 }
  335                 mtx_lock_spin(&sched_lock);
  336                 p->p_flag &= ~P_PROFIL;
  337                 if (--profprocs == 0)
  338                         cpu_stopprofclock();
  339                 mtx_unlock_spin(&sched_lock);
  340         }
  341 }
  342 
  343 /*
  344  * Statistics clock.  Grab profile sample, and if divider reaches 0,
  345  * do process and kernel statistics.  Most of the statistics are only
  346  * used by user-level statistics programs.  The main exceptions are
  347  * ke->ke_uticks, p->p_sticks, p->p_iticks, and p->p_estcpu.
  348  * This should be called by all active processors.
  349  */
  350 void
  351 statclock(frame)
  352         register struct clockframe *frame;
  353 {
  354         struct pstats *pstats;
  355         struct rusage *ru;
  356         struct vmspace *vm;
  357         struct thread *td;
  358         struct kse *ke;
  359         struct proc *p;
  360         long rss;
  361 
  362         td = curthread;
  363         p = td->td_proc;
  364 
  365         mtx_lock_spin_flags(&sched_lock, MTX_QUIET);
  366         ke = td->td_kse;
  367         if (CLKF_USERMODE(frame)) {
  368                 /*
  369                  * Charge the time as appropriate.
  370                  */
  371                 if (p->p_flag & P_THREADED)
  372                         thread_statclock(1);
  373                 p->p_uticks++;
  374                 if (ke->ke_ksegrp->kg_nice > NZERO)
  375                         cp_time[CP_NICE]++;
  376                 else
  377                         cp_time[CP_USER]++;
  378         } else {
  379                 /*
  380                  * Came from kernel mode, so we were:
  381                  * - handling an interrupt,
  382                  * - doing syscall or trap work on behalf of the current
  383                  *   user process, or
  384                  * - spinning in the idle loop.
  385                  * Whichever it is, charge the time as appropriate.
  386                  * Note that we charge interrupts to the current process,
  387                  * regardless of whether they are ``for'' that process,
  388                  * so that we know how much of its real time was spent
  389                  * in ``non-process'' (i.e., interrupt) work.
  390                  */
  391                 if ((td->td_ithd != NULL) || td->td_intr_nesting_level >= 2) {
  392                         p->p_iticks++;
  393                         cp_time[CP_INTR]++;
  394                 } else {
  395                         if (p->p_flag & P_THREADED)
  396                                 thread_statclock(0);
  397                         td->td_sticks++;
  398                         p->p_sticks++;
  399                         if (p != PCPU_GET(idlethread)->td_proc)
  400                                 cp_time[CP_SYS]++;
  401                         else
  402                                 cp_time[CP_IDLE]++;
  403                 }
  404         }
  405 
  406         sched_clock(ke);
  407 
  408         /* Update resource usage integrals and maximums. */
  409         if ((pstats = p->p_stats) != NULL &&
  410             (ru = &pstats->p_ru) != NULL &&
  411             (vm = p->p_vmspace) != NULL) {
  412                 ru->ru_ixrss += pgtok(vm->vm_tsize);
  413                 ru->ru_idrss += pgtok(vm->vm_dsize);
  414                 ru->ru_isrss += pgtok(vm->vm_ssize);
  415                 rss = pgtok(vmspace_resident_count(vm));
  416                 if (ru->ru_maxrss < rss)
  417                         ru->ru_maxrss = rss;
  418         }
  419         mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
  420 }
  421 
  422 void
  423 profclock(frame)
  424         register struct clockframe *frame;
  425 {
  426         struct thread *td;
  427 #ifdef GPROF
  428         struct gmonparam *g;
  429         int i;
  430 #endif
  431 
  432         td = curthread;
  433         if (CLKF_USERMODE(frame)) {
  434                 /*
  435                  * Came from user mode; CPU was in user state.
  436                  * If this process is being profiled, record the tick.
  437                  * if there is no related user location yet, don't
  438                  * bother trying to count it.
  439                  */
  440                 td = curthread;
  441                 if (td->td_proc->p_flag & P_PROFIL)
  442                         addupc_intr(td, CLKF_PC(frame), 1);
  443         }
  444 #ifdef GPROF
  445         else {
  446                 /*
  447                  * Kernel statistics are just like addupc_intr, only easier.
  448                  */
  449                 g = &_gmonparam;
  450                 if (g->state == GMON_PROF_ON) {
  451                         i = CLKF_PC(frame) - g->lowpc;
  452                         if (i < g->textsize) {
  453                                 i /= HISTFRACTION * sizeof(*g->kcount);
  454                                 g->kcount[i]++;
  455                         }
  456                 }
  457         }
  458 #endif
  459 }
  460 
  461 /*
  462  * Return information about system clocks.
  463  */
  464 static int
  465 sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
  466 {
  467         struct clockinfo clkinfo;
  468         /*
  469          * Construct clockinfo structure.
  470          */
  471         bzero(&clkinfo, sizeof(clkinfo));
  472         clkinfo.hz = hz;
  473         clkinfo.tick = tick;
  474         clkinfo.profhz = profhz;
  475         clkinfo.stathz = stathz ? stathz : hz;
  476         return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
  477 }
  478 
  479 SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
  480         0, 0, sysctl_kern_clockrate, "S,clockinfo",
  481         "Rate and period of various kernel clocks");

Cache object: 832e527d5af16b45f3b70427742c575f


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.