The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_clock.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: kern_clock.c,v 1.148 2022/03/19 14:34:47 riastradh Exp $       */
    2 
    3 /*-
    4  * Copyright (c) 2000, 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
    9  * NASA Ames Research Center.
   10  * This code is derived from software contributed to The NetBSD Foundation
   11  * by Charles M. Hannum.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32  * POSSIBILITY OF SUCH DAMAGE.
   33  */
   34 
   35 /*-
   36  * Copyright (c) 1982, 1986, 1991, 1993
   37  *      The Regents of the University of California.  All rights reserved.
   38  * (c) UNIX System Laboratories, Inc.
   39  * All or some portions of this file are derived from material licensed
   40  * to the University of California by American Telephone and Telegraph
   41  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   42  * the permission of UNIX System Laboratories, Inc.
   43  *
   44  * Redistribution and use in source and binary forms, with or without
   45  * modification, are permitted provided that the following conditions
   46  * are met:
   47  * 1. Redistributions of source code must retain the above copyright
   48  *    notice, this list of conditions and the following disclaimer.
   49  * 2. Redistributions in binary form must reproduce the above copyright
   50  *    notice, this list of conditions and the following disclaimer in the
   51  *    documentation and/or other materials provided with the distribution.
   52  * 3. Neither the name of the University nor the names of its contributors
   53  *    may be used to endorse or promote products derived from this software
   54  *    without specific prior written permission.
   55  *
   56  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   57  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   58  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   59  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   60  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   61  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   62  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   63  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   64  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   65  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   66  * SUCH DAMAGE.
   67  *
   68  *      @(#)kern_clock.c        8.5 (Berkeley) 1/21/94
   69  */
   70 
   71 #include <sys/cdefs.h>
   72 __KERNEL_RCSID(0, "$NetBSD: kern_clock.c,v 1.148 2022/03/19 14:34:47 riastradh Exp $");
   73 
   74 #ifdef _KERNEL_OPT
   75 #include "opt_dtrace.h"
   76 #include "opt_gprof.h"
   77 #include "opt_multiprocessor.h"
   78 #endif
   79 
   80 #include <sys/param.h>
   81 #include <sys/systm.h>
   82 #include <sys/callout.h>
   83 #include <sys/kernel.h>
   84 #include <sys/proc.h>
   85 #include <sys/resourcevar.h>
   86 #include <sys/signalvar.h>
   87 #include <sys/sysctl.h>
   88 #include <sys/timex.h>
   89 #include <sys/sched.h>
   90 #include <sys/time.h>
   91 #include <sys/timetc.h>
   92 #include <sys/cpu.h>
   93 #include <sys/atomic.h>
   94 #include <sys/rndsource.h>
   95 
   96 #ifdef GPROF
   97 #include <sys/gmon.h>
   98 #endif
   99 
  100 #ifdef KDTRACE_HOOKS
  101 #include <sys/dtrace_bsd.h>
  102 #include <sys/cpu.h>
  103 
  104 cyclic_clock_func_t     cyclic_clock_func[MAXCPUS];
  105 #endif
  106 
  107 static int sysctl_kern_clockrate(SYSCTLFN_PROTO);
  108 
  109 /*
  110  * Clock handling routines.
  111  *
  112  * This code is written to operate with two timers that run independently of
  113  * each other.  The main clock, running hz times per second, is used to keep
  114  * track of real time.  The second timer handles kernel and user profiling,
  115  * and does resource use estimation.  If the second timer is programmable,
  116  * it is randomized to avoid aliasing between the two clocks.  For example,
  117  * the randomization prevents an adversary from always giving up the CPU
  118  * just before its quantum expires.  Otherwise, it would never accumulate
  119  * CPU ticks.  The mean frequency of the second timer is stathz.
  120  *
  121  * If no second timer exists, stathz will be zero; in this case we drive
  122  * profiling and statistics off the main clock.  This WILL NOT be accurate;
  123  * do not do it unless absolutely necessary.
  124  *
  125  * The statistics clock may (or may not) be run at a higher rate while
  126  * profiling.  This profile clock runs at profhz.  We require that profhz
  127  * be an integral multiple of stathz.
  128  *
  129  * If the statistics clock is running fast, it must be divided by the ratio
  130  * profhz/stathz for statistics.  (For profiling, every tick counts.)
  131  */
  132 
  133 int     stathz;                         /* mean statclock rate; 0 = statclock() is driven from hardclock() */
  134 int     profhz;                         /* profiling clock rate; initclocks() defaults it to stathz (or hz) */
  135 int     profsrc;                        /* profiling source selector; statclock() compares it with PROFSRC_CLOCK */
  136 int     schedhz;                        /* 0 = schedclock() is called from hardclock() every hardscheddiv ticks */
  137 int     profprocs;                      /* number of processes that currently have PST_PROFIL set */
  138 static int hardclock_ticks;             /* hardclock() count, advanced by the primary CPU only; see getticks() */
  139 static int hardscheddiv; /* hard => sched divider (used if schedhz == 0) */
  140 static int psdiv;                       /* prof => stat divider */
  141 int     psratio;                        /* ratio: prof / stat */
  142 
  143 struct clockrnd {
  144         struct krndsource source;       /* entropy source attached in initclocks() */
  145         unsigned needed;                /* samples still wanted; 0 means not sampling */
  146 };
  147 
      /*
       * One sampler per clock interrupt: hardclock() feeds hardclockrnd and
       * statclock() feeds statclockrnd (the latter only when stathz != 0,
       * which is also the only case in which it is attached).
       */
  148 static struct clockrnd hardclockrnd __aligned(COHERENCY_UNIT);
  149 static struct clockrnd statclockrnd __aligned(COHERENCY_UNIT);
  150 
  151 static void
  152 clockrnd_get(size_t needed, void *cookie)
  153 {
              /*
               * Callback installed on the clock entropy sources by
               * initclocks() through rndsource_setcb(); cookie is the
               * struct clockrnd that was registered there.  It arms sampling
               * by publishing a sample budget that clockrnd_sample() counts
               * down to zero.
               *
               * NOTE(review): needed is presumably a byte count (hence the
               * NBBY scaling) and the factor of 2 a safety margin --
               * confirm against the rndsource callback contract.
               */
  154         struct clockrnd *C = cookie;
  155 
  156         /* Start sampling.  */
  157         atomic_store_relaxed(&C->needed, 2*NBBY*needed);
  158 }
  159 
  160 static void
  161 clockrnd_sample(struct clockrnd *C)
  162 {
              /*
               * Called from hardclock() and statclock() on each tick.  While
               * a request is outstanding (C->needed != 0), the first CPU of
               * each package enters a sample into C->source; only the
               * primary CPU counts the outstanding budget down.
               */
  163         struct cpu_info *ci = curcpu();
  164 
  165         /* If there's nothing needed right now, stop here.  */
  166         if (__predict_true(atomic_load_relaxed(&C->needed) == 0))
  167                 return;
  168 
  169         /*
  170          * If we're not the primary core of a package, we're probably
  171          * driven by the same clock as the primary core, so don't
  172          * bother.
  173          */
  174         if (ci != ci->ci_package1st)
  175                 return;
  176 
  177         /* Take a sample and enter it into the pool.  */
              /*
               * NOTE(review): the value passed is a constant 0; the source
               * is attached with RND_FLAG_COLLECT_TIME, so presumably the
               * time of the call is what gets collected -- confirm.
               */
  178         rnd_add_uint32(&C->source, 0);
  179 
  180         /*
  181          * On the primary CPU, count down.  Using an atomic decrement
  182          * here isn't really necessary -- on every platform we care
  183          * about, stores to unsigned int are atomic, and the only other
  184          * memory operation that could happen here is for another CPU
  185          * to store a higher value for needed.  But using an atomic
  186          * decrement avoids giving the impression of data races, and is
  187          * unlikely to hurt because only one CPU will ever be writing
  188          * to the location.
  189          */
  190         if (CPU_IS_PRIMARY(curcpu())) {
  191                 unsigned needed __diagused;
  192 
  193                 needed = atomic_dec_uint_nv(&C->needed);
                      /* UINT_MAX here would mean the counter was decremented from 0. */
  194                 KASSERT(needed != UINT_MAX);
  195         }
  196 }
  197 
  198 static u_int get_intr_timecount(struct timecounter *);
  199 
      /*
       * Fallback timecounter registered by initclocks().  Its counter is the
       * hardclock tick count (see get_intr_timecount()), so it only has
       * clock-interrupt resolution; tc_frequency is filled in with hz by
       * initclocks() before tc_init() is called.
       */
  200 static struct timecounter intr_timecounter = {
  201         .tc_get_timecount       = get_intr_timecount,
  202         .tc_poll_pps            = NULL,
  203         .tc_counter_mask        = ~0u,
  204         .tc_frequency           = 0,
  205         .tc_name                = "clockinterrupt",
  206         /* quality - minimum implementation level for a clock */
  207         .tc_quality             = 0,
  208         .tc_priv                = NULL,
  209 };
  210 
  211 static u_int
  212 get_intr_timecount(struct timecounter *tc)
  213 {
              /*
               * tc_get_timecount hook of intr_timecounter: the counter value
               * is simply the current hardclock tick count.  tc is unused.
               */
  214 
  215         return (u_int)getticks();
  216 }
  217 
  218 int
  219 getticks(void)
  220 {
              /*
               * Return the current value of hardclock_ticks, the counter
               * that hardclock() advances on the primary CPU.  The read is a
               * relaxed atomic load, matching the relaxed store used there.
               */
  221         return atomic_load_relaxed(&hardclock_ticks);
  222 }
  223 
  224 /*
  225  * Initialize clock frequencies and start both clocks running.
       *
       * Order of work: call cpu_initclocks() (recomputing tick and tickadj
       * if it changed hz), register the interrupt-driven fallback
       * timecounter, derive profhz, psratio and hardscheddiv, create the
       * "clockrate" and "hardclock_ticks" sysctl nodes under CTL_KERN, and
       * attach the clock entropy sources.
  226  */
  227 void
  228 initclocks(void)
  229 {
  230         static struct sysctllog *clog;
  231         int i;
  232 
  233         /*
  234          * Set divisors to 1 (normal case) and let the machine-specific
  235          * code do its bit.
  236          */
  237         psdiv = 1;
  238 
  239         /*
  240          * Call cpu_initclocks() before registering the default
  241          * timecounter, in case it needs to adjust hz.
  242          */
  243         const int old_hz = hz;
  244         cpu_initclocks();
  245         if (old_hz != hz) {
                      /* tick is microseconds per hardclock tick; tickadj is kept at 1 or more. */
  246                 tick = 1000000 / hz;
  247                 tickadj = (240000 / (60 * hz)) ? (240000 / (60 * hz)) : 1;
  248         }
  249 
  250         /*
  251          * provide minimum default time counter
  252          * will only run at interrupt resolution
  253          */
  254         intr_timecounter.tc_frequency = hz;
  255         tc_init(&intr_timecounter);
  256 
  257         /*
  258          * Compute profhz and stathz, fix profhz if needed.
  259          */
              /* i is the effective statistics rate: hz when there is no separate statclock. */
  260         i = stathz ? stathz : hz;
  261         if (profhz == 0)
  262                 profhz = i;
  263         psratio = profhz / i;
  264         if (schedhz == 0) {
  265                 /* 16Hz is best */
  266                 hardscheddiv = hz / 16;
                      /* hz below 16 would give a zero divider. */
  267                 if (hardscheddiv <= 0)
  268                         panic("hardscheddiv");
  269         }
  270 
  271         sysctl_createv(&clog, 0, NULL, NULL,
  272                        CTLFLAG_PERMANENT,
  273                        CTLTYPE_STRUCT, "clockrate",
  274                        SYSCTL_DESCR("Kernel clock rates"),
  275                        sysctl_kern_clockrate, 0, NULL,
  276                        sizeof(struct clockinfo),
  277                        CTL_KERN, KERN_CLOCKRATE, CTL_EOL);
  278         sysctl_createv(&clog, 0, NULL, NULL,
  279                        CTLFLAG_PERMANENT,
  280                        CTLTYPE_INT, "hardclock_ticks",
  281                        SYSCTL_DESCR("Number of hardclock ticks"),
  282                        NULL, 0, &hardclock_ticks, sizeof(hardclock_ticks),
  283                        CTL_KERN, KERN_HARDCLOCK_TICKS, CTL_EOL);
  284 
              /*
               * Attach the clock entropy sources.  clockrnd_get() is the
               * callback for both; the statclock source is attached only
               * when a separate statistics clock exists.
               */
  285         rndsource_setcb(&hardclockrnd.source, clockrnd_get, &hardclockrnd);
  286         rnd_attach_source(&hardclockrnd.source, "hardclock", RND_TYPE_SKEW,
  287             RND_FLAG_COLLECT_TIME|RND_FLAG_HASCB);
  288         if (stathz) {
  289                 rndsource_setcb(&statclockrnd.source, clockrnd_get,
  290                     &statclockrnd);
  291                 rnd_attach_source(&statclockrnd.source, "statclock",
  292                     RND_TYPE_SKEW, RND_FLAG_COLLECT_TIME|RND_FLAG_HASCB);
  293         }
  294 }
  295 
  296 /*
  297  * The real-time timer, interrupting hz times per second.
       *
       * frame describes the interrupted context.  The per-CPU work
       * (entropy sampling, ptimer_tick(), the optional statclock() and
       * schedclock() calls, the sched_tick() countdown and
       * callout_hardclock()) is done by whichever CPU takes the
       * interrupt; the CPU_IS_PRIMARY() guard limits advancing
       * hardclock_ticks and calling tc_ticktock() to the primary CPU.
  298  */
  299 void
  300 hardclock(struct clockframe *frame)
  301 {
  302         struct lwp *l;
  303         struct cpu_info *ci;
  304 
  305         clockrnd_sample(&hardclockrnd);
  306 
  307         ci = curcpu();
  308         l = ci->ci_onproc;
  309 
              /* Hand ptimer_tick() the LWP on this CPU and whether the tick hit user mode. */
  310         ptimer_tick(l, CLKF_USERMODE(frame));
  311 
  312         /*
  313          * If no separate statistics clock is available, run it from here.
  314          */
  315         if (stathz == 0)
  316                 statclock(frame);
  317         /*
  318          * If no separate schedclock is provided, call it here
  319          * at about 16 Hz.
  320          */
  321         if (schedhz == 0) {
                      /*
                       * Count down hardscheddiv (hz / 16) ticks between calls.
                       * NOTE(review): the (int) cast suggests spc_schedticks
                       * is an unsigned field -- confirm.
                       */
  322                 if ((int)(--ci->ci_schedstate.spc_schedticks) <= 0) {
  323                         schedclock(l);
  324                         ci->ci_schedstate.spc_schedticks = hardscheddiv;
  325                 }
  326         }
  327         if ((--ci->ci_schedstate.spc_ticks) <= 0)
  328                 sched_tick(ci);
  329 
  330         if (CPU_IS_PRIMARY(ci)) {
                      /*
                       * Only the primary CPU writes hardclock_ticks, so a
                       * relaxed load followed by a relaxed store is used
                       * rather than an atomic read-modify-write.
                       */
  331                 atomic_store_relaxed(&hardclock_ticks,
  332                     atomic_load_relaxed(&hardclock_ticks) + 1);
  333                 tc_ticktock();
  334         }
  335 
  336         /*
  337          * Update real-time timeout queue.
  338          */
  339         callout_hardclock();
  340 }
  341 
  342 /*
  343  * Start profiling on a process.
  344  *
  345  * Kernel profiling passes proc0 which never exits and hence
  346  * keeps the profile clock running constantly.
       *
       * The caller must hold p->p_stmutex (asserted below).  Calling this
       * on a process that already has PST_PROFIL set is a no-op.
  347  */
  348 void
  349 startprofclock(struct proc *p)
  350 {
  351 
  352         KASSERT(mutex_owned(&p->p_stmutex));
  353 
  354         if ((p->p_stflag & PST_PROFIL) == 0) {
  355                 p->p_stflag |= PST_PROFIL;
  356                 /*
  357                  * This is only necessary if using the clock as the
  358                  * profiling source.
                       *
                       * When the first process starts profiling and a
                       * separate statclock exists, switch the divider to
                       * psratio; statclock() notices the new psdiv on each
                       * CPU and reprograms the clock rate.
  359                  */
  360                 if (++profprocs == 1 && stathz != 0)
  361                         psdiv = psratio;
  362         }
  363 }
  364 
  365 /*
  366  * Stop profiling on a process.
       *
       * The caller must hold p->p_stmutex (asserted below).  Calling this
       * on a process that does not have PST_PROFIL set is a no-op.
  367  */
  368 void
  369 stopprofclock(struct proc *p)
  370 {
  371 
  372         KASSERT(mutex_owned(&p->p_stmutex));
  373 
  374         if (p->p_stflag & PST_PROFIL) {
  375                 p->p_stflag &= ~PST_PROFIL;
  376                 /*
  377                  * This is only necessary if using the clock as the
  378                  * profiling source.
                       *
                       * When the last profiled process stops and a separate
                       * statclock exists, return the divider to 1;
                       * statclock() notices the new psdiv on each CPU and
                       * reprograms the clock rate.
  379                  */
  380                 if (--profprocs == 0 && stathz != 0)
  381                         psdiv = 1;
  382         }
  383 }
  384 
  385 void
  386 schedclock(struct lwp *l)
  387 {
              /*
               * Scheduler clock: forward the tick for LWP l to
               * sched_schedclock(), except when l is an idle LWP.  In this
               * file it is called from hardclock() when schedhz == 0.
               */
  388         if ((l->l_flag & LW_IDLE) != 0)
  389                 return;
  390 
  391         sched_schedclock(l);
  392 }
  393 
  394 /*
  395  * Statistics clock.  Grab profile sample, and if divider reaches 0,
  396  * do process and kernel statistics.
       *
       * frame describes the interrupted context.  For a non-idle LWP the
       * owning process's p_stmutex is taken near the top and released on
       * every return path.  The per-CPU counter spc_pscnt counts down from
       * psdiv; ticks on which it has not yet reached 0 only contribute a
       * profiling sample and return early.
  397  */
  398 void
  399 statclock(struct clockframe *frame)
  400 {
  401 #ifdef GPROF
  402         struct gmonparam *g;
  403         intptr_t i;
  404 #endif
  405         struct cpu_info *ci = curcpu();
  406         struct schedstate_percpu *spc = &ci->ci_schedstate;
  407         struct proc *p;
  408         struct lwp *l;
  409 
              /*
               * With stathz == 0 this function is called from hardclock(),
               * which has already sampled hardclockrnd, and statclockrnd is
               * not attached.
               */
  410         if (stathz)
  411                 clockrnd_sample(&statclockrnd);
  412 
  413         /*
  414          * Notice changes in divisor frequency, and adjust clock
  415          * frequency accordingly.
               *
               * psdiv is changed by startprofclock()/stopprofclock(); each
               * CPU compares it with its own spc_psdiv copy here.
  416          */
  417         if (spc->spc_psdiv != psdiv) {
  418                 spc->spc_psdiv = psdiv;
  419                 spc->spc_pscnt = psdiv;
  420                 if (psdiv == 1) {
  421                         setstatclockrate(stathz);
  422                 } else {
  423                         setstatclockrate(profhz);
  424                 }
  425         }
  426         l = ci->ci_onproc;
  427         if ((l->l_flag & LW_IDLE) != 0) {
  428                 /*
  429                  * don't account idle lwps as swapper.
  430                  */
  431                 p = NULL;
  432         } else {
  433                 p = l->l_proc;
                      /* Held until the matching mutex_spin_exit() on each exit path. */
  434                 mutex_spin_enter(&p->p_stmutex);
  435         }
  436 
  437         if (CLKF_USERMODE(frame)) {
                      /* A tick taken in user mode is expected to come from a non-idle LWP. */
  438                 KASSERT(p != NULL);
  439                 if ((p->p_stflag & PST_PROFIL) && profsrc == PROFSRC_CLOCK)
  440                         addupc_intr(l, CLKF_PC(frame));
                      /* Divider not yet expired: profiling sample only, no statistics. */
  441                 if (--spc->spc_pscnt > 0) {
  442                         mutex_spin_exit(&p->p_stmutex);
  443                         return;
  444                 }
  445 
  446                 /*
  447                  * Came from user mode; CPU was in user state.
  448                  * Charge the tick to the process as user time, and to
                       * the per-CPU CP_NICE counter when p_nice is above
                       * NZERO, or to CP_USER otherwise.
  449                  */
  450                 p->p_uticks++;
  451                 if (p->p_nice > NZERO)
  452                         spc->spc_cp_time[CP_NICE]++;
  453                 else
  454                         spc->spc_cp_time[CP_USER]++;
  455         } else {
  456 #ifdef GPROF
  457                 /*
  458                  * Kernel statistics are just like addupc_intr, only easier.
                       *
                       * The interrupted PC, taken relative to g->lowpc, is
                       * scaled down to an index into the g->kcount histogram
                       * when it falls within g->textsize.
  459                  */
  460 #if defined(MULTIPROCESSOR) && !defined(_RUMPKERNEL)
  461                 g = curcpu()->ci_gmon;
  462                 if (g != NULL &&
  463                     profsrc == PROFSRC_CLOCK && g->state == GMON_PROF_ON) {
  464 #else
  465                 g = &_gmonparam;
  466                 if (profsrc == PROFSRC_CLOCK && g->state == GMON_PROF_ON) {
  467 #endif
  468                         i = CLKF_PC(frame) - g->lowpc;
  469                         if (i < g->textsize) {
  470                                 i /= HISTFRACTION * sizeof(*g->kcount);
  471                                 g->kcount[i]++;
  472                         }
  473                 }
  474 #endif
  475 #ifdef LWP_PC
                      /*
                       * Where the port defines LWP_PC, a profiled process
                       * interrupted in kernel mode is still sampled, using
                       * the PC that LWP_PC(l) reports.
                       */
  476                 if (p != NULL && profsrc == PROFSRC_CLOCK &&
  477                     (p->p_stflag & PST_PROFIL)) {
  478                         addupc_intr(l, LWP_PC(l));
  479                 }
  480 #endif
                      /* Divider not yet expired: profiling sample only, no statistics. */
  481                 if (--spc->spc_pscnt > 0) {
  482                         if (p != NULL)
  483                                 mutex_spin_exit(&p->p_stmutex);
  484                         return;
  485                 }
  486                 /*
  487                  * Came from kernel mode, so we were:
  488                  * - handling an interrupt,
  489                  * - doing syscall or trap work on behalf of the current
  490                  *   user process, or
  491                  * - spinning in the idle loop.
  492                  * Whichever it is, charge the time as appropriate.
  493                  * Note that we charge interrupts to the current process,
  494                  * regardless of whether they are ``for'' that process,
  495                  * so that we know how much of its real time was spent
  496                  * in ``non-process'' (i.e., interrupt) work.
  497                  */
  498                 if (CLKF_INTR(frame) || (curlwp->l_pflag & LP_INTR) != 0) {
  499                         if (p != NULL) {
  500                                 p->p_iticks++;
  501                         }
  502                         spc->spc_cp_time[CP_INTR]++;
  503                 } else if (p != NULL) {
  504                         p->p_sticks++;
  505                         spc->spc_cp_time[CP_SYS]++;
  506                 } else {
  507                         spc->spc_cp_time[CP_IDLE]++;
  508                 }
  509         }
              /* A statistics tick was accounted: restart the divider countdown. */
  510         spc->spc_pscnt = psdiv;
  511 
  512         if (p != NULL) {
                      /* Count the tick against the LWP itself, then drop the lock taken above. */
  513                 atomic_inc_uint(&l->l_cpticks);
  514                 mutex_spin_exit(&p->p_stmutex);
  515         }
  516 
  517 #ifdef KDTRACE_HOOKS
              /*
               * Call this CPU's cyclic hook, if one is installed.  The early
               * returns above skip this, so the hook only runs on ticks that
               * were accounted as statistics ticks.
               */
  518         cyclic_clock_func_t func = cyclic_clock_func[cpu_index(ci)];
  519         if (func) {
  520                 (*func)((struct clockframe *)frame);
  521         }
  522 #endif
  523 }
  524 
  525 /*
  526  * sysctl helper routine for kern.clockrate. Assembles a struct on
  527  * the fly to be returned to the caller.
       *
       * Installed as the handler of the "clockrate" node by initclocks().
       * Returns whatever sysctl_lookup() returns.
  528  */
  529 static int
  530 sysctl_kern_clockrate(SYSCTLFN_ARGS)
  531 {
  532         struct clockinfo clkinfo;
  533         struct sysctlnode node;
  534 
  535         clkinfo.tick = tick;
  536         clkinfo.tickadj = tickadj;
  537         clkinfo.hz = hz;
  538         clkinfo.profhz = profhz;
              /* Without a separate statclock, statistics run at hz, so report that. */
  539         clkinfo.stathz = stathz ? stathz : hz;
  540 
              /*
               * Work on a local copy of the node so that its data pointer
               * can be aimed at the on-stack struct without modifying
               * *rnode.
               */
  541         node = *rnode;
  542         node.sysctl_data = &clkinfo;
  543         return (sysctl_lookup(SYSCTLFN_CALL(&node)));
  544 }

Cache object: 543a264e8a8568b1ed08e0103bf3f66d


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.