kern_clock.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1986, 1991, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      @(#)kern_clock.c        8.5 (Berkeley) 1/21/94
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __FBSDID("$FreeBSD$");
   41 
   42 #include "opt_kdb.h"
   43 #include "opt_device_polling.h"
   44 #include "opt_hwpmc_hooks.h"
   45 #include "opt_ntp.h"
   46 #include "opt_watchdog.h"
   47 
   48 #include <sys/param.h>
   49 #include <sys/systm.h>
   50 #include <sys/callout.h>
   51 #include <sys/epoch.h>
   52 #include <sys/eventhandler.h>
   53 #include <sys/gtaskqueue.h>
   54 #include <sys/kdb.h>
   55 #include <sys/kernel.h>
   56 #include <sys/kthread.h>
   57 #include <sys/ktr.h>
   58 #include <sys/lock.h>
   59 #include <sys/mutex.h>
   60 #include <sys/proc.h>
   61 #include <sys/resource.h>
   62 #include <sys/resourcevar.h>
   63 #include <sys/sched.h>
   64 #include <sys/sdt.h>
   65 #include <sys/signalvar.h>
   66 #include <sys/sleepqueue.h>
   67 #include <sys/smp.h>
   68 #include <vm/vm.h>
   69 #include <vm/pmap.h>
   70 #include <vm/vm_map.h>
   71 #include <sys/sysctl.h>
   72 #include <sys/bus.h>
   73 #include <sys/interrupt.h>
   74 #include <sys/limits.h>
   75 #include <sys/timetc.h>
   76 
   77 #ifdef HWPMC_HOOKS
   78 #include <sys/pmckern.h>
      /*
       * Soft PMC events raised from hardclock(), statclock() and profclock()
       * below via PMC_SOFT_CALL_TF().
       */
   79 PMC_SOFT_DEFINE( , , clock, hard);
   80 PMC_SOFT_DEFINE( , , clock, stat);
   81 PMC_SOFT_DEFINE_EX( , , clock, prof, \
   82     cpu_startprofclock, cpu_stopprofclock);
   83 #endif
   84 
   85 #ifdef DEVICE_POLLING
      /* Defined elsewhere; called from hardclock() when DEVICE_POLLING is set. */
   86 extern void hardclock_device_poll(void);
   87 #endif /* DEVICE_POLLING */
   88 
   89 /*
      * Mutex serializing updates of profprocs and the matching
      * cpu_startprofclock()/cpu_stopprofclock() calls.  It is initialized
      * with MTX_DEF in initclocks() and taken with mtx_lock(), i.e. it is
      * not a spin lock.
      */
   90 static struct mtx time_lock;
   91 
      /* "tick" probe of the sched provider, fired from statclock(). */
   92 SDT_PROVIDER_DECLARE(sched);
   93 SDT_PROBE_DEFINE2(sched, , , tick, "struct thread *", "struct proc *");
   94 
   95 static int
   96 sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS)
   97 {
   98         int error;
   99         long cp_time[CPUSTATES];
  100 #ifdef SCTL_MASK32
  101         int i;
  102         unsigned int cp_time32[CPUSTATES];
  103 #endif
  104 
  105         read_cpu_time(cp_time);
  106 #ifdef SCTL_MASK32
  107         if (req->flags & SCTL_MASK32) {
  108                 if (!req->oldptr)
  109                         return SYSCTL_OUT(req, 0, sizeof(cp_time32));
  110                 for (i = 0; i < CPUSTATES; i++)
  111                         cp_time32[i] = (unsigned int)cp_time[i];
  112                 error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
  113         } else
  114 #endif
  115         {
  116                 if (!req->oldptr)
  117                         return SYSCTL_OUT(req, 0, sizeof(cp_time));
  118                 error = SYSCTL_OUT(req, cp_time, sizeof(cp_time));
  119         }
  120         return error;
  121 }
  122 
  123 SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
  124     0,0, sysctl_kern_cp_time, "LU", "CPU time statistics");
  125 
  126 static long empty[CPUSTATES];
  127 
  128 static int
  129 sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS)
  130 {
  131         struct pcpu *pcpu;
  132         int error;
  133         int c;
  134         long *cp_time;
  135 #ifdef SCTL_MASK32
  136         unsigned int cp_time32[CPUSTATES];
  137         int i;
  138 #endif
  139 
  140         if (!req->oldptr) {
  141 #ifdef SCTL_MASK32
  142                 if (req->flags & SCTL_MASK32)
  143                         return SYSCTL_OUT(req, 0, sizeof(cp_time32) * (mp_maxid + 1));
  144                 else
  145 #endif
  146                         return SYSCTL_OUT(req, 0, sizeof(long) * CPUSTATES * (mp_maxid + 1));
  147         }
  148         for (error = 0, c = 0; error == 0 && c <= mp_maxid; c++) {
  149                 if (!CPU_ABSENT(c)) {
  150                         pcpu = pcpu_find(c);
  151                         cp_time = pcpu->pc_cp_time;
  152                 } else {
  153                         cp_time = empty;
  154                 }
  155 #ifdef SCTL_MASK32
  156                 if (req->flags & SCTL_MASK32) {
  157                         for (i = 0; i < CPUSTATES; i++)
  158                                 cp_time32[i] = (unsigned int)cp_time[i];
  159                         error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
  160                 } else
  161 #endif
  162                         error = SYSCTL_OUT(req, cp_time, sizeof(long) * CPUSTATES);
  163         }
  164         return error;
  165 }
  166 
  167 SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
  168     0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics");
  169 
  170 #ifdef DEADLKRES
      /*
       * Wait messages that are exempt from the sleepqueue deadlock check:
       * deadlres_td_sleep_q() returns without panicking when td_wmesg
       * matches one of these.  The list is NULL-terminated.
       */
  171 static const char *blessed[] = {
  172         "getblk",
  173         "so_snd_sx",
  174         "so_rcv_sx",
  175         NULL
  176 };
      /*
       * Tunables, in seconds (multiplied by hz in deadlkres()):
       * longest tolerated sleep on a sleepqueue, longest tolerated block
       * on a turnstile, and the pause between two scans.
       */
  177 static int slptime_threshold = 1800;
  178 static int blktime_threshold = 900;
  179 static int sleepfreq = 3;
  180 
  181 static void
  182 deadlres_td_on_lock(struct proc *p, struct thread *td, int blkticks)
  183 {
  184         int tticks;
  185 
  186         sx_assert(&allproc_lock, SX_LOCKED);
  187         PROC_LOCK_ASSERT(p, MA_OWNED);
  188         THREAD_LOCK_ASSERT(td, MA_OWNED);
  189         /*
  190          * The thread should be blocked on a turnstile, simply check
  191          * if the turnstile channel is in good state.
  192          */
  193         MPASS(td->td_blocked != NULL);
  194 
  195         tticks = ticks - td->td_blktick;
  196         if (tticks > blkticks)
  197                 /*
  198                  * Accordingly with provided thresholds, this thread is stuck
  199                  * for too long on a turnstile.
  200                  */
  201                 panic("%s: possible deadlock detected for %p (%s), "
  202                     "blocked for %d ticks\n", __func__,
  203                     td, sched_tdname(td), tticks);
  204 }
  205 
  206 static void
  207 deadlres_td_sleep_q(struct proc *p, struct thread *td, int slpticks)
  208 {
  209         const void *wchan;
  210         int i, slptype, tticks;
  211 
  212         sx_assert(&allproc_lock, SX_LOCKED);
  213         PROC_LOCK_ASSERT(p, MA_OWNED);
  214         THREAD_LOCK_ASSERT(td, MA_OWNED);
  215         /*
  216          * Check if the thread is sleeping on a lock, otherwise skip the check.
  217          * Drop the thread lock in order to avoid a LOR with the sleepqueue
  218          * spinlock.
  219          */
  220         wchan = td->td_wchan;
  221         tticks = ticks - td->td_slptick;
  222         slptype = sleepq_type(wchan);
  223         if ((slptype == SLEEPQ_SX || slptype == SLEEPQ_LK) &&
  224             tticks > slpticks) {
  225                 /*
  226                  * Accordingly with provided thresholds, this thread is stuck
  227                  * for too long on a sleepqueue.
  228                  * However, being on a sleepqueue, we might still check for the
  229                  * blessed list.
  230                  */
  231                 for (i = 0; blessed[i] != NULL; i++)
  232                         if (!strcmp(blessed[i], td->td_wmesg))
  233                                 return;
  234 
  235                 panic("%s: possible deadlock detected for %p (%s), "
  236                     "blocked for %d ticks\n", __func__,
  237                     td, sched_tdname(td), tticks);
  238         }
  239 }
  240 
  241 static void
  242 deadlkres(void)
  243 {
  244         struct proc *p;
  245         struct thread *td;
  246         int blkticks, slpticks, tryl;
  247 
  248         tryl = 0;
  249         for (;;) {
  250                 blkticks = blktime_threshold * hz;
  251                 slpticks = slptime_threshold * hz;
  252 
  253                 /*
  254                  * Avoid to sleep on the sx_lock in order to avoid a
  255                  * possible priority inversion problem leading to
  256                  * starvation.
  257                  * If the lock can't be held after 100 tries, panic.
  258                  */
  259                 if (!sx_try_slock(&allproc_lock)) {
  260                         if (tryl > 100)
  261                                 panic("%s: possible deadlock detected "
  262                                     "on allproc_lock\n", __func__);
  263                         tryl++;
  264                         pause("allproc", sleepfreq * hz);
  265                         continue;
  266                 }
  267                 tryl = 0;
  268                 FOREACH_PROC_IN_SYSTEM(p) {
  269                         PROC_LOCK(p);
  270                         if (p->p_state == PRS_NEW) {
  271                                 PROC_UNLOCK(p);
  272                                 continue;
  273                         }
  274                         FOREACH_THREAD_IN_PROC(p, td) {
  275                                 thread_lock(td);
  276                                 if (TD_ON_LOCK(td))
  277                                         deadlres_td_on_lock(p, td,
  278                                             blkticks);
  279                                 else if (TD_IS_SLEEPING(td))
  280                                         deadlres_td_sleep_q(p, td,
  281                                             slpticks);
  282                                 thread_unlock(td);
  283                         }
  284                         PROC_UNLOCK(p);
  285                 }
  286                 sx_sunlock(&allproc_lock);
  287 
  288                 /* Sleep for sleepfreq seconds. */
  289                 pause("-", sleepfreq * hz);
  290         }
  291 }
  292 
      /*
       * Descriptor passed to kthread_start() by the SYSINIT below: thread
       * name, entry point, and no thread pointer to fill in.
       */
  293 static struct kthread_desc deadlkres_kd = {
  294         "deadlkres",
  295         deadlkres,
  296         (struct thread **)NULL
  297 };
  298 
  299 SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd);
  300 
      /*
       * debug.deadlkres.* knobs.  All three values are in seconds; deadlkres()
       * multiplies them by hz on every pass, so changes take effect on the
       * next scan.
       */
  301 static SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  302     "Deadlock resolver");
  303 SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW,
  304     &slptime_threshold, 0,
  305     "Number of seconds within is valid to sleep on a sleepqueue");
  306 SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW,
  307     &blktime_threshold, 0,
  308     "Number of seconds within is valid to block on a turnstile");
  309 SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0,
  310     "Number of seconds between any deadlock resolver thread run");
  311 #endif  /* DEADLKRES */
  312 
  313 void
  314 read_cpu_time(long *cp_time)
  315 {
  316         struct pcpu *pc;
  317         int i, j;
  318 
  319         /* Sum up global cp_time[]. */
  320         bzero(cp_time, sizeof(long) * CPUSTATES);
  321         CPU_FOREACH(i) {
  322                 pc = pcpu_find(i);
  323                 for (j = 0; j < CPUSTATES; j++)
  324                         cp_time[j] += pc->pc_cp_time[j];
  325         }
  326 }
  327 
  328 #include <sys/watchdog.h>
  329 
      /*
       * Software watchdog state.  watchdog_ticks is the number of ticks left
       * before the watchdog fires; it is loaded by watchdog_config() and
       * counted down in hardclock().  watchdog_enabled is non-zero while the
       * watchdog is armed.
       */
  330 static int watchdog_ticks;
  331 static int watchdog_enabled;
  332 static void watchdog_fire(void);
  333 static void watchdog_config(void *, u_int, int *);
  334 
      /*
       * Hook watchdog_config() onto the watchdog_list event so that it is
       * called with each watchdog command.
       */
  335 static void
  336 watchdog_attach(void)
  337 {
  338         EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0);
  339 }
  340 
  341 /*
  342  * Clock handling routines.
  343  *
  344  * This code is written to operate with two timers that run independently of
  345  * each other.
  346  *
  347  * The main timer, running hz times per second, is used to trigger interval
  348  * timers, timeouts and rescheduling as needed.
  349  *
  350  * The second timer handles kernel and user profiling,
  351  * and does resource use estimation.  If the second timer is programmable,
  352  * it is randomized to avoid aliasing between the two clocks.  For example,
  353  * the randomization prevents an adversary from always giving up the cpu
  354  * just before its quantum expires.  Otherwise, it would never accumulate
  355  * cpu ticks.  The mean frequency of the second timer is stathz.
  356  *
  357  * If no second timer exists, stathz will be zero; in this case we drive
  358  * profiling and statistics off the main clock.  This WILL NOT be accurate;
  359  * do not do it unless absolutely necessary.
  360  *
  361  * The statistics clock may (or may not) be run at a higher rate while
  362  * profiling.  This profile clock runs at profhz.  We require that profhz
  363  * be an integral multiple of stathz.
  364  *
  365  * If the statistics clock is running fast, it must be divided by the ratio
  366  * profhz/stathz for statistics.  (For profiling, every tick counts.)
  367  *
  368  * Time-of-day is maintained using a "timecounter", which may or may
  369  * not be related to the hardware generating the above mentioned
  370  * interrupts.
  371  */
  372 
  373 int     stathz;         /* Mean statistics clock rate; 0 if no second timer. */
  374 int     profhz;         /* Profile clock rate; initclocks() defaults it if 0. */
  375 int     profprocs;      /* Processes with P_PROFIL set; under time_lock. */
  376 volatile int    ticks;  /* Global tick count, advanced by hardclock(). */
  377 int     psratio;        /* profhz / (stathz or hz), set in initclocks(). */
  378 
  379 DPCPU_DEFINE_STATIC(int, pcputicks);    /* Per-CPU version of ticks. */
  380 #ifdef DEVICE_POLLING
      /* Set to 1 while one CPU is inside hardclock_device_poll(). */
  381 static int devpoll_run = 0;
  382 #endif
  383 
  384 static void
  385 ast_oweupc(struct thread *td, int tda __unused)
  386 {
  387         if ((td->td_proc->p_flag & P_PROFIL) == 0)
  388                 return;
  389         addupc_task(td, td->td_profil_addr, td->td_profil_ticks);
  390         td->td_profil_ticks = 0;
  391         td->td_pflags &= ~TDP_OWEUPC;
  392 }
  393 
  394 static void
  395 ast_alrm(struct thread *td, int tda __unused)
  396 {
  397         struct proc *p;
  398 
  399         p = td->td_proc;
  400         PROC_LOCK(p);
  401         kern_psignal(p, SIGVTALRM);
  402         PROC_UNLOCK(p);
  403 }
  404 
  405 static void
  406 ast_prof(struct thread *td, int tda __unused)
  407 {
  408         struct proc *p;
  409 
  410         p = td->td_proc;
  411         PROC_LOCK(p);
  412         kern_psignal(p, SIGPROF);
  413         PROC_UNLOCK(p);
  414 }
  415 
  416 /*
  417  * Initialize clock frequencies and start both clocks running.
  418  */
  419 static void
  420 initclocks(void *dummy __unused)
  421 {
  422         int i;
  423 
  424         /*
  425          * Set divisors to 1 (normal case) and let the machine-specific
  426          * code do its bit.
  427          */
  428         mtx_init(&time_lock, "time lock", NULL, MTX_DEF);
  429         cpu_initclocks();
  430 
  431         /*
  432          * Compute profhz/stathz, and fix profhz if needed.
  433          */
  434         i = stathz ? stathz : hz;
  435         if (profhz == 0)
  436                 profhz = i;
  437         psratio = profhz / i;
  438 
  439         ast_register(TDA_OWEUPC, ASTR_ASTF_REQUIRED, 0, ast_oweupc);
  440         ast_register(TDA_ALRM, ASTR_ASTF_REQUIRED, 0, ast_alrm);
  441         ast_register(TDA_PROF, ASTR_ASTF_REQUIRED, 0, ast_prof);
  442 
  443 #ifdef SW_WATCHDOG
  444         /* Enable hardclock watchdog now, even if a hardware watchdog exists. */
  445         watchdog_attach();
  446 #else
  447         /* Volunteer to run a software watchdog. */
  448         if (wdog_software_attach == NULL)
  449                 wdog_software_attach = watchdog_attach;
  450 #endif
  451 }
  452 SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL);
  453 
  454 static __noinline void
  455 hardclock_itimer(struct thread *td, struct pstats *pstats, int cnt, int usermode)
  456 {
  457         struct proc *p;
  458         int ast;
  459 
  460         ast = 0;
  461         p = td->td_proc;
  462         if (usermode &&
  463             timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) {
  464                 PROC_ITIMLOCK(p);
  465                 if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL],
  466                     tick * cnt) == 0)
  467                         ast |= TDAI(TDA_ALRM);
  468                 PROC_ITIMUNLOCK(p);
  469         }
  470         if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) {
  471                 PROC_ITIMLOCK(p);
  472                 if (itimerdecr(&pstats->p_timer[ITIMER_PROF],
  473                     tick * cnt) == 0)
  474                         ast |= TDAI(TDA_PROF);
  475                 PROC_ITIMUNLOCK(p);
  476         }
  477         if (ast != 0)
  478                 ast_sched_mask(td, ast);
  479 }
  480 
  481 void
  482 hardclock(int cnt, int usermode)
  483 {
  484         struct pstats *pstats;
  485         struct thread *td = curthread;
  486         struct proc *p = td->td_proc;
  487         int *t = DPCPU_PTR(pcputicks);
  488         int global, i, newticks;
  489 
  490         /*
  491          * Update per-CPU and possibly global ticks values.
  492          */
  493         *t += cnt;
  494         global = ticks;
  495         do {
  496                 newticks = *t - global;
  497                 if (newticks <= 0) {
  498                         if (newticks < -1)
  499                                 *t = global - 1;
  500                         newticks = 0;
  501                         break;
  502                 }
  503         } while (!atomic_fcmpset_int(&ticks, &global, *t));
  504 
  505         /*
  506          * Run current process's virtual and profile time, as needed.
  507          */
  508         pstats = p->p_stats;
  509         if (__predict_false(
  510             timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) ||
  511             timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)))
  512                 hardclock_itimer(td, pstats, cnt, usermode);
  513 
  514 #ifdef  HWPMC_HOOKS
  515         if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
  516                 PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
  517         if (td->td_intr_frame != NULL)
  518                 PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
  519 #endif
  520         /* We are in charge to handle this tick duty. */
  521         if (newticks > 0) {
  522                 tc_ticktock(newticks);
  523 #ifdef DEVICE_POLLING
  524                 /* Dangerous and no need to call these things concurrently. */
  525                 if (atomic_cmpset_acq_int(&devpoll_run, 0, 1)) {
  526                         /* This is very short and quick. */
  527                         hardclock_device_poll();
  528                         atomic_store_rel_int(&devpoll_run, 0);
  529                 }
  530 #endif /* DEVICE_POLLING */
  531                 if (watchdog_enabled > 0) {
  532                         i = atomic_fetchadd_int(&watchdog_ticks, -newticks);
  533                         if (i > 0 && i <= newticks)
  534                                 watchdog_fire();
  535                 }
  536                 intr_event_handle(clk_intr_event, NULL);
  537         }
  538         if (curcpu == CPU_FIRST())
  539                 cpu_tick_calibration();
  540         if (__predict_false(DPCPU_GET(epoch_cb_count)))
  541                 GROUPTASK_ENQUEUE(DPCPU_PTR(epoch_cb_task));
  542 }
  543 
  544 void
  545 hardclock_sync(int cpu)
  546 {
  547         int *t;
  548         KASSERT(!CPU_ABSENT(cpu), ("Absent CPU %d", cpu));
  549         t = DPCPU_ID_PTR(cpu, pcputicks);
  550 
  551         *t = ticks;
  552 }
  553 
  554 /*
  555  * Regular integer scaling formula without losing precision:
      *
      * Computes value * mul / div using integer arithmetic only.  The
      * quotient and the remainder of value / div are scaled separately, so
      * the full product value * mul is never formed.
  556  */
  557 #define TIME_INT_SCALE(value, mul, div) \
  558         (((value) / (div)) * (mul) + (((value) % (div)) * (mul)) / (div))
  559 
  560 /*
  561  * Macro for converting seconds and microseconds into actual ticks,
  562  * based on the given hz value:
      *
      * The microsecond part is usec * hz / 1000000, with the division done
      * in two steps: first by 1 << 6 (inside TIME_INT_SCALE), then by
      * 1000000 >> 6.  Fractional ticks are truncated.
  563  */
  564 #define TIME_TO_TICKS(sec, usec, hz) \
  565         ((sec) * (hz) + TIME_INT_SCALE(usec, hz, 1 << 6) / (1000000 >> 6))
  566 
      /*
       * Compile-time check that, for the given hz, the largest input accepted
       * by this check (INT_MAX / hz - 1 seconds plus 999999 microseconds)
       * converts to a non-negative tick count below INT_MAX.
       */
  567 #define TIME_ASSERT_VALID_HZ(hz)        \
  568         _Static_assert(TIME_TO_TICKS(INT_MAX / (hz) - 1, 999999, hz) >= 0 && \
  569                        TIME_TO_TICKS(INT_MAX / (hz) - 1, 999999, hz) < INT_MAX, \
  570                        "tvtohz() can overflow the regular integer type")
  571 
  572 /*
  573  * Compile time assert the maximum and minimum values to fit into a
  574  * regular integer when computing TIME_TO_TICKS():
  575  */
  576 TIME_ASSERT_VALID_HZ(HZ_MAXIMUM);
  577 TIME_ASSERT_VALID_HZ(HZ_MINIMUM);
  578 
  579 /*
  580  * The formula is mostly linear, but test some more common values just
  581  * in case:
  582  */
  583 TIME_ASSERT_VALID_HZ(1024);
  584 TIME_ASSERT_VALID_HZ(1000);
  585 TIME_ASSERT_VALID_HZ(128);
  586 TIME_ASSERT_VALID_HZ(100);
  587 
  588 /*
  589  * Compute number of ticks representing the specified amount of time.
  590  * If the specified time is negative, a value of 1 is returned. This
  591  * function returns a value from 1 up to and including INT_MAX.
  592  */
  593 int
  594 tvtohz(struct timeval *tv)
  595 {
  596         int retval;
  597 
  598         /*
  599          * The values passed here may come from user-space and these
  600          * checks ensure "tv_usec" is within its allowed range:
  601          */
  602 
  603         /* check for tv_usec underflow */
  604         if (__predict_false(tv->tv_usec < 0)) {
  605                 tv->tv_sec += tv->tv_usec / 1000000;
  606                 tv->tv_usec = tv->tv_usec % 1000000;
  607                 /* convert tv_usec to a positive value */
  608                 if (__predict_true(tv->tv_usec < 0)) {
  609                         tv->tv_usec += 1000000;
  610                         tv->tv_sec -= 1;
  611                 }
  612         /* check for tv_usec overflow */
  613         } else if (__predict_false(tv->tv_usec >= 1000000)) {
  614                 tv->tv_sec += tv->tv_usec / 1000000;
  615                 tv->tv_usec = tv->tv_usec % 1000000;
  616         }
  617 
  618         /* check for tv_sec underflow */
  619         if (__predict_false(tv->tv_sec < 0))
  620                 return (1);
  621         /* check for tv_sec overflow (including room for the tv_usec part) */
  622         else if (__predict_false(tv->tv_sec >= tick_seconds_max))
  623                 return (INT_MAX);
  624 
  625         /* cast to "int" to avoid platform differences */
  626         retval = TIME_TO_TICKS((int)tv->tv_sec, (int)tv->tv_usec, hz);
  627 
  628         /* add one additional tick */
  629         return (retval + 1);
  630 }
  631 
  632 /*
  633  * Start profiling on a process.
  634  *
  635  * Kernel profiling passes proc0 which never exits and hence
  636  * keeps the profile clock running constantly.
  637  */
  638 void
  639 startprofclock(struct proc *p)
  640 {
  641 
  642         PROC_LOCK_ASSERT(p, MA_OWNED);
  643         if (p->p_flag & P_STOPPROF)
  644                 return;
  645         if ((p->p_flag & P_PROFIL) == 0) {
  646                 p->p_flag |= P_PROFIL;
  647                 mtx_lock(&time_lock);
  648                 if (++profprocs == 1)
  649                         cpu_startprofclock();
  650                 mtx_unlock(&time_lock);
  651         }
  652 }
  653 
  654 /*
  655  * Stop profiling on a process.
  656  */
  657 void
  658 stopprofclock(struct proc *p)
  659 {
  660 
  661         PROC_LOCK_ASSERT(p, MA_OWNED);
  662         if (p->p_flag & P_PROFIL) {
  663                 if (p->p_profthreads != 0) {
  664                         while (p->p_profthreads != 0) {
  665                                 p->p_flag |= P_STOPPROF;
  666                                 msleep(&p->p_profthreads, &p->p_mtx, PPAUSE,
  667                                     "stopprof", 0);
  668                         }
  669                 }
  670                 if ((p->p_flag & P_PROFIL) == 0)
  671                         return;
  672                 p->p_flag &= ~P_PROFIL;
  673                 mtx_lock(&time_lock);
  674                 if (--profprocs == 0)
  675                         cpu_stopprofclock();
  676                 mtx_unlock(&time_lock);
  677         }
  678 }
  679 
  680 /*
  681  * Statistics clock.  Updates rusage information and calls the scheduler
  682  * to adjust priorities of the active thread.
  683  *
  684  * This should be called by all active processors.
  685  */
  686 void
  687 statclock(int cnt, int usermode)
  688 {
  689         struct rusage *ru;
  690         struct vmspace *vm;
  691         struct thread *td;
  692         struct proc *p;
  693         long rss;
  694         long *cp_time;
  695         uint64_t runtime, new_switchtime;
  696 
  697         td = curthread;
  698         p = td->td_proc;
  699 
  700         cp_time = (long *)PCPU_PTR(cp_time);
  701         if (usermode) {
  702                 /*
  703                  * Charge the time as appropriate.
  704                  */
  705                 td->td_uticks += cnt;
  706                 if (p->p_nice > NZERO)
  707                         cp_time[CP_NICE] += cnt;
  708                 else
  709                         cp_time[CP_USER] += cnt;
  710         } else {
  711                 /*
  712                  * Came from kernel mode, so we were:
  713                  * - handling an interrupt,
  714                  * - doing syscall or trap work on behalf of the current
  715                  *   user process, or
  716                  * - spinning in the idle loop.
  717                  * Whichever it is, charge the time as appropriate.
  718                  * Note that we charge interrupts to the current process,
  719                  * regardless of whether they are ``for'' that process,
  720                  * so that we know how much of its real time was spent
  721                  * in ``non-process'' (i.e., interrupt) work.
  722                  */
  723                 if ((td->td_pflags & TDP_ITHREAD) ||
  724                     td->td_intr_nesting_level >= 2) {
  725                         td->td_iticks += cnt;
  726                         cp_time[CP_INTR] += cnt;
  727                 } else {
  728                         td->td_pticks += cnt;
  729                         td->td_sticks += cnt;
  730                         if (!TD_IS_IDLETHREAD(td))
  731                                 cp_time[CP_SYS] += cnt;
  732                         else
  733                                 cp_time[CP_IDLE] += cnt;
  734                 }
  735         }
  736 
  737         /* Update resource usage integrals and maximums. */
  738         MPASS(p->p_vmspace != NULL);
  739         vm = p->p_vmspace;
  740         ru = &td->td_ru;
  741         ru->ru_ixrss += pgtok(vm->vm_tsize) * cnt;
  742         ru->ru_idrss += pgtok(vm->vm_dsize) * cnt;
  743         ru->ru_isrss += pgtok(vm->vm_ssize) * cnt;
  744         rss = pgtok(vmspace_resident_count(vm));
  745         if (ru->ru_maxrss < rss)
  746                 ru->ru_maxrss = rss;
  747         KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock",
  748             "prio:%d", td->td_priority, "stathz:%d", (stathz)?stathz:hz);
  749         SDT_PROBE2(sched, , , tick, td, td->td_proc);
  750         thread_lock_flags(td, MTX_QUIET);
  751 
  752         /*
  753          * Compute the amount of time during which the current
  754          * thread was running, and add that to its total so far.
  755          */
  756         new_switchtime = cpu_ticks();
  757         runtime = new_switchtime - PCPU_GET(switchtime);
  758         td->td_runtime += runtime;
  759         td->td_incruntime += runtime;
  760         PCPU_SET(switchtime, new_switchtime);
  761 
  762         sched_clock(td, cnt);
  763         thread_unlock(td);
  764 #ifdef HWPMC_HOOKS
  765         if (td->td_intr_frame != NULL)
  766                 PMC_SOFT_CALL_TF( , , clock, stat, td->td_intr_frame);
  767 #endif
  768 }
  769 
  770 void
  771 profclock(int cnt, int usermode, uintfptr_t pc)
  772 {
  773         struct thread *td;
  774 
  775         td = curthread;
  776         if (usermode) {
  777                 /*
  778                  * Came from user mode; CPU was in user state.
  779                  * If this process is being profiled, record the tick.
  780                  * if there is no related user location yet, don't
  781                  * bother trying to count it.
  782                  */
  783                 if (td->td_proc->p_flag & P_PROFIL)
  784                         addupc_intr(td, pc, cnt);
  785         }
  786 #ifdef HWPMC_HOOKS
  787         if (td->td_intr_frame != NULL)
  788                 PMC_SOFT_CALL_TF( , , clock, prof, td->td_intr_frame);
  789 #endif
  790 }
  791 
  792 /*
  793  * Return information about system clocks.
  794  */
  795 static int
  796 sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
  797 {
  798         struct clockinfo clkinfo;
  799         /*
  800          * Construct clockinfo structure.
  801          */
  802         bzero(&clkinfo, sizeof(clkinfo));
  803         clkinfo.hz = hz;
  804         clkinfo.tick = tick;
  805         clkinfo.profhz = profhz;
  806         clkinfo.stathz = stathz ? stathz : hz;
  807         return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
  808 }
  809 
  810 SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate,
  811         CTLTYPE_STRUCT|CTLFLAG_RD|CTLFLAG_MPSAFE,
  812         0, 0, sysctl_kern_clockrate, "S,clockinfo",
  813         "Rate and period of various kernel clocks");
  814 
  815 static void
  816 watchdog_config(void *unused __unused, u_int cmd, int *error)
  817 {
  818         u_int u;
  819 
  820         u = cmd & WD_INTERVAL;
  821         if (u >= WD_TO_1SEC) {
  822                 watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz;
  823                 watchdog_enabled = 1;
  824                 *error = 0;
  825         } else {
  826                 watchdog_enabled = 0;
  827         }
  828 }
  829 
  830 /*
  831  * Handle a watchdog timeout by dumping interrupt information and
  832  * then either dropping to DDB or panicking.
  833  */
  834 static void
  835 watchdog_fire(void)
  836 {
  837         int nintr;
  838         uint64_t inttotal;
  839         u_long *curintr;
  840         char *curname;
  841 
  842         curintr = intrcnt;
  843         curname = intrnames;
  844         inttotal = 0;
  845         nintr = sintrcnt / sizeof(u_long);
  846 
  847         printf("interrupt                   total\n");
  848         while (--nintr >= 0) {
  849                 if (*curintr)
  850                         printf("%-12s %20lu\n", curname, *curintr);
  851                 curname += strlen(curname) + 1;
  852                 inttotal += *curintr++;
  853         }
  854         printf("Total        %20ju\n", (uintmax_t)inttotal);
  855 
  856 #if defined(KDB) && !defined(KDB_UNATTENDED)
  857         kdb_backtrace();
  858         kdb_enter(KDB_WHY_WATCHDOG, "watchdog timeout");
  859 #else
  860         panic("watchdog timeout");
  861 #endif
  862 }
Cache object: 641da87ef76c250215322bd5f54b325f
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/kern_clock.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_clock.c